
Commit c3afad9

Fix logging performance issues in Elasticsearch v2. rsyslog improvements
After upgrading to Elasticsearch v2, some of our logging tests were consistently failing if they happened to follow a number of other tests. The issue was that if a test previously ran that made a bunch of requests, then Elasticsearch wasn't indexing the data quickly enough, causing the rsyslog queue to get backed up. This resulted in the requests from the logging tests not getting indexed into Elasticsearch within the expected time.

There were 2 main performance issues with Elasticsearch v2 that led to this:

1. Elasticsearch syncs the data to disk on every request in version 2, which hurts indexing performance. We've switched things back to v1's mode of performing asynchronous syncs periodically (so we're trading some safety for speed, but that seems okay for this kind of log data).

2. Updating index mappings in Elasticsearch version 2 is more costly. Our mapping is mostly static unless you enable logging of the "request_query" field, which stores all the request query params as a nested object. We have some tests that generate a bunch of unique, random query parameters (mainly for cache busting), but this leads to a deluge of mapping updates, since each new query param seen means the mapping needs to be updated (a brief illustration follows below).

   We recently disabled gathering this "request_query" field by default, but it was still enabled in our test suite by default, since we had some existing tests that relied on this functionality. So to solve the performance issues, we've shifted our test suite to disable gathering "request_query" too. This eliminates all the mapping updates during tests. The ability to enable "request_query" collection still exists, and the existing tests for this functionality have been retained (just in a more isolated fashion that won't impact other tests). However, given the potential performance issues of enabling this, it might be worth getting rid of this functionality altogether.

While debugging the performance issues, we've also made a couple of tweaks and improvements to our rsyslog setup:

- The queue.size wasn't configured, so the memory portion of the queue was capped at 1,000 by default. This meant the configured highwater and lowwater sizes weren't actually being used. Set a higher queue.size to resolve this.
- Enable the impstats plugin to output rsyslog queue stats every minute. This seems generally helpful to have in place to be able to see what rsyslog's up to and whether anything is becoming congested with logging.
- More comments to explain some of the more cryptic rsyslog configuration settings.
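To make the mapping-update problem concrete, here is a minimal sketch of the kind of logged document involved. The parameter name is hypothetical (not taken from the actual tests): because "request_query" stores query params as an object, every previously unseen key becomes a new field in the index mapping, so randomized cache-busting parameters trigger a mapping update on nearly every request.

  {
    "request_url": "/api/example.json?unique_param_8f3a1c=1",
    "request_query": {
      "unique_param_8f3a1c": "1"
    }
  }

With "request_query" collection disabled, only the fixed set of fields in config/elasticsearch_templates.json ever gets mapped, so the test suite no longer generates mapping updates while indexing.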
1 parent 0c981a8 commit c3afad9

9 files changed: +434 -158 lines changed

build/cmake/rsyslog.cmake

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ list(APPEND RSYSLOG_CONFIGURE_CMD --prefix=${INSTALL_PREFIX_EMBEDDED})
 list(APPEND RSYSLOG_CONFIGURE_CMD --enable-liblogging-stdlog)
 list(APPEND RSYSLOG_CONFIGURE_CMD --disable-libgcrypt)
 list(APPEND RSYSLOG_CONFIGURE_CMD --enable-imptcp)
+list(APPEND RSYSLOG_CONFIGURE_CMD --enable-impstats)
 list(APPEND RSYSLOG_CONFIGURE_CMD --enable-mmjsonparse)
 list(APPEND RSYSLOG_CONFIGURE_CMD --enable-mmutf8fix)
 list(APPEND RSYSLOG_CONFIGURE_CMD --enable-elasticsearch)

config/default.yml

Lines changed: 7 additions & 0 deletions
@@ -162,6 +162,13 @@ elasticsearch:
     breaker:
       fielddata:
         limit: 60%
+    index:
+      translog:
+        # Sync the data to disk asynchronously on a fixed interval, rather than
+        # for every request. This significantly helps indexing throughput (at
+        # the risk of losing a few seconds of data if things crash).
+        durability: async
+        sync_interval: 10s
 analytics:
   adapter: elasticsearch
   timezone: UTC
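The "durability: async" and "sync_interval: 10s" values above correspond to Elasticsearch's index.translog.* index settings. As a hedged sketch only (this commit applies them through api-umbrella's config file rather than through a template, and the template pattern shown here is illustrative), the same behavior expressed inside an index template's settings block would look roughly like:

  {
    "template": "api-umbrella-logs-*",
    "settings": {
      "index": {
        "translog": {
          "durability": "async",
          "sync_interval": "10s"
        }
      }
    }
  }

Either way, the trade-off is the one the comment describes: fsyncs happen on a 10-second timer instead of per request, so up to that window of log data could be lost on a hard crash.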

config/elasticsearch_templates.json

Lines changed: 114 additions & 18 deletions
@@ -81,66 +81,162 @@
           "type": "string",
           "index": "not_analyzed"
         },
-        "user_id": {
+        "backend_response_time": {
+          "type": "integer"
+        },
+        "gatekeeper_denied_code": {
           "type": "string",
-          "index": "not_analyzed"
+          "analyzer": "keyword_lowercase"
+        },
+        "internal_gatekeeper_time": {
+          "type": "float"
+        },
+        "internal_response_time": {
+          "type": "float"
+        },
+        "proxy_overhead": {
+          "type": "integer"
+        },
+        "request_accept": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_accept_encoding": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
         },
         "request_at": {
           "type": "date"
         },
+        "request_basic_auth_username": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_connection": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_content_type": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_hierarchy": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_host": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
         "request_ip": {
           "type": "string",
           "index": "not_analyzed"
         },
+        "request_ip_city": {
+          "type": "string",
+          "index": "not_analyzed"
+        },
         "request_ip_country": {
           "type": "string",
           "index": "not_analyzed"
         },
+        "request_ip_location": {
+          "type": "geo_point",
+          "lat_lon": true
+        },
         "request_ip_region": {
           "type": "string",
           "index": "not_analyzed"
         },
-        "request_ip_city": {
+        "request_method": {
           "type": "string",
-          "index": "not_analyzed"
+          "analyzer": "keyword_lowercase"
+        },
+        "request_origin": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_path": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
         },
         "request_path_hierarchy": {
           "type": "string",
           "analyzer": "path_hierarchy_lowercase"
         },
+        "request_referer": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_scheme": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
         "request_size": {
           "type": "integer"
         },
-        "response_status": {
-          "type": "short"
+        "request_url": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_user_agent": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_user_agent_family": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "request_user_agent_type": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
         },
         "response_age": {
           "type": "integer"
         },
+        "response_cache": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "response_content_encoding": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
         "response_content_length": {
           "type": "integer"
         },
+        "response_content_type": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
+        "response_server": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
+        },
         "response_size": {
           "type": "integer"
         },
-        "response_time": {
-          "type": "integer"
+        "response_status": {
+          "type": "short"
         },
-        "backend_response_time": {
+        "response_time": {
           "type": "integer"
         },
-        "proxy_overhead": {
-          "type": "integer"
+        "response_transfer_encoding": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
         },
-        "internal_response_time": {
-          "type": "float"
+        "user_email": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
         },
-        "internal_gatekeeper_time": {
-          "type": "float"
+        "user_id": {
+          "type": "string",
+          "index": "not_analyzed"
         },
-        "request_ip_location": {
-          "type": "geo_point",
-          "lat_lon": true
+        "user_registration_source": {
+          "type": "string",
+          "analyzer": "keyword_lowercase"
         }
       }
     }
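Most of the new string fields above use a "keyword_lowercase" analyzer, which is defined elsewhere in this template file (its definition isn't part of this hunk). For reference, a typical custom analyzer with that behavior, shown here only as an illustrative sketch rather than the project's exact definition, pairs the keyword tokenizer with a lowercase token filter so the whole value is indexed as a single lowercased term:

  {
    "settings": {
      "analysis": {
        "analyzer": {
          "keyword_lowercase": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["lowercase"]
          }
        }
      }
    }
  }

This gives case-insensitive exact matching (useful for values like hostnames and HTTP header contents) without the overhead of full-text analysis.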

config/test.yml

Lines changed: 0 additions & 1 deletion
@@ -5,7 +5,6 @@ http_port: 9080
 https_port: 9081
 analytics:
   timezone: America/Denver
-  log_request_url_query_params_separately: true
 static_site:
   port: 13013
 nginx:

templates/etc/rsyslog.conf.mustache

Lines changed: 41 additions & 4 deletions
@@ -4,6 +4,16 @@ global(maxMessageSize="32k")
 
 global(workDirectory="{{db_dir}}/rsyslog")
 
+# Output queue statistics periodically so the health of the queue can be
+# inspected.
+module(
+  load="impstats"
+  interval="60"
+  resetCounters="on"
+  log.syslog="off"
+  log.file="{{log_dir}}/rsyslog/stats.log"
+)
+
 # Load Modules
 module(load="imtcp" MaxSessions="500")
 module(load="mmjsonparse")
@@ -160,36 +170,59 @@ template(name="elasticsearch-json-record" type="subtree" subtree="$!usr!es")
 # Output to ElasticSearch.
 # A disk-assisted memory queue is used for buffering.
 local0.info action(
+  name="output-elasticsearch"
   type="omelasticsearch"
   server="{{elasticsearch._first_server.host}}"
   serverport="{{elasticsearch._first_server.port}}"
   searchIndex="elasticsearch-index"
   dynSearchIndex="on"
   searchType="log"
   template="elasticsearch-json-record"
+
   # Enable bulk indexing, so batches of records are sent as a single HTTP
   # request.
   bulkmode="on"
   bulkId="elasticsearch-id"
   dynBulkId="on"
+
+  # Allow bulk indexing of batches *up to* this size.
+  #
+  # Note that as long as Elasticsearch is keeping up and data isn't being
+  # queued by rsyslog, then rsyslog will send data as quickly as it can to
+  # Elasticsearch (so the batch sizes might be much smaller). See
+  # http://www.gossamer-threads.com/lists/rsyslog/users/17550
+  queue.dequeuebatchsize="5000"
+
   # Require indexing by all replica shards.
   asyncrepl="off"
+
   # For the in-memory queue, use a linked-list (so the memory doesn't have to
   # be pre-allocated based on a fixed size).
   queue.type="LinkedList"
+
   # Set a filename, so the queue is disk assisted. This allows for offloading
   # the data from the memory queue to disk if the queue becomes bigger than
   # expected.
   queue.filename="queue-elasticsearch"
+
+  # Set thresholds for when the memory queue is too big and should use the
+  # disk (note the disk queue size is not bounded by the queue.size, that only
+  # applies to the memory portion).
+  queue.size="15000"
+  queue.highwatermark="10000"
+  queue.lowwatermark="2000"
+
   # Persist data to disk on this interval (in seconds). We're okay with some
   # loss in the event of unexpected failures.
   queue.checkpointinterval="10"
+
   # Persist data to disk on graceful shutdowns.
   queue.saveonshutdown="on"
-  # Set thresholds for when the memory queue is too big and should use the
-  # disk.
-  queue.highwatermark="10000"
-  queue.lowwatermark="2000"
+
+  # If Elasticsearch is inaccessible, retry on this interval (in seconds)
+  # indefinitely (so we don't stop logging to Elasticsearch in case it goes
+  # down for a longer period of time).
+  action.resumeInterval="30"
   action.resumeRetryCount="-1"
 )
 {{/analytics._output_elasticsearch?}}
@@ -361,6 +394,7 @@ template(name="sql-json-record" type="subtree" subtree="$!usr!sql")
 # Output to Kafka.
 # A disk-assisted memory queue is used for buffering.
 local0.info action(
+  name="output-kafka"
   type="omkafka"
   broker=[{{kafka._rsyslog_broker}}]
   topic="{{kafka.topic}}"
@@ -370,6 +404,7 @@ local0.info action(
   queue.filename="queue-kafka"
   queue.checkpointinterval="10"
   queue.saveonshutdown="on"
+  queue.size="15000"
   queue.highwatermark="10000"
   queue.lowwatermark="2000"
   action.resumeRetryCount="-1"
@@ -381,13 +416,15 @@ template(name="all-json-record" type="list") {
   property(name="$!raw") constant(value="\n")
 }
 local0.info action(
+  name="output-file"
   type="omfile"
   file="{{log_dir}}/rsyslog/requests.log.gz"
   template="all-json-record"
   queue.type="LinkedList"
   queue.filename="queue-file"
   queue.checkpointinterval="10"
   queue.saveonshutdown="on"
+  queue.size="15000"
   queue.highwatermark="10000"
   queue.lowwatermark="2000"
   zipLevel="3"
