Guides for rsyslog

Using rsyslog to Reindex/Migrate Elasticsearch data

Original post: Scalable and Flexible Elasticsearch Reindexing via rsyslog by @Sematext

This recipe is useful in two scenarios:

  • migrating data from one Elasticsearch cluster to another (e.g. when you’re upgrading from Elasticsearch 1.x to 2.x or later)
  • reindexing data from one index to another in a cluster pre 2.3. For clusters on version 2.3 or later, you can use the Reindex API

Back to the recipe, we used an external application to scroll through Elasticsearch documents in the source cluster and push them to rsyslog via TCP. Then we used rsyslog’s Elasticsearch output to push logs to the destination cluster. The overall flow would be:

rsyslog to Elasticsearch reindex flow

This is an easy way to extend rsyslog, using whichever language you’re comfortable with, to support more inputs. Here, we piggyback on the TCP input. You can do a similar job with filters/parsers – you can find GeoIP implementations, for example – by piggybacking the mmexternal module, which uses stdin and stdout for communication. The same is possible for outputs, normally added via the omprog module: we did this to add a Solr output and one for SPM custom metrics.
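As an illustration of the omprog approach (the script path and template here are placeholders, not part of the original setup), such an output is wired up roughly like this:

module(load="omprog")

# rsyslog starts the external program and writes one formatted message
# per line to its stdin
action(type="omprog"
  binary="/usr/local/bin/my_custom_output.py"
  template="RSYSLOG_TraditionalFileFormat"
)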

The custom script in question doesn’t have to be multi-threaded; you can simply spin up more copies of it, each scrolling different indices. In this particular case, using two scripts gave us slightly better throughput, saturating the network:

rsyslog to Elasticsearch reindex flow multiple scripts

Writing the custom script

Before starting to write the script, one needs to know what the messages sent to rsyslog should look like. To be able to index data, rsyslog will need an index name, a type name and optionally an ID. In this particular case, we were dealing with logs, so the ID wasn’t necessary.

With this in mind, I see a number of ways of sending data to rsyslog:

  • one big JSON per line. One can use mmnormalize to parse that JSON, which then allows rsyslog to use values from within it as the index name, type name, and so on
  • for each line, begin with the bits of “extra data” (like index and type names), then append the JSON document that you want to reindex. Again, you can use mmnormalize to parse, but this time you can simply trust that the last part is JSON and send it to Elasticsearch directly, without the need to parse it
  • if you only need to pass two variables (index and type name, in this case), you can piggyback on the vague spec of RFC3164 syslog and send something like
    destination_index document_type:{"original": "document"}
    

With this last option, rsyslog’s RFC3164 parser will put the provided index name into the hostname property, the type into syslogtag and the original document into msg. A bit hacky, I know, but quite convenient (it makes the rsyslog configuration straightforward) and very fast, since the RFC3164 parser is very quick and it runs on all messages anyway. No need for mmnormalize, unless you want to change the document in-flight with rsyslog.

Below you can find the Python code that can scan through existing documents in an index (or index pattern, like logstash_2016.05.*) and push them to rsyslog via TCP. You’ll need the Python Elasticsearch client (pip install elasticsearch) and you’d run it like this:

python elasticsearch_to_rsyslog.py source_index destination_index

The script being:

from elasticsearch import Elasticsearch
import json, socket, sys

source_cluster = ['server1', 'server2']
rsyslog_address = '127.0.0.1'
rsyslog_port = 5514

es = Elasticsearch(source_cluster,
      retry_on_timeout=True,
      max_retries=10)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((rsyslog_address, rsyslog_port))


result = es.search(index=sys.argv[1], scroll='1m', search_type='scan', size=500)

while True:
  # fetch the next batch; with search_type='scan', the initial search returns no hits
  result = es.scroll(scroll_id=result['_scroll_id'], scroll='1m')
  for hit in result['hits']['hits']:
    # line format expected by rsyslog: "destination_index type:{original JSON document}"
    s.send(sys.argv[2] + ' ' + hit["_type"] + ':' + json.dumps(hit["_source"]) + '\n')
  if not result['hits']['hits']:
    break

s.close()
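As mentioned earlier, you can simply run several copies of this script in parallel, each scrolling a different index or index pattern, for example:

python elasticsearch_to_rsyslog.py 'logstash_2016.05.0*' destination_index &
python elasticsearch_to_rsyslog.py 'logstash_2016.05.1*' destination_index &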

If you need to modify messages, you can parse them in rsyslog via mmjsonparse and then add/remove fields through rsyslog’s scripting language. However, I couldn’t find a nice way to change field names – for example, to remove the dots that are forbidden since Elasticsearch 2.0 – so I did that in the Python script:

def de_dot(my_dict):
  # iterate over a copy of the items, since we modify the dict while looping
  for key, value in list(my_dict.items()):
    if type(value) is dict:
      value = de_dot(value)
    if '.' in key:
      del my_dict[key]
      key = key.replace('.', '_')
    my_dict[key] = value
  return my_dict

And then the “send” line becomes:

s.send(sys.argv[2] + ' ' + hit["_type"] + ':' + json.dumps(de_dot(hit["_source"]))+'\n')
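If you’d rather do this kind of tweaking on the rsyslog side, a rough sketch with mmjsonparse could look like the one below. The field names are made up, and cookie="" assumes a recent rsyslog; it is needed because our messages don’t start with the default "@cee:" cookie:

module(load="mmjsonparse")

# parse the JSON part of each message into rsyslog's $! property tree
action(type="mmjsonparse" cookie="")
if $parsesuccess == "OK" then {
  set $!migrated_by = "rsyslog";    # add a field (made-up name)
  unset $!some_old_field;           # drop a field (made-up name)
}

# when modifying the tree, render $! instead of the raw msg property
template(name="document_tree" type="subtree" subtree="$!")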

Configuring rsyslog

The first step here is to make sure you have the latest rsyslog, though the config below works with versions all the way back to 7.x (which can be found in most Linux distributions). You just need to make sure the rsyslog-elasticsearch package is installed, because we need the Elasticsearch output module.

# messages bigger than this are truncated
$maxMessageSize 10000000  # ~10MB

# load the TCP input and the ES output modules
module(load="imtcp")
module(load="omelasticsearch")

main_queue(
  # buffer up to 1M messages in memory
  queue.size="1000000"
  # these threads process messages and send them to Elasticsearch
  queue.workerThreads="4"
  # rsyslog processes messages in batches to avoid queue contention
  # this will also be the Elasticsearch bulk size
  queue.dequeueBatchSize="4000"
)

# we use templates to specify what the data sent to Elasticsearch looks like
template(name="document" type="list"){
  # the "msg" variable contains the document
  property(name="msg")
}
template(name="index" type="list"){
  # "hostname" has the index name
  property(name="hostname")
}
template(name="type" type="list"){
  # "syslogtag" has the type name
  property(name="syslogtag")
}

# start the TCP listener on the port we pointed the Python script to
input(type="imtcp" port="5514")

# sending data to Elasticsearch, using the templates defined earlier
action(type="omelasticsearch"
  template="document"
  dynSearchIndex="on" searchIndex="index"
  dynSearchType="on" searchType="type"
  server="localhost"  # destination Elasticsearch host
  serverport="9200"   # and port
  bulkmode="on"  # use the bulk API
  action.resumeretrycount="-1"  # retry indefinitely if Elasticsearch is unreachable
)

This configuration doesn’t have to disturb your local syslog (i.e. by replacing /etc/rsyslog.conf). You can put it someplace else and run a different rsyslog process:

rsyslogd -i /var/run/rsyslog_reindexer.pid -f /home/me/rsyslog_reindexer.conf

And that’s it! With rsyslog started, you can start the Python script(s) and do the reindexing.

Connecting with Logstash via Apache Kafka

Original post: Recipe: rsyslog + Kafka + Logstash by @Sematext

This recipe is similar to the previous rsyslog + Redis + Logstash one, except that we’ll use Kafka as a central buffer and connecting point instead of Redis. You’ll have more of the same advantages:

  • rsyslog is light and crazy-fast, including when you want it to tail files and parse unstructured data (see the Apache logs + rsyslog + Elasticsearch recipe)
  • Kafka is awesome at buffering things
  • Logstash can transform your logs and connect them to N destinations with unmatched ease

There are a couple of differences to the Redis recipe, though:

  • rsyslog already has Kafka output packages, so it’s easier to set up
  • Kafka has a different set of features than Redis (trying to avoid flame wars here) when it comes to queues and scaling

As with the other recipes, I’ll show you how to install and configure the needed components. The end result would be that local syslog (and tailed files, if you want to tail them) will end up in Elasticsearch, or a logging SaaS like Logsene (which exposes the Elasticsearch API for both indexing and searching). Of course you can choose to change your rsyslog configuration to parse logs as well (as we’ve shown before), and change Logstash to do other things (like adding GeoIP info).

Getting the ingredients

First of all, you’ll probably need to update rsyslog. Most distros come with ancient versions and don’t have the plugins you need. From the official packages you’ll need rsyslog itself and the Kafka output module, typically packaged as rsyslog-kafka.
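For example (exact package names may vary by distribution, so treat this as a sketch), with the official rsyslog repositories enabled:

# RHEL/CentOS
yum install rsyslog rsyslog-kafka
# Debian/Ubuntu
apt-get install rsyslog rsyslog-kafka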

If you don’t have Kafka already, you can set it up by downloading the binary tar. And then you can follow the quickstart guide. Basically you’ll have to start Zookeeper first (assuming you don’t have one already that you’d want to re-use):

bin/zookeeper-server-start.sh config/zookeeper.properties

And then start Kafka itself and create a simple 1-partition topic that we’ll use for pushing logs from rsyslog to Logstash. Let’s call it rsyslog_logstash:

bin/kafka-server-start.sh config/server.properties
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic rsyslog_logstash

Finally, you’ll have Logstash. At the time of writing, Logstash 2.0 is in beta, and it comes with lots of improvements (including huge performance gains for the GeoIP filter I touched on earlier). After downloading and unpacking, you can start it via:

bin/logstash -f logstash.conf

Logstash is also available as packages, in which case you’d put the configuration file in /etc/logstash/conf.d/ and start it with the init script.

Configuring rsyslog

With rsyslog, you’d need to load the needed modules first:

module(load="imuxsock")  # will listen to your local syslog
module(load="imfile")    # if you want to tail files
module(load="omkafka")   # lets you send to Kafka

If you want to tail files, you’d have to add definitions for each group of files like this:

input(type="imfile"
  File="/opt/logs/example*.log"
  Tag="examplelogs"
)

Then you’d need a template that will build JSON documents out of your logs. You would publish these JSONs to Kafka and consume them with Logstash. Here’s one that works well for plain syslog and tailed files that aren’t parsed via mmnormalize:

template(name="json_lines" type="list" option.json="on") {
  constant(value="{")
  constant(value="\"timestamp\":\"")
  property(name="timereported" dateFormat="rfc3339")
  constant(value="\",\"message\":\"")
  property(name="msg")
  constant(value="\",\"host\":\"")
  property(name="hostname")
  constant(value="\",\"severity\":\"")
  property(name="syslogseverity-text")
  constant(value="\",\"facility\":\"")
  property(name="syslogfacility-text")
  constant(value="\",\"syslog-tag\":\"")
  property(name="syslogtag")
  constant(value="\"}")
}

By default, rsyslog has a memory queue of 10K messages and a single thread that works with batches of up to 16 messages (you can find all queue parameters here). You may want to change:
– the batch size, which also controls the maximum number of messages to be sent to Kafka at once
– the number of threads, which would parallelize sending to Kafka as well
– the size of the queue and its nature: in-memory(default), disk or disk-assisted

In an rsyslog->Kafka->Logstash setup I assume you want to keep rsyslog light, so these numbers would be small, like:

main_queue(
  queue.workerthreads="1"      # threads to work on the queue
  queue.dequeueBatchSize="100" # max number of messages to process at once
  queue.size="10000"           # max queue size
)

Finally, to publish to Kafka you’d mainly specify the brokers to connect to (in this example we have one listening to localhost:9092) and the name of the topic we just created:

action(
  broker=["localhost:9092"]
  type="omkafka"
  topic="rsyslog_logstash"
  template="json_lines"
)

Assuming Kafka is started, rsyslog will keep pushing to it.

Configuring Logstash

This is the part where we pick up the JSON logs (as defined in the earlier template) and forward them to the preferred destinations. First, we have the input, which will read from the Kafka topic we created. To connect, we’ll point Logstash to Zookeeper, and it will fetch all the info about Kafka from there:

input {
  kafka {
    zk_connect => "localhost:2181"
    topic_id => "rsyslog_logstash"
  }
}

At this point, you may want to use various filters to change your logs before pushing to Logsene/Elasticsearch. For this last step, you’d use the Elasticsearch output:

output {
  elasticsearch {
    hosts => "localhost" # it used to be "host" pre-2.0
    port => 9200
    #ssl => "true"
    #protocol => "http" # removed in 2.0
  }
}
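Going back to the filters mentioned above, a minimal example (purely illustrative) is a date filter that turns the RFC3339 timestamp from our JSON template into Logstash’s @timestamp field:

filter {
  date {
    match => [ "timestamp", "ISO8601" ]
  }
}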

And that’s it! Now you can use Kibana (or, in the case of Logsene, either Kibana or Logsene’s own UI) to search your logs!

Recipe: Apache Logs + rsyslog (parsing) + Elasticsearch

Original post: Recipe: Apache Logs + rsyslog (parsing) + Elasticsearch by @Sematext

This recipe is about tailing Apache HTTPD logs with rsyslog, parsing them into structured JSON documents, and forwarding them to Elasticsearch (or a log analytics SaaS, like Logsene, which exposes the Elasticsearch API). Having them indexed in a structured way will allow you to do better analytics with tools like Kibana:

Kibana_screenshot

We’ll also cover pushing logs coming from the syslog socket and kernel, and how to buffer all of them properly. So this is quite a complete recipe for your centralized logging needs.

Getting the ingredients

Even though most distros already have rsyslog installed, it’s highly recommended to get the latest stable version from the rsyslog repositories. The packages you’ll need are rsyslog itself, rsyslog-mmnormalize and rsyslog-elasticsearch.

With the ingredients in place, let’s start cooking a configuration. The configuration needs to do the following:

  • load the required modules
  • configure inputs: tailing Apache logs and system logs
  • configure the main queue to buffer your messages. This is also the place to define the number of worker threads and batch sizes (which will also be Elasticsearch bulk sizes)
  • parse common Apache logs into JSON
  • define a template where you’d specify how JSON messages would look like. You’d use this template to send logs to Logsene/Elasticsearch via the Elasticsearch output

Loading modules

Here, we’ll need imfile to tail files, mmnormalize to parse them, and omelasticsearch to send them. If you want to tail the system logs, you’d also need to include imuxsock and imklog (for kernel logs).

# system logs
module(load="imuxsock")
module(load="imklog")
# file
module(load="imfile")
# parser
module(load="mmnormalize")
# sender
module(load="omelasticsearch")

Configure inputs

For system logs, you typically don’t need any special configuration (unless you want to listen to a non-default Unix Socket). For Apache logs, you’d point to the file(s) you want to monitor. You can use wildcards for file names as well. You also need to specify a syslog tag for each input. You can use this tag later for filtering.

input(type="imfile"
      File="/var/log/apache*.log"
      Tag="apache:"
)

NOTE: By default, rsyslog will not poll for file changes every N seconds. Instead, it will rely on the kernel (via inotify) to poke it when files get changed. This makes the process quite realtime and scales well, especially if you have many files changing rarely. Inotify is also less prone to bugs when it comes to file rotation and other events that would otherwise happen between two "polls". You can still use the legacy mode="polling" by specifying it in imfile's module parameters.
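If you do want polling, a sketch of that module load (replacing the plain module(load="imfile") from earlier) would be:

# poll every 10 seconds instead of relying on inotify
module(load="imfile" mode="polling" PollingInterval="10")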

Queue and workers

By default, all incoming messages go into a main queue. You can also separate flows (e.g. files and system logs) by using different rulesets, but let’s keep it simple for now.
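If you do want to separate flows later, a rough sketch (reusing the rulebase and template defined later in this recipe) would bind the file input to its own ruleset, with its own queue and actions:

# bind the Apache file input to a dedicated ruleset
input(type="imfile" File="/var/log/apache*.log" Tag="apache:" ruleset="apache")

# the ruleset gets its own queue, independent of the system logs
ruleset(name="apache" queue.size="10000" queue.workerThreads="2") {
  action(type="mmnormalize" rulebase="/opt/rsyslog/apache.rb")
  action(type="omelasticsearch"
    template="all-json"
    server="MY-ELASTICSEARCH-SERVER"
    bulkmode="on"
    action.resumeretrycount="-1"
  )
}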

For tailing files, this kind of queue would work well:

main_queue(
  queue.workerThreads="4"
  queue.dequeueBatchSize="1000"
  queue.size="10000"
)

This would be a small in-memory queue of 10K messages, which works well if Elasticsearch goes down, because the data is still in the file and rsyslog can stop tailing when the queue becomes full, and then resume tailing. 4 worker threads will pick batches of up to 1000 messages from the queue, parse them (see below) and send the resulting JSONs to Elasticsearch.

If you need a larger queue (e.g. if you have lots of system logs and want to make sure they’re not lost), I would recommend using a disk-assisted memory queue, which will spill to disk whenever it uses too much memory:

main_queue(
  queue.workerThreads="4"
  queue.dequeueBatchSize="1000"
  queue.highWatermark="500000"    # max no. of events to hold in memory
  queue.lowWatermark="200000"     # use memory queue again, when it's back to this level
  queue.spoolDirectory="/var/run/rsyslog/queues"  # where to write on disk
  queue.fileName="stats_ruleset"
  queue.maxDiskSpace="5g"        # it will stop at this much disk space
  queue.size="5000000"           # or this many messages
  queue.saveOnShutdown="on"      # save memory queue contents to disk when rsyslog is exiting
)

Parsing with mmnormalize

The message normalization module uses liblognorm to do the parsing. So in the configuration you’d simply point rsyslog to the liblognorm rulebase:

action(type="mmnormalize"
  rulebase="/opt/rsyslog/apache.rb"
)

where apache.rb will contain rules for parsing apache logs, that can look like this:

version=2

rule=:%clientip:word% %ident:word% %auth:word% [%timestamp:char-to:]%] "%verb:word% %request:word% HTTP/%httpversion:float%" %response:number% %bytes:number% "%referrer:char-to:"%" "%agent:char-to:"%"%blob:rest%

Where version=2 indicates that rsyslog should use liblognorm’s v2 engine (which was introduced in rsyslog 8.13) and then you have the actual rule for parsing logs. You can find more details about configuring those rules in the liblognorm documentation.

Apache logs aside, creating new parsing rules typically requires a lot of trial and error. To check your rules without messing with rsyslog, you can use the lognormalizer binary like this:

head -1 /path/to/log.file | /usr/lib/lognorm/lognormalizer -r /path/to/rulebase.rb -e json

NOTE: If you’re used to Logstash’s grok, these parsing rules will look very familiar. However, things are quite different under the hood. Grok is a nice abstraction over regular expressions, while liblognorm builds parse trees out of specialized parsers. This makes liblognorm much faster, especially as you add more rules. In fact, it scales so well that, for all practical purposes, performance depends on the length of the log lines and not on the number of rules. This post explains the theory behind this assumption, and it is actually proven by various tests. The downside is that you’ll lose some of the flexibility offered by regular expressions. You can still use regular expressions with liblognorm (you’d need to set allow_regex to on when loading mmnormalize) but then you’d lose a lot of the benefits that come with the parse tree approach.
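If you do go down the regular-expression route, enabling it is a module-load parameter, replacing the plain module(load="mmnormalize") from earlier:

module(load="mmnormalize" allow_regex="on")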

Template for parsed logs

Since we want to push logs to Elasticsearch as JSON, we’d need to use templates to format them. For Apache logs, by the time parsing is done, all the relevant fields are already in the $!all-json property, which you’ll use in a template:

template(name="all-json" type="list"){
  property(name="$!all-json")
}

Template for time-based indices

For the logging use-case, you’d probably want to use time-based indices (e.g. if you keep your logs for 7 days, you can have one index per day). Such a design will give your cluster a lot more capacity due to the way Elasticsearch merges data in the background (you can learn the details in our presentations at GeeCON and Berlin Buzzwords).

To make rsyslog use daily or other time-based indices, you need to define a template that builds an index name off the timestamp of each log. This is one that names them logstash-YYYY.MM.DD, like Logstash does by default:

template(name="logstash-index"
  type="list") {
    constant(value="logstash-")
    property(name="timereported" dateFormat="rfc3339" position.from="1" position.to="4")
    constant(value=".")
    property(name="timereported" dateFormat="rfc3339" position.from="6" position.to="7")
    constant(value=".")
    property(name="timereported" dateFormat="rfc3339" position.from="9" position.to="10")
}

And then you’d use this template in the Elasticsearch output:

action(type="omelasticsearch"
  template="all-json"
  dynSearchIndex="on"
  searchIndex="logstash-index"
  searchType="apache"
  server="MY-ELASTICSEARCH-SERVER"
  bulkmode="on"
  action.resumeretrycount="-1"
)

Putting both Apache and system logs together

If you use the same rsyslog to parse system logs, mmnormalize won’t parse them (because they don’t match Apache’s common log format). In this case, you’ll need to pick the rsyslog properties you want and build an additional JSON template:

template(name="plain-syslog"
  type="list") {
    constant(value="{")
      constant(value="\"timestamp\":\"")     property(name="timereported" dateFormat="rfc3339")
      constant(value="\",\"host\":\"")        property(name="hostname")
      constant(value="\",\"severity\":\"")    property(name="syslogseverity-text")
      constant(value="\",\"facility\":\"")    property(name="syslogfacility-text")
      constant(value="\",\"tag\":\"")   property(name="syslogtag" format="json")
      constant(value="\",\"message\":\"")    property(name="msg" format="json")
    constant(value="\"}")
}

Then you can make rsyslog decide: if a log was parsed successfully, use the all-json template. If not, use the plain-syslog one:

if $parsesuccess == "OK" then {
 action(type="omelasticsearch"
  template="all-json"
  ...
 )
} else {
 action(type="omelasticsearch"
  template="plain-syslog"
  ...
 )
}

And that’s it! Now you can restart rsyslog and get both your system and Apache logs parsed, buffered and indexed into Elasticsearch. If you’re a Logsene user, the recipe is a bit simpler: you’d follow the same steps, except that you’ll skip the logstash-index template (Logsene does that for you) and your Elasticsearch actions will look like this:

action(type="omelasticsearch"
  template="all-json or plain-syslog"
  searchIndex="LOGSENE-APP-TOKEN-GOES-HERE"
  searchType="apache"
  server="logsene-receiver.sematext.com"
  serverport="80"
  bulkmode="on"
  action.resumeretrycount="-1"
)

Coupling with Logstash via Redis

Original post: Recipe: rsyslog + Redis + Logstash by @Sematext

OK, so you want to hook up rsyslog with Logstash. If you don’t remember why you want that, let me give you a few hints:

  • Logstash can do lots of things, it’s easy to set up but tends to be too heavy to put on every server
  • you have Redis already installed so you can use it as a centralized queue. If you don’t have it yet, it’s worth a try because it’s very light for this kind of workload.
  • you have rsyslog on pretty much all your Linux boxes. It’s light and surprisingly capable, so why not make it push to Redis in order to hook it up with Logstash?

In this post, you’ll see how to install and configure the needed components so you can send your local syslog (or tail files with rsyslog) into Redis as a buffer, then use Logstash to ship the logs to Elasticsearch or a logging SaaS like Logsene (which exposes the Elasticsearch API for both indexing and searching), so you can search and analyze them with Kibana:

Kibana_search


Tutorial: Sending impstats Metrics to Elasticsearch Using Rulesets and Queues

Originally posted on the Sematext blog: Monitoring rsyslog’s Performance with impstats and Elasticsearch

If you’re using rsyslog for processing lots of logs (and, as we’ve shown before, rsyslog is good at processing lots of logs), you’re probably interested in monitoring it. To do that, you can use impstats, which comes from input module for process stats. impstats produces information like:
– input stats, like how many events went through each input
– queue stats, like the maximum size of a queue
– action (output or message modification) stats, like how many events were forwarded by each action
– general stats, like CPU time or memory usage
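To give you an idea, loading impstats with a few common options (the values here are only an example) looks like this:

module(load="impstats"
  interval="60"          # emit statistics every 60 seconds
  resetCounters="on"     # report per-interval deltas instead of ever-growing totals
  format="cee"           # structured output that's easy to parse downstream
)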

In this post, we’ll show you how to send those stats to Elasticsearch (or Logsene — essentially hosted ELK, our log analytics service, which exposes the Elasticsearch API), where you can explore them with a nice UI, like Kibana. For example, you can get the number of logs going through each input/output per hour:
kibana_graph
More precisely, we’ll look at:
– useful options around impstats
– how to use those stats and what they’re about
– how to ship stats to Elasticsearch/Logsene by using rsyslog’s Elasticsearch output
– how to do this shipping in a fast and reliable way. This will apply to most rsyslog use-cases, not only impstats


rsyslog and ElasticSearch

by Micah Yoder, originally published on rackspace. Minor changes through Adiscon.

There is a clear benefit to being able to aggregate logs from various servers and services into one place and to search them for any sort of arbitrary event. Traditional syslog can aggregate logs, but searching through them often involves grep and convoluted regular expressions. Logging structured data to a database makes a lot of sense. rsyslog and ElasticSearch can do that, but figuring out how to get it to work from the rsyslog documentation can be difficult. Let’s start from the beginning.

First, you need the newest stable rsyslog, 7.4.x. The older 7.2 won’t cut it. You need the plug-ins mmnormalize and omelasticsearch, both of which are available from rsyslog’s yum repositories for RHEL/CentOS. mmnormalize requires some packages from EPEL so that will need to be added to the server as well.

Let’s walk through this from the beginning, starting with the most basic rsyslog configuration:

module(load="imuxsock") # provides support for local system logging (e.g. via logger command)
module(load="imklog")   # provides kernel logging support (previously done by rklogd)
$ActionFileDefaultTemplate RSYSLOG_TraditionalFileFormat
*.*     /var/log/messages

This simply loads the necessary modules for basic logging operation, sets the normal log format, and sends everything to /var/log/messages. With this in place, if you issue this command:

# logger 'hello world!'

You get this in /var/log/messages:

Mar 27 14:16:47 micahsyslog root: hello world!

There are several parts here: the date/time, the hostname, the syslog tag or program (root), and the message. Another example:

# service sshd restart

yields:

Mar 27 14:19:27 micahsyslog sshd[23295]: Received signal 15; terminating.
Mar 27 14:19:27 micahsyslog sshd[16602]: Server listening on 0.0.0.0 port 22.
Mar 27 14:19:27 micahsyslog sshd[16602]: Server listening on :: port 22.

By the way, if we remove the ActionFileDefaultTemplate directive, we get the default line with an ISO time, which is nicer for automatic processing but a bit less readable for humans:

2013-03-27T21:51:29.139796+00:00 micahsyslog root: Hello world #2!

That is good to know, but actually irrelevant for putting data into ElasticSearch. If you also log to text files, you can decide whether to use the ISO time or the more human readable time.

Now let’s try to get it to normalize messages from the log! For this example we will need rsyslog running with the ActionFileDefaultTemplate directive (keep it in for now, but you can remove it later if you want).

Create a rulebase file, which will be used by liblognorm. liblognorm is a library written by Rainer Gerhards, the author of rsyslog, which parses log messages according to a pattern and extracts variables from them. We can create the file as /etc/rsyslog.d/rules.rb and put in these contents:

prefix=%date:date-rfc3164% %hostname:word% %tag:word%
rule=user: new user: name=%user:char-to:,%, UID=%uid:number%, GID=%gid:number%, home=%home:char-to:,%, shell=%shell:word%
rule=test: Hello %x:number%

Now if we log something:

# logger "Hello 55"

We should be able to get something like this:

# lognormalizer -r rsyslog.d/rules.rb -t test < /var/log/messages 
[cee@115 event.tags="test" x="55" tag="root:" hostname="micahsyslog" date="Mar 27 22:53:30"]

See what happened? The lognormalizer command, which uses liblognorm, was able to pull out the number in the log as variable “x” according to our “test” rule.

A short description of the rule base is in order. The prefix line contains a prefix that will be applied to all following rules. A rule is defined by a line beginning with the word “rule”, followed by an “=” sign, an optional tag, then a “:”. A space should then follow – but the space is actually part of the rule! There will be a space after the prefix, so it needs to be there.

Documentation on the rulebase file can be found here: http://www.liblognorm.com/files/manual/index.html Then navigate to How to use liblognorm → Rulebase. This will show you all the directives you can use.

Now we need to get the data into ElasticSearch. First we need to install it, which is very simple (at least for a single test node). Just extract its tarball, change into the base directory, and run:

# bin/elasticsearch -f

The -f causes it to run in the foreground so you can see what is going on.

We then add this to rsyslog.conf:

module(load="mmnormalize")
module(load="omelasticsearch")

template(name="test" type="subtree" subtree="$!")

set $!fac = $syslogfacility;
set $!host = $hostname;
set $!sev = $syslogseverity;
set $!time = $timereported;
set $!tgen = $timegenerated;
set $!tag = $syslogtag;
set $!prog = $programname;

action(type="omelasticsearch" server="localhost" template="test")

So what is all this? First we load the modules to normalize the log messages and for export to ElasticSearch. These should be placed near the top of your rsyslog.conf file with any other includes.

The template statement tells which part of the CEE data should be sent to ElasticSearch. Here a brief explanation of the data structure is necessary.

JSON (the format for CEE logging data and the native format for ElasticSearch) is a hierarchical data structure. The root of the data structure in rsyslog is $! – something like the ‘{}’ in a JSON document. Assigning to $!data1 would put a value into the "data1" top-level JSON element: {"data1": "…"}. Trees can be built as well: if you assign to $!tree1!child1, you will get a JSON document like this: {"tree1": {"child1": "…"}}
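For example, a single assignment like the following (the value is arbitrary) produces a nested document:

set $!tree1!child1 = "some value";
# rendered through the "test" subtree template above, this yields the
# JSON document: {"tree1": {"child1": "some value"}}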

This rsyslog document lists the available properties that you can use to populate these CEE variables.

As of versions 7.5.8, 7.6.0 and 8.1.4, it is not only the standard rsyslog properties and constants that can be assigned to these variables. There is now a way to assign the result of an rsyslog template, which allows more complex values. That is what you need to put a sortable timestamp into the document. More information can be found in the guide to “set variable and exec_template”.
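With a recent enough rsyslog, a sketch of that approach (the template name here is made up) would be:

# build an RFC3339 timestamp via a template and store it in the CEE tree
template(name="ts_rfc3339" type="list") {
  property(name="timereported" dateFormat="rfc3339")
}
set $!timestamp = exec_template("ts_rfc3339");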

As this was not available at the time of writing, I needed to get around this limitation (sort of), so I created an ElasticSearch index with a timestamp. Here is the command I used:

curl -XPUT http://elasticsearch.hostname:9200/logs -d '{"mappings":{"events":{"_timestamp":{"enabled": true, "store": "yes"},"prog":{"store":"yes"},"host":{"store":"yes"}}}}'

This also "stores" the host and prog syslog fields, which should help with querying based on the host or program. This will create an index called "logs". By default, rsyslog’s omelasticsearch output inserts events into the "system" index, so you will want to specify the index name in your omelasticsearch line in rsyslog.conf (or one of its includes):

action(type="omelasticsearch" server="elasticsearch.hostname" template="cee_template_name" searchIndex="logs" bulkmode="on")

We also enabled bulk mode, which allows rsyslog to send many events at once to ElasticSearch. This will greatly improve performance.

Now you can query it!

curl 'http://elasticsearch.hostname:9200/logs/_search?pretty=1&fields=_source,_timestamp&size=100' -d '{"sort":{"_timestamp": "desc"}}' | less

The “pretty=1” and piping it to less are simply there to pretty print the JSON result and make it easy for you to browse the data set. A real search program, of course, would not turn on pretty mode and the code would directly consume the JSON.

You can add parameters after the ‘size=100’ (which, of course, says to return the 100 most recent results).

  • &q=word will search the logs for that word in any field.
  • &q=prog:name will search for ‘name’ in the syslog program field. Examples: postfix, sudo, crontab
  • &q=host:hostname will find events for that hostname.  Just use the base hostname, not a FQDN.

And that should get you started! Hopefully this is helpful.

 

Storing and forwarding remote messages

In this scenario, we want to store remote sent messages into a specific local file and forward the received messages to another syslog server. Local messages should still be locally stored.

Things to think about

How should this work out? Basically, we need a syslog listener for TCP and one for UDP, the local logging service and two rulesets, one for the local logging and one for the remote logging.

TCP reception is not a built-in capability. You need to load the imtcp plugin in order to enable it. This needs to be done only once in rsyslog.conf. Do it right at the top.

Note that the server port address specified in $InputTCPServerRun must match the port address that the clients send messages to.
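For reference, the matching client-side configuration (with a hypothetical server name) is a single forwarding rule using TCP:

# on each client: forward everything to the central server's TCP port
*.* @@central.example.com:10514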

Config Statements

# Modules
$ModLoad imtcp
$ModLoad imudp
$ModLoad imuxsock
$ModLoad imklog
# Templates
# log every host in its own directory
$template RemoteHost,"/var/syslog/hosts/%HOSTNAME%/%$YEAR%/%$MONTH%/%$DAY%/syslog.log"
### Rulesets
# Local Logging
$RuleSet local
kern.*                                                 /var/log/messages
*.info;mail.none;authpriv.none;cron.none                /var/log/messages
authpriv.*                                              /var/log/secure
mail.*                                                  -/var/log/maillog
cron.*                                                  /var/log/cron
*.emerg                                                 *
uucp,news.crit                                          /var/log/spooler
local7.*                                                /var/log/boot.log
# use the local RuleSet as default if not specified otherwise
$DefaultRuleset local

# Remote Logging
$RuleSet remote
*.* ?RemoteHost
# Send messages we receive to Gremlin
*.* @@W.X.Y.Z:514
### Listeners
# bind ruleset to tcp listener
$InputTCPServerBindRuleset remote
# and activate it:
$InputTCPServerRun 10514

$InputUDPServerBindRuleset remote
$UDPServerRun 514

How it works

The configuration basically works in 4 parts. First, we load all the modules (imtcp, imudp, imuxsock, imklog). Then we specify the templates for creating files. After that we create the rulesets which we can use for the different receivers. And last, we set up the listeners.

The rulesets are somewhat interesting to look at. The ruleset “local” will be set as the default ruleset. That means it will be used by any listener if not specified otherwise. Further, this ruleset uses the default log paths for the various facilities and severities.

The ruleset “remote”, on the other hand, takes care of the local logging and forwarding of all log messages that are received either via UDP or TCP. First, all the messages will be stored in a local file. The filename will be generated with the help of the template at the beginning of our configuration (in our example a rather complex folder structure will be used). After being written to the file, all the messages will be forwarded to another syslog server via TCP.

In the last part of the configuration we set the syslog listeners. We first bind the listener to the ruleset “remote”, then we give it the directive to run the listener with the port to use. In our case we use 10514 for TCP and 514 for UDP.

Important

There are some tricks in this configuration. Since we are actively using the rulesets, we must specify those rulesets before being able to bind them to a listener. That means, the order in the configuration is somewhat different than usual. Usually we would put the listener commands on top of the configuration right after the modules. Now we need to specify the rulesets first, then set the listeners (including the bind command). This is due to the current configuration design of rsyslog. To bind a listener to a ruleset, the ruleset object must at least be present before the listener is created. And that is why we need this kind of order for our configuration.

Using the Text File Input Module

In this scenario, log files should be processed by rsyslog. Here is some information on how the file monitor works. This will only describe setting up the Text File Input Module. Further configuration like processing rules or output methods will not be described.

Things to think about

The configuration given here should be placed on top of the rsyslog.conf file.

Config Statements

module(load="imfile" PollingInterval="10")
# needs to be done just once. PollingInterval is a module directive and is only set once when loading the module
# File 1
input(type="imfile" File="/path/to/file1" 
Tag="tag1" 
StateFile="/var/spool/rsyslog/statefile1" 
Severity="error" 
Facility="local7")
# File 2
input(type="imfile" File="/path/to/file2" 
Tag="tag2" 
StateFile="/var/spool/rsyslog/statefile2")
# ... and so on ...
#

How it works

The configuration for using the Text File Input Module is very extensive. At the beginning of your rsyslog configuration file, you always load the modules. There you need to load the module for Text File Input as well. Like all other modules, this has to be done just once. Please note that the directive PollingInterval is a module directive which needs to be set when loading the module.

module(load="imfile" PollingInterval="10")

Next up comes the input and its parameters. We configure an input of a certain type and then set the parameters to be used by this input. This is basically the same principle for all inputs:

# File 1
input(type="imfile" File="/path/to/file1" 
Tag="tag1" 
StateFile="/var/spool/rsyslog/statefile1" 
Severity="error" 
Facility="local7")

File specifies the path and name of the text file that should be monitored. The file name must be absolute.

Tag will set a tag in front of each message pulled from the file. If you want a colon after the tag, you must include it in the tag value; it will not be added automatically.

StateFile will create a file where rsyslog keeps track of the position it is currently at in the monitored file. You only need to set the file name. This file is always created in the rsyslog working directory (configurable via $WorkDirectory). This file is important so that rsyslog will not pull messages from the beginning of the file when it is restarted.
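For example, to point rsyslog at a dedicated working directory for its state files (the path is just a common choice):

$WorkDirectory /var/spool/rsyslog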

Severity will give all log messages of a file the same severity. This is optional. By default all messages will be set to “notice”.

Facility gives all log messages of a file the same facility. Again, this is optional. By default all messages will be set to “local0”.

These statements are needed for monitoring a file. There are other statements described in the doc which you might want to use. If you want to monitor another file, the statements must be repeated.

Since the files cannot be monitored in genuine real time (which generates too much processing effort) you need to set a polling interval:

PollingInterval 10

This is a module setting and it defines the interval in which the log files will be polled. By default this value is set to 10 seconds. If you want to get closer to realtime, you can decrease the value, though this is not recommended due to the increased processing load. Setting this to 0 is supported, but not suggested. Rsyslog will continue reading the file as long as there are unprocessed messages in it; the interval only takes effect once rsyslog reaches the end of the file.

Important

The StateFile needs to be unique for every file that is monitored. If not, strange things could happen.

On the Use of English

I ventured to write this book in English because …
it will be more easily read in poor English,
than in good German by 90% of my intended readers.
— HANS J. STETTER, Analysis of Discretization Methods for
Ordinary Differential Equations (1973)

There is not much I could add to Mr. Stetter’s thought, except, maybe, that the number to quote probably tends more to 99% in this case than to the 90% Mr. Stetter notes. So please pardon those errors in language use that I have not yet been able to fix or even see. Suggestions for corrections and improvements are always welcome.

What this book is about

This book offers a cookbook approach to configuring rsyslog. While the official documentation focuses on concepts, components and configuration statements, this book takes a completely different approach. It will not tell you about rsyslog concepts. Instead, it will offer a wide range of recipes for configuring rsyslog so that it performs some specific task.

The individual recipes are presented with a problem description, some key facts (if necessary), the required rsyslog version and the necessary configuration statements. These statements do their job, but not more. I tried to make the configurations reusable and free of side effects. So you should be able to combine some of these recipes to form a final configuration that works exactly like you need it – just from applying the recipes. The core idea is that this should work just like in real cooking – there, you combine several recipes to create a great five-course meal. However, in cooking there are some recipes that you usually cannot combine well within a single meal. The rsyslog equivalent is configurations with side effects, which may not always be avoidable. If one of the configurations here has side effects, you will be warned. It is then probably better to think twice before combining it with others.
