external inputs

destination_index document_type:{"original": "document"}

python elasticsearch_to_rsyslog.py source_index destination_index

from elasticsearch import Elasticsearch
import json, socket, sys

source_cluster = ['server1', 'server2']
rsyslog_address = '127.0.0.1'
rsyslog_port = 5514

es = Elasticsearch(source_cluster,
      retry_on_timeout=True,
      max_retries=10)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((rsyslog_address, rsyslog_port))


result = es.search(index=sys.argv[1], scroll='1m', search_type='scan', size=500)

while True:
  res = es.scroll(scroll_id=result['_scroll_id'], scroll='1m')
  for hit in result['hits']['hits']:
    s.send(sys.argv[2] + ' ' + hit["_type"] + ':' + json.dumps(hit["_source"])+'\n')
  if not result['hits']['hits']:
    break

s.close()

def de_dot(my_dict):
  for key, value in my_dict.iteritems():
    if '.' in key:
      my_dict[key.replace('.','_')] = my_dict.pop(key)
    if type(value) is dict:
      my_dict[key] = de_dot(my_dict.pop(key))
  return my_dict

s.send(sys.argv[2] + ' ' + hit["_type"] + ':' + json.dumps(de_dot(hit["_source"]))+'\n')

# messages bigger than this are truncated
$maxMessageSize 10000000  # ~10MB

# load the TCP input and the ES output modules
module(load="imtcp")
module(load="omelasticsearch")

main_queue(
  # buffer up to 1M messages in memory
  queue.size="1000000"
  # these threads process messages and send them to Elasticsearch
  queue.workerThreads="4"
  # rsyslog processes messages in batches to avoid queue contention
  # this will also be the Elasticsearch bulk size
  queue.dequeueBatchSize="4000"
)

# we use templates to specify how the data sent to Elasticsearch looks like
template(name="document" type="list"){
  # the "msg" variable contains the document
  property(name="msg")
}
template(name="index" type="list"){
  # "hostname" has the index name
  property(name="hostname")
}
template(name="type" type="list"){
  # "syslogtag" has the type name
  property(name="syslogtag")
}

# start the TCP listener on the port we pointed the Python script to
input(type="imtcp" port="5514")

# sending data to Elasticsearch, using the templates defined earlier
action(type="omelasticsearch"
  template="document"
  dynSearchIndex="on" searchIndex="index"
  dynSearchType="on" searchType="type"
  server="localhost"  # destination Elasticsearch host
  serverport="9200"   # and port
  bulkmode="on"  # use the bulk API
  action.resumeretrycount="-1"  # retry indefinitely if Elasticsearch is unreachable
)

rsyslogd -i /var/run/rsyslog_reindexer.pid -f /home/me/rsyslog_reindexer.conf

external inputs

Using rsyslog to Reindex/Migrate Elasticsearch data

Writing the custom script

Configuring rsyslog

About

Related Products