def configure(self, configuration):
    """
    Create the simplekv store for the configured backend and optionally enable buffered access.

    The 'backend' configuration value selects DictStore, RedisStore or MemcacheStore.
    If 'store_interval_in_secs' or 'batch_size' is set, a Buffer is created and
    set/get/delete/pop are replaced by their buffered variants; the unbuffered
    originals remain available as _set/_get/_delete/_pop.
    """
    # Call parent configure method
    BaseThreadedModule.configure(self, configuration)
    backend = self.getConfigurationValue('backend')
    # DictStore works without a client, so this default stays in place for it.
    self.backend_client = None
    if backend == 'DictStore':
        import simplekv.memory
        self.kv_store = simplekv.memory.DictStore()
    elif backend == 'RedisStore':
        import simplekv.memory.redisstore
        self.backend_client = self.getRedisClient()
        self.kv_store = simplekv.memory.redisstore.RedisStore(self.backend_client)
    elif backend == 'MemcacheStore':
        import simplekv.memory.memcachestore
        self.backend_client = self.getMemcacheClient()
        self.kv_store = simplekv.memory.memcachestore.MemcacheStore(self.backend_client)
    self.set_buffer = None
    buffering_requested = (self.getConfigurationValue('store_interval_in_secs')
                           or self.getConfigurationValue('batch_size'))
    if not buffering_requested:
        return
    # Redis gets its own flush callback, every other backend shares the generic one.
    if backend == 'RedisStore':
        flush_callback = self.setRedisBufferedCallback
    else:
        flush_callback = self.setBufferedCallback
    self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                             flush_callback,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))
    # Keep the direct accessors reachable and route the public ones through the buffer.
    self._set, self.set = self.set, self.setBuffered
    self._get, self.get = self.get, self.getBuffered
    self._delete, self.delete = self.delete, self.deleteBuffered
    self._pop, self.pop = self.pop, self.popBuffered
def configure(self, configuration):
    """
    Create the simplekv store for the configured backend and optionally enable buffered access.

    The 'backend' configuration value selects DictStore, RedisStore or MemcacheStore.
    Any other value is logged as an error and triggers a shutdown; in that case
    kv_store, backend_client and set_buffer all stay None.

    If 'store_interval_in_secs' or 'batch_size' is set, a Buffer is created and
    set/get/delete/pop are replaced by their buffered variants; the unbuffered
    originals remain available as _set/_get/_delete/_pop.
    """
    # Call parent configure method
    BaseThreadedModule.configure(self, configuration)
    self.backend = self.getConfigurationValue('backend')
    self.backend_client = None
    self.kv_store = None
    self.set_buffer = None
    if self.backend == 'DictStore':
        import simplekv.memory
        self.kv_store = simplekv.memory.DictStore()
    elif self.backend == 'RedisStore':
        import simplekv.memory.redisstore
        self.backend_client = self._getRedisClient()
        self.kv_store = simplekv.memory.redisstore.RedisStore(self.backend_client)
    elif self.backend == 'MemcacheStore':
        import simplekv.memory.memcachestore
        self.backend_client = self._getMemcacheClient()
        self.kv_store = simplekv.memory.memcachestore.MemcacheStore(self.backend_client)
    else:
        self.logger.error("Unknown backend type %s. Please check." % self.backend)
        self.lumbermill.shutDown()
        # Without a store there is nothing a buffer could write to.
        return
    if self.getConfigurationValue('store_interval_in_secs') or self.getConfigurationValue('batch_size'):
        # Redis gets its own flush callback, every other backend shares the generic one.
        if self.backend == 'RedisStore':
            flush_callback = self._setRedisBufferedCallback
        else:
            flush_callback = self._setBufferedCallback
        self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                                 flush_callback,
                                 self.getConfigurationValue('store_interval_in_secs'),
                                 maxsize=self.getConfigurationValue('backlog_size'))
        # Keep the direct accessors reachable and route the public ones through the buffer.
        self._set = self.set
        self.set = self._setBuffered
        self._get = self.get
        self.get = self._getBuffered
        self._delete = self.delete
        self.delete = self._deleteBuffered
        self._pop = self.pop
        self.pop = self._popBuffered
def initAfterFork(self):
    """
    Create the event buffer and connect to the configured sqs queue.

    Any error while creating the boto3 resource or looking up the queue is
    logged and followed by a shutdown request.
    """
    BaseThreadedModule.initAfterFork(self)
    self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                         self.storeData,
                         self.getConfigurationValue('store_interval_in_secs'),
                         maxsize=self.getConfigurationValue('backlog_size'))
    try:
        resource_options = dict(
            region_name=self.getConfigurationValue('region'),
            api_version=None,
            use_ssl=True,
            verify=None,
            endpoint_url=None,
            aws_access_key_id=self.getConfigurationValue('aws_access_key_id'),
            aws_secret_access_key=self.getConfigurationValue('aws_secret_access_key'),
            aws_session_token=None,
            config=None)
        self.sqs_resource = boto3.resource('sqs', **resource_options)
        queue_name = self.getConfigurationValue('queue')
        self.sqs_queue = self.sqs_resource.get_queue_by_name(QueueName=queue_name)
    except:
        exc_type, exc_value = sys.exc_info()[:2]
        self.logger.error("Could not connect to sqs service. Exception: %s, Error: %s." % (exc_type, exc_value))
        self.lumbermill.shutDown()
def configure(self, configuration):
    """
    Read the sink settings, load the selected compression module and start buffering.

    Supported 'compress' values handled here are 'gzip' and 'snappy'. If the
    matching module cannot be imported, an error is logged and a shutdown is requested.
    """
    # Call parent configure method
    BaseThreadedModule.configure(self, configuration)
    self.batch_size = self.getConfigurationValue('batch_size')
    self.backlog_size = self.getConfigurationValue('backlog_size')
    self.file_name = self.getConfigurationValue('file_name')
    self.format = self.getConfigurationValue('format')
    self.compress = self.getConfigurationValue('compress')
    self.file_handles = {}
    # The compression modules are bound to the object itself. Otherwise they
    # will not be accessible when the process was forked.
    if self.compress == 'gzip':
        try:
            self.gzip_module = __import__('gzip')
        except ImportError:
            self.logger.error('Gzip compression selected but gzip module could not be loaded.')
            self.lumbermill.shutDown()
    elif self.compress == 'snappy':
        try:
            self.snappy_module = __import__('snappy')
        except ImportError:
            self.logger.error('Snappy compression selected but snappy module could not be loaded.')
            self.lumbermill.shutDown()
    flush_interval = self.getConfigurationValue('store_interval_in_secs')
    self.buffer = Buffer(self.batch_size, self.storeData, flush_interval, maxsize=self.backlog_size)
    TimedFunctionManager.startTimedFunction(self.closeStaleFileHandles)
def configure(self, configuration):
    """
    Connect to a single redis server or a redis cluster and optionally enable buffered access.

    A non-empty 'cluster' configuration value selects the cluster client. The
    connection is verified with a ping; on failure an error is logged and a
    shutdown is requested.

    If 'store_interval_in_secs' or 'batch_size' is set, a Buffer is created and
    set/get/delete/pop are replaced by their buffered variants; the unbuffered
    originals remain available as _set/_get/_delete/_pop.
    """
    # Call parent configure method
    BaseThreadedModule.configure(self, configuration)
    cluster_configured = len(self.getConfigurationValue('cluster')) != 0
    if cluster_configured:
        redis_store = self.getConfigurationValue('cluster')
        self.client = self.getClusterRedisClient()
    else:
        redis_store = self.getConfigurationValue('server')
        self.client = self.getRedisClient()
    try:
        self.client.ping()
    except:
        exc_type, exc_value = sys.exc_info()[:2]
        self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (redis_store, exc_type, exc_value))
        self.lumbermill.shutDown()
    self.set_buffer = None
    buffering_requested = (self.getConfigurationValue('store_interval_in_secs')
                           or self.getConfigurationValue('batch_size'))
    if not buffering_requested:
        return
    self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.setBufferedCallback,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))
    # Keep the direct accessors reachable and route the public ones through the buffer.
    self._set, self.set = self.set, self.setBuffered
    self._get, self.get = self.get, self.getBuffered
    self._delete, self.delete = self.delete, self.deleteBuffered
    self._pop, self.pop = self.pop, self.popBuffered
def initAfterFork(self):
    """
    Run the parent post-fork initialisation and create the event buffer.

    The buffer hands its content to storeData, using the configured
    'batch_size', 'store_interval_in_secs' and 'backlog_size'.
    """
    BaseThreadedModule.initAfterFork(self)
    flush_size = self.getConfigurationValue('batch_size')
    flush_interval = self.getConfigurationValue('store_interval_in_secs')
    max_backlog = self.getConfigurationValue('backlog_size')
    self.buffer = Buffer(flush_size, self.storeData, flush_interval, maxsize=max_backlog)
def initAfterFork(self):
    """
    Run the parent post-fork initialisation, create the event buffer and connect.

    The buffer hands its content to storeData, using the configured
    'batch_size', 'store_interval_in_secs' and 'backlog_size'. If connect()
    returns a falsy value, a shutdown is requested.
    """
    # The parent hook is called exactly once; a second call at the end of this
    # method was redundant and has been dropped.
    BaseThreadedModule.initAfterFork(self)
    self.buffer = Buffer(
        self.getConfigurationValue('batch_size'),
        self.storeData,
        self.getConfigurationValue('store_interval_in_secs'),
        maxsize=self.getConfigurationValue('backlog_size'))
    self.connection = self.connect()
    if not self.connection:
        self.lumbermill.shutDown()
def initAfterFork(self):
    """
    Connect to elasticsearch and create the event buffer once the process has forked.

    If connect() returns a falsy value, a shutdown is requested and no buffer is created.
    """
    BaseThreadedModule.initAfterFork(self)
    # Init es client after fork as mentioned in https://elasticsearch-py.readthedocs.org/en/master/
    self.es = self.connect()
    if self.es:
        # The buffer flushes via a threaded timed function and that thread will
        # not survive a fork, so the buffer is created here.
        flush_size = self.getConfigurationValue('batch_size')
        flush_interval = self.getConfigurationValue('store_interval_in_secs')
        max_backlog = self.getConfigurationValue('backlog_size')
        self.buffer = Buffer(flush_size, self.storeData, flush_interval, maxsize=max_backlog)
    else:
        self.lumbermill.shutDown()
def initAfterFork(self):
    """
    Connect to mongodb and create the event buffer once the process has forked.

    If connect() returns a falsy value, a shutdown is requested and no buffer is created.
    """
    BaseThreadedModule.initAfterFork(self)
    # Init mongodb client after fork.
    self.mongodb = self.connect()
    if self.mongodb:
        # The buffer flushes via a threaded timed function and that thread will
        # not survive a fork, so the buffer is created here.
        flush_size = self.getConfigurationValue('batch_size')
        flush_interval = self.getConfigurationValue('store_interval_in_secs')
        max_backlog = self.getConfigurationValue('backlog_size')
        self.buffer = Buffer(flush_size, self.storeData, flush_interval, maxsize=max_backlog)
    else:
        self.lumbermill.shutDown()
def configure(self, configuration):
    """
    Set up the redis client (single server or cluster) and optional write buffering.

    The cluster client is used when the 'cluster' configuration value is not
    empty, the plain client otherwise. A failing ping is logged as an error
    and followed by a shutdown request.

    With 'store_interval_in_secs' or 'batch_size' set, set/get/delete/pop are
    swapped for the buffered implementations and the direct ones are kept as
    _set/_get/_delete/_pop.
    """
    # Call parent configure method
    BaseThreadedModule.configure(self, configuration)
    if len(self.getConfigurationValue('cluster')) == 0:
        store_key, client_factory = 'server', self.getRedisClient
    else:
        store_key, client_factory = 'cluster', self.getClusterRedisClient
    redis_store = self.getConfigurationValue(store_key)
    self.client = client_factory()
    try:
        self.client.ping()
    except:
        error_type, error_value, _ = sys.exc_info()
        self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (redis_store, error_type, error_value))
        self.lumbermill.shutDown()
    self.set_buffer = None
    if self.getConfigurationValue('store_interval_in_secs') or self.getConfigurationValue('batch_size'):
        flush_size = self.getConfigurationValue('batch_size')
        flush_interval = self.getConfigurationValue('store_interval_in_secs')
        max_backlog = self.getConfigurationValue('backlog_size')
        self.set_buffer = Buffer(flush_size, self.setBufferedCallback, flush_interval, maxsize=max_backlog)
        # Remember the direct implementation before installing the buffered one.
        self._set = self.set
        self._get = self.get
        self._delete = self.delete
        self._pop = self.pop
        self.set = self.setBuffered
        self.get = self.getBuffered
        self.delete = self.deleteBuffered
        self.pop = self.popBuffered
def configure(self, configuration):
    """
    Read the sink settings, create the ZabbixSender and the event buffer.

    If 'agent_conf' is set, the sender is configured from that zabbix agent
    configuration file; the value True selects /etc/zabbix/zabbix_agentd.conf.
    A path that is not an existing file is logged as an error and followed by
    a shutdown request.

    Otherwise the 'server' value is used. It may be given as 'host' or
    'host:port'; without a port, 10051 is used.
    """
    BaseThreadedModule.configure(self, configuration)
    self.hostname = self.getConfigurationValue("hostname")
    self.fields = self.getConfigurationValue("fields")
    self.field_prefix = self.getConfigurationValue("field_prefix")
    self.timestamp_field = self.getConfigurationValue("timestamp_field")
    self.batch_size = self.getConfigurationValue('batch_size')
    self.backlog_size = self.getConfigurationValue('backlog_size')
    self.agent_conf = self.getConfigurationValue("agent_conf")
    if self.agent_conf:
        if self.agent_conf is True:
            self.agent_conf = "/etc/zabbix/zabbix_agentd.conf"
        if not os.path.isfile(self.agent_conf):
            self.logger.error("%s does not point to an existing file." % self.agent_conf)
            self.lumbermill.shutDown()
        self.zabbix_sender = ZabbixSender(use_config=self.agent_conf)
    else:
        server = self.getConfigurationValue("server")
        port = 10051
        if ":" in server:
            # Split on the last colon only and hand the port on as a number.
            server, port = server.rsplit(":", 1)
            port = int(port)
        self.zabbix_sender = ZabbixSender(zabbix_server=server, port=port)
    self.buffer = Buffer(self.batch_size,
                         self.storeData,
                         self.getConfigurationValue('store_interval_in_secs'),
                         maxsize=self.backlog_size)
def initAfterFork(self):
    """
    Run the parent post-fork initialisation, create the event buffer and connect.

    The buffer hands its content to storeData, using the configured
    'batch_size', 'store_interval_in_secs' and 'backlog_size'. If connect()
    returns a falsy value, a shutdown is requested.
    """
    # The parent hook is called exactly once; a second call at the end of this
    # method was redundant and has been dropped.
    BaseThreadedModule.initAfterFork(self)
    self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                         self.storeData,
                         self.getConfigurationValue('store_interval_in_secs'),
                         maxsize=self.getConfigurationValue('backlog_size'))
    self.connection = self.connect()
    if not self.connection:
        self.lumbermill.shutDown()
def initAfterFork(self):
    """
    Prepare the per-key buffers and run the parent post-fork initialisation.

    self.buffers is a defaultdict that creates a new Buffer on first access of
    a key. Each buffer calls sendMergedEvent and uses buffer_size both as
    flush size and as maximum size.
    """
    def createBuffer():
        # Attributes are read when the buffer is created, not when this method runs.
        return Buffer(flush_size=self.buffer_size,
                      callback=self.sendMergedEvent,
                      interval=self.flush_interval_in_secs,
                      maxsize=self.buffer_size)

    # As the buffer uses a threaded timed function to flush its buffer and thread
    # will not survive a fork, init buffer here.
    self.buffers = collections.defaultdict(createBuffer)
    BaseThreadedModule.initAfterFork(self)
def initAfterFork(self):
    """
    Set up the elasticsearch client and the event buffer after the fork.

    A falsy result from connect() leads to a shutdown request; the buffer is
    only created when a client is available.
    """
    BaseThreadedModule.initAfterFork(self)
    # Init es client after fork as mentioned in https://elasticsearch-py.readthedocs.org/en/master/
    es_client = self.connect()
    self.es = es_client
    if not es_client:
        self.lumbermill.shutDown()
        return
    # As the buffer uses a threaded timed function to flush its buffer and thread
    # will not survive a fork, init buffer here.
    self.buffer = Buffer(
        self.getConfigurationValue('batch_size'),
        self.storeData,
        self.getConfigurationValue('store_interval_in_secs'),
        maxsize=self.getConfigurationValue('backlog_size'),
    )
def initAfterFork(self):
    """
    Set up the mongodb client and the event buffer after the fork.

    A falsy result from connect() leads to a shutdown request; the buffer is
    only created when a client is available.
    """
    BaseThreadedModule.initAfterFork(self)
    # Init mongodb client after fork.
    mongodb_client = self.connect()
    self.mongodb = mongodb_client
    if not mongodb_client:
        self.lumbermill.shutDown()
        return
    # As the buffer uses a threaded timed function to flush its buffer and thread
    # will not survive a fork, init buffer here.
    self.buffer = Buffer(
        self.getConfigurationValue('batch_size'),
        self.storeData,
        self.getConfigurationValue('store_interval_in_secs'),
        maxsize=self.getConfigurationValue('backlog_size'),
    )
def initAfterFork(self):
    """
    Call the parent post-fork hook and set up the buffer that feeds storeData.

    Flush size, flush interval and maximum size come from the 'batch_size',
    'store_interval_in_secs' and 'backlog_size' configuration values.
    """
    BaseThreadedModule.initAfterFork(self)
    buffer_args = (self.getConfigurationValue('batch_size'),
                   self.storeData,
                   self.getConfigurationValue('store_interval_in_secs'))
    self.buffer = Buffer(*buffer_args, maxsize=self.getConfigurationValue('backlog_size'))
class RedisListSink(BaseThreadedModule):
    """
    Send events to a redis lists.

    list: Name of redis list to send data to.
    server: Redis server to connect to.
    port: Port redis server is listening on.
    db: Redis db.
    password: Redis password.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send.
    store_interval_in_secs: Send data to redis in x seconds intervals.
    batch_size: Send data to redis if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - RedisListSink:
       list:                            # <type: String; is: required>
       server:                          # <default: 'localhost'; type: string; is: optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""
    can_run_forked = True

    def configure(self, configuration):
        """Read the sink settings, create the redis client and verify the connection with a ping."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.list = self.getConfigurationValue('list')
        connection_options = dict(
            host=self.getConfigurationValue('server'),
            port=self.getConfigurationValue('port'),
            password=self.getConfigurationValue('password'),
            db=self.getConfigurationValue('db'))
        self.client = redis.StrictRedis(**connection_options)
        try:
            self.client.ping()
        except:
            exc_type, exc_value = sys.exc_info()[:2]
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), exc_type, exc_value))
            self.lumbermill.shutDown()

    def getStartMessage(self):
        """Return the start message naming the list, the server address and the backlog size."""
        message_values = (self.list,
                          self.getConfigurationValue('server'),
                          self.getConfigurationValue('port'),
                          self.getConfigurationValue('backlog_size'))
        return "[%s] on %s:%s. Max buffer size: %d" % message_values

    def initAfterFork(self):
        """Run the parent post-fork hook and create the buffer that feeds storeData."""
        BaseThreadedModule.initAfterFork(self)
        flush_size = self.getConfigurationValue('batch_size')
        flush_interval = self.getConfigurationValue('store_interval_in_secs')
        max_backlog = self.getConfigurationValue('backlog_size')
        self.buffer = Buffer(flush_size, self.storeData, flush_interval, maxsize=max_backlog)

    def storeData(self, buffered_data):
        """Push all buffered entries to the redis list. Returns True on success, False on any error."""
        try:
            self.client.rpush(self.list, *buffered_data)
        except:
            error_type, error_value = sys.exc_info()[:2]
            self.logger.error("Could not add event to redis list %s. Exception: %s, Error: %s." % (self.list, error_type, error_value))
            return False
        return True

    def handleEvent(self, event):
        """Buffer the event, or its formatted representation if 'format' is set. Yields None."""
        publish_data = mapDynamicValue(self.format, event) if self.format else event
        self.buffer.append(publish_data)
        yield None

    def shutDown(self):
        """Flush the buffer on a best-effort basis, then run the parent shutdown."""
        try:
            self.buffer.flush()
        except:
            # Errors while flushing are deliberately ignored during shutdown.
            pass
        BaseThreadedModule.shutDown(self)
class MongoDbSink(BaseThreadedModule):
    """
    Store incoming events in a mongodb.

    host: Mongodb server.
    database: Mongodb database name.
    collection: Mongodb collection name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    optinonal_connection_params: Other optional parameters as documented in https://api.mongodb.org/python/current/api/pymongo/mongo_client.html
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send.
    doc_id: Sets the document id for the committed event data.
    store_interval_in_secs: Send data to mongodb in x seconds intervals.
    batch_size: Sending data to mongodb if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - MongoDbSink:
       host:                            # <default: 'localhost:27017'; type: string; is: optional>
       database:                        # <default: 'lumbermill'; type: string; is: optional>
       collection:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       optinonal_connection_params:     # <default: {'serverSelectionTimeoutMS': 5}; type: dictionary; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read format, collection, database and doc_id pattern from the configuration."""
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.collection = self.getConfigurationValue('collection')
        self.database = self.getConfigurationValue('database')
        self.doc_id_pattern = self.getConfigurationValue("doc_id")

    def getStartMessage(self):
        """Return the start message naming the database and the backlog size."""
        return "DB: %s. Max buffer size: %d" % (self.getConfigurationValue('database'), self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Connect to mongodb and create the event buffer once the process has forked."""
        BaseThreadedModule.initAfterFork(self)
        # Init mongodb client after fork.
        self.mongodb = self.connect()
        if self.mongodb is None:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create a pymongo client for the configured host.

        Returns the client, or None if creating the client or querying the
        server info failed. A failure is logged and followed by a shutdown request.
        """
        host = self.getConfigurationValue('host')
        # Bound up front so the check below also works when the constructor raises.
        mongodb_client = None
        try:
            mongodb_client = pymongo.MongoClient(host, **self.getConfigurationValue('optinonal_connection_params'))
            # NOTE(review): server_info() is used as the reachability check here — confirm against pymongo docs.
            self.logger.debug(str(mongodb_client.server_info()))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.warning("Connection to %s failed. Exception: %s, Error: %s." % (host, etype, evalue))
            # A client whose server could not be queried must not be reported as connected.
            mongodb_client = None
        if mongodb_client is None:
            self.logger.error("Connection to %s failed. Shutting down." % host)
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % host)
        return mongodb_client

    def handleEvent(self, event):
        """Buffer the event, or the 'format' value resolved against the event if a format is set. Yields None."""
        if self.format:
            publish_data = self.getConfigurationValue('format', event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def storeData(self, events):
        """
        Write the given events to mongodb, using one ordered bulk operation per collection.

        The collection name is resolved per event from the 'collection' pattern
        and lowercased. Events for which no doc_id can be resolved are logged
        and skipped. All errors are logged; nothing is raised to the caller.
        """
        mongo_db = self.mongodb[self.database]
        bulk_objects = {}
        for event in events:
            collection_name = mapDynamicValueInString(self.collection, event, use_strftime=True).lower()
            doc_id = mapDynamicValue(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." % (self.doc_id_pattern, event))
                continue
            event['_id'] = doc_id
            if collection_name not in bulk_objects:
                bulk_objects[collection_name] = mongo_db[collection_name].initialize_ordered_bulk_op()
            try:
                bulk_objects[collection_name].insert(event)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
                self.logger.debug("Payload: %s" % event)
                # Compare against the message text; the exception object itself does not support 'in'.
                error_text = str(evalue)
                if "Broken pipe" in error_text or "Connection reset by peer" in error_text:
                    self.mongodb = self.connect()
        for collection_name, bulk_object in bulk_objects.items():
            try:
                result = bulk_object.execute()
                self.logger.debug(str(result))
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))

    def shutDown(self):
        """Flush the buffer on a best-effort basis, then run the parent shutdown."""
        try:
            self.buffer.flush()
        except Exception:
            # Best effort only: the buffer may not exist if the connection never came up.
            pass
        BaseThreadedModule.shutDown(self)
class RedisListSink(BaseThreadedModule):
    """
    Send events to a redis lists.

    list: Name of redis list to send data to.
    server: Redis server to connect to.
    port: Port redis server is listening on.
    db: Redis db.
    password: Redis password.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send.
    store_interval_in_secs: Send data to redis in x seconds intervals.
    batch_size: Send data to redis if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - RedisListSink:
       list:                            # <type: String; is: required>
       server:                          # <default: 'localhost'; type: string; is: optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""
    can_run_forked = True

    def configure(self, configuration):
        """Load the sink settings, build the redis client and check with a ping that it is reachable."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.list = self.getConfigurationValue('list')
        redis_host = self.getConfigurationValue('server')
        redis_port = self.getConfigurationValue('port')
        redis_password = self.getConfigurationValue('password')
        redis_db = self.getConfigurationValue('db')
        self.client = redis.StrictRedis(host=redis_host, port=redis_port, password=redis_password, db=redis_db)
        try:
            self.client.ping()
        except:
            error_type, error_value, _ = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), error_type, error_value))
            self.lumbermill.shutDown()

    def getStartMessage(self):
        """Return the start message naming the server address, the list and the backlog size."""
        server = self.getConfigurationValue('server')
        port = self.getConfigurationValue('port')
        backlog_size = self.getConfigurationValue('backlog_size')
        return "publishing to %s:%s -> %s. Max buffer size: %d" % (server, port, self.list, backlog_size)

    def initAfterFork(self):
        """Call the parent post-fork hook and set up the buffer that feeds storeData."""
        BaseThreadedModule.initAfterFork(self)
        buffer_args = (self.getConfigurationValue('batch_size'),
                       self.storeData,
                       self.getConfigurationValue('store_interval_in_secs'))
        self.buffer = Buffer(*buffer_args, maxsize=self.getConfigurationValue('backlog_size'))

    def storeData(self, buffered_data):
        """Append all buffered entries to the redis list. Returns True on success, False on any error."""
        stored = False
        try:
            self.client.rpush(self.list, *buffered_data)
            stored = True
        except:
            exc_type, exc_value, exc_tb = sys.exc_info()
            self.logger.error("Could not add event to redis list %s. Exception: %s, Error: %s." % (self.list, exc_type, exc_value))
        return stored

    def handleEvent(self, event):
        """Add the event, or its formatted representation if 'format' is set, to the buffer. Yields None."""
        publish_data = event
        if self.format:
            publish_data = mapDynamicValue(self.format, event)
        self.buffer.append(publish_data)
        yield None

    def shutDown(self):
        """Try to flush the buffer, ignoring any error, then run the parent shutdown."""
        try:
            self.buffer.flush()
        except:
            # Errors while flushing are deliberately ignored during shutdown.
            pass
        BaseThreadedModule.shutDown(self)
class SQSSink(BaseThreadedModule):
    """
    Send messages to amazon sqs service.

    aws_access_key_id: Your AWS id.
    aws_secret_access_key: Your AWS password.
    region: The region in which to find your sqs service.
    queue: Queue name.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
            If not set event.data will be send as MessageBody, all other fields will be send as MessageAttributes.
    store_interval_in_secs: Send data to sqs in x seconds intervals.
    batch_size: Number of messages to collect before starting to send messages to sqs. This refers to the internal
                receive buffer of this plugin. When the receive buffer is maxed out, this plugin will always send
                the maximum of 10 messages in one send_message_batch call.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    values: ['us-east-1', 'us-west-1', 'us-west-2', 'eu-central-1', 'eu-west-1', 'ap-southeast-1', 'ap-southeast-2', 'ap-northeast-1', 'sa-east-1', 'us-gov-west-1', 'cn-north-1']

    Configuration template:

    - SQSSink:
       aws_access_key_id:               # <type: string; is: required>
       aws_secret_access_key:           # <type: string; is: required>
       region:                          # <type: string; is: required>
       queue:                           # <type: string; is: required>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
       receivers:
        - NextModule
    """

    module_type = "output"
    """Set module type"""

    # Number of entries sent with one send_messages call, as stated in the class description.
    max_entries_per_request = 10

    def configure(self, configuration):
        """Silence the boto loggers and read batch size and format from the configuration."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        # Set boto log level.
        logging.getLogger('boto3').setLevel(logging.CRITICAL)
        logging.getLogger('botocore').setLevel(logging.CRITICAL)
        self.batch_size = self.getConfigurationValue('batch_size')
        self.format = self.getConfigurationValue('format')

    def getStartMessage(self):
        """Return the start message naming the queue, the region and the backlog size."""
        return "Queue: %s [%s]. Max buffer size: %d" % (self.getConfigurationValue('queue'),
                                                        self.getConfigurationValue('region'),
                                                        self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """
        Create the event buffer and connect to the configured sqs queue.

        Any error while creating the boto3 resource or looking up the queue is
        logged and followed by a shutdown request.
        """
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))
        try:
            self.sqs_resource = boto3.resource('sqs',
                                               region_name=self.getConfigurationValue('region'),
                                               api_version=None,
                                               use_ssl=True,
                                               verify=None,
                                               endpoint_url=None,
                                               aws_access_key_id=self.getConfigurationValue('aws_access_key_id'),
                                               aws_secret_access_key=self.getConfigurationValue('aws_secret_access_key'),
                                               aws_session_token=None,
                                               config=None)
            self.sqs_queue = self.sqs_resource.get_queue_by_name(QueueName=self.getConfigurationValue('queue'))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to sqs service. Exception: %s, Error: %s." % (etype, evalue))
            self.lumbermill.shutDown()

    def handleEvent(self, event):
        """Add the event to the buffer. Yields None."""
        self.buffer.append(event)
        yield None

    def storeData(self, buffered_data):
        """
        Send the buffered events to the sqs queue in requests of at most max_entries_per_request entries.

        The message id is the lumbermill event id if the event carries one,
        a random id otherwise. The message body is the formatted event if
        'format' is set, the json encoded event otherwise. Events that can not
        be json encoded are logged and skipped.
        """
        batch_messages = []
        for event in buffered_data:
            try:
                message_id = event['lumbermill']['event_id']
            except KeyError:
                message_id = "%032x%s" % (random.getrandbits(128), os.getpid())
            if self.format:
                message_body = mapDynamicValue(self.format, event)
            else:
                try:
                    message_body = json.dumps(event)
                except Exception:
                    etype, evalue, etb = sys.exc_info()
                    self.logger.warning("Error while encoding event data: %s to json. Exception: %s, Error: %s." % (event, etype, evalue))
                    # An event without a valid body can not be sent.
                    continue
            batch_messages.append({'Id': message_id, 'MessageBody': message_body})
            # Send as soon as a full request has been collected.
            if len(batch_messages) == self.max_entries_per_request:
                self.sqs_queue.send_messages(Entries=batch_messages)
                batch_messages = []
        # Send whatever is left over after the last full request.
        if batch_messages:
            self.sqs_queue.send_messages(Entries=batch_messages)

    def shutDown(self):
        """Flush the buffer."""
        self.buffer.flush()
class GraphiteSink(BaseThreadedModule):
    """
    Send metrics to graphite server.

    server: Graphite server to connect to.
    port: Port carbon-cache is listening on.
    formats: Format of messages to send to graphite, e.g.: ['lumbermill.stats.event_rate_$(interval)s $(event_rate)'].
    store_interval_in_secs: Send data to graphite in x seconds intervals.
    batch_size: Send data to graphite if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Here a simple example to send http_status statistics to graphite:

    ...

    - Statistics:
       interval: 10
       fields: ['http_status']

    - GraphiteSink:
       filter: if $(field_name) == "http_status"
       server: 127.0.0.1
       batch_size: 1
       formats: ['lumbermill.stats.http_200_$(interval)s $(field_counts.200)',
                 'lumbermill.stats.http_400_$(interval)s $(field_counts.400)',
                 'lumbermill.stats.http_total_$(interval)s $(total_count)']

    ...

    Configuration template:

    - GraphiteSink:
       server:                          # <default: 'localhost'; type: string; is: optional>
       port:                            # <default: 2003; type: integer; is: optional>
       formats:                         # <type: list; is: required>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 50; type: integer; is: optional>
       backlog_size:                    # <default: 50; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read the message formats and the (server, port) connection data from the configuration."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.formats = self.getConfigurationValue('formats')
        self.connection_data = (self.getConfigurationValue('server'), self.getConfigurationValue('port'))
        self.connection = None

    def connect(self):
        """Open a socket to the configured server. Returns the socket, or False if connecting failed."""
        # Connect to server
        connection = socket.socket()
        try:
            connection.connect(self.connection_data)
            return connection
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Failed to connect to %s. Exception: %s, Error: %s." % (self.connection_data, etype, evalue))
            # Do not leave the unused socket open.
            connection.close()
            return False

    def getStartMessage(self):
        """Return the start message naming the server address and the backlog size."""
        return "%s:%s. Max buffer size: %d" % (self.connection_data[0], self.connection_data[1], self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Run the parent post-fork hook once, create the event buffer and connect to the server."""
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))
        self.connection = self.connect()
        if not self.connection:
            self.lumbermill.shutDown()

    def handleEvent(self, event):
        """Buffer one '<mapped format> <unix timestamp>' line per format that maps to a value. Yields None."""
        for format_string in self.formats:
            mapped_data = self.mapDynamicValue(format_string, event)
            if mapped_data:
                self.buffer.append("%s %s" % (mapped_data, int(time.time())))
        yield None

    def storeData(self, events):
        """
        Send every buffered line to the server, newline terminated.

        If sending fails, the error is logged and a reconnect is attempted. The
        line that failed is not sent again; the remaining lines go out over the
        new connection. Returns True when all lines were processed, False if
        the reconnect failed, in which case a shutdown is requested.
        """
        for event in events:
            if not event.endswith("\n"):
                event += "\n"
            try:
                # sendall, so a partially written line is not silently cut off.
                self.connection.sendall(event)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
                if not self._reconnect():
                    self.logger.error("Reconnect failed. Shutting down.")
                    self.lumbermill.shutDown()
                    return False
                self.logger.info("Reconnection to %s successful." % (self.connection_data,))
        return True

    def _reconnect(self):
        """Drop the current socket and try up to five times, five seconds apart, to open a new one."""
        if self.connection:
            try:
                self.connection.close()
            except Exception:
                # The socket is being replaced anyway.
                pass
        self.connection = None
        tries = 0
        while tries < 5 and not self.connection:
            time.sleep(5)
            # connection_data is a tuple and has to be wrapped to fill a single placeholder.
            self.logger.warning("Trying to reconnect to %s." % (self.connection_data,))
            self.connection = self.connect()
            tries += 1
        return bool(self.connection)

    def shutDown(self):
        """Close the connection on a best-effort basis."""
        if self.connection:
            try:
                self.connection.close()
            except Exception:
                # Nothing useful can be done about a failing close during shutdown.
                pass
class ElasticSearchSink(BaseThreadedModule):
    """
    Store the data dictionary in an elasticsearch index.

    The elasticsearch module takes care of discovering all nodes of the elasticsearch cluster.
    Requests will then be load balanced via round robin.

    action: Either index or update. If update be sure to provide the correct doc_id.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
            If not set the whole event dict is send.
    nodes: Configures the elasticsearch nodes.
    read_timeout: Set number of seconds to wait until requests to elasticsearch will time out.
    connection_type: One of: 'urllib3', 'requests'.
    http_auth: 'user:password'.
    use_ssl: One of: True, False.
    index_name: Sets the index name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    doc_id: Sets the es document id for the committed event data.
    routing: Sets a routing value (@see: http://www.elasticsearch.org/blog/customizing-your-document-routing/)
             Timepatterns like %Y.%m.%d are allowed here.
    ttl: When set, documents will be automatically deleted after ttl expired.
         Can either set time in milliseconds or elasticsearch date format, e.g.: 1d, 15m etc.
         This feature needs to be enabled for the index.
         @See: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-ttl-field.html
    sniff_on_start: The client can be configured to inspect the cluster state to get a list of nodes upon startup.
                    Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    sniff_on_connection_fail: The client can be configured to inspect the cluster state to get a list of nodes upon failure.
                              Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    consistency: One of: 'one', 'quorum', 'all'.
    store_interval_in_secs: Send data to es in x seconds intervals.
    batch_size: Sending data to es if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - ElasticSearchSink:
       action:                          # <default: 'index'; type: string; is: optional; values: ['index', 'update']>
       format:                          # <default: None; type: None||string; is: optional>
       nodes:                           # <type: string||list; is: required>
       read_timeout:                    # <default: 10; type: integer; is: optional>
       connection_type:                 # <default: 'urllib3'; type: string; values: ['urllib3', 'requests']; is: optional>
       http_auth:                       # <default: None; type: None||string; is: optional>
       use_ssl:                         # <default: False; type: boolean; is: optional>
       index_name:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       routing:                         # <default: None; type: None||string; is: optional>
       ttl:                             # <default: None; type: None||integer||string; is: optional>
       sniff_on_start:                  # <default: False; type: boolean; is: optional>
       sniff_on_connection_fail:        # <default: False; type: boolean; is: optional>
       consistency:                     # <default: 'quorum'; type: string; values: ['one', 'quorum', 'all']; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read the sink settings and pick the elasticsearch connection class."""
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        for module_name in ['elasticsearch', 'urllib3', 'requests']:
            if self.getConfigurationValue('log_level') == 'info':
                logging.getLogger(module_name).setLevel(logging.WARN)
            else:
                # Set log level for elasticsearch library if configured to other than default.
                logging.getLogger(module_name).setLevel(self.logger.level)
        self.action = self.getConfigurationValue('action')
        self.format = self.getConfigurationValue('format')
        self.consistency = self.getConfigurationValue("consistency")
        self.ttl = self.getConfigurationValue("ttl")
        self.index_name = self.getConfigurationValue("index_name")
        self.routing_pattern = self.getConfigurationValue("routing")
        self.doc_id_pattern = self.getConfigurationValue("doc_id")
        self.es_nodes = self.getConfigurationValue("nodes")
        self.read_timeout = self.getConfigurationValue("read_timeout")
        if not isinstance(self.es_nodes, list):
            self.es_nodes = [self.es_nodes]
        if self.getConfigurationValue("connection_type") == 'urllib3':
            self.connection_class = elasticsearch.connection.Urllib3HttpConnection
        elif self.getConfigurationValue("connection_type") == 'requests':
            self.connection_class = elasticsearch.connection.RequestsHttpConnection

    def getStartMessage(self):
        return "Idx: %s. Max buffer size: %d" % (self.index_name, self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the es client and the send buffer inside the forked process."""
        BaseThreadedModule.initAfterFork(self)
        # Init es client after fork as mentioned in https://elasticsearch-py.readthedocs.org/en/master/
        self.es = self.connect()
        if not self.es:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create the elasticsearch client, retrying up to five times.

        Returns the client, or False after the last failed attempt (in which
        case a shutdown has been requested).
        """
        es = False
        tries = 0
        while tries < 5 and not es:
            try:
                # Connect to es node and round-robin between them.
                self.logger.debug("Connecting to %s." % self.es_nodes)
                es = elasticsearch.Elasticsearch(self.es_nodes,
                                                 connection_class=self.connection_class,
                                                 timeout=self.read_timeout,
                                                 sniff_on_start=self.getConfigurationValue('sniff_on_start'),
                                                 sniff_on_connection_fail=self.getConfigurationValue('sniff_on_connection_fail'),
                                                 sniff_timeout=5,
                                                 maxsize=20,
                                                 use_ssl=self.getConfigurationValue('use_ssl'),
                                                 http_auth=self.getConfigurationValue('http_auth'))
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.warning("Connection to %s failed. Exception: %s, Error: %s." % (self.es_nodes, etype, evalue))
                self.logger.warning("Waiting %s seconds before retring to connect." % ((4 + tries)))
                # Back off a little longer after every failed attempt.
                time.sleep(4 + tries)
                tries += 1
        if not es:
            self.logger.error("Connection to %s failed. Shutting down." % self.es_nodes)
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % self.es_nodes)
        return es

    def handleEvent(self, event):
        """Queue either the formatted event string or the whole event for the next bulk request."""
        if self.format:
            publish_data = self.getConfigurationValue('format', event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def dataToElasticSearchJson(self, events):
        """
        Format data for elasticsearch bulk update.

        Returns one string holding a header line and a document line per event.
        Events without a resolvable doc_id or that cannot be json encoded are
        logged and left out.
        """
        json_data = []
        for event in events:
            index_name = mapDynamicValueInString(self.index_name, event, use_strftime=True).lower()
            if 'lumbermill' in event and 'event_type' in event['lumbermill']:
                event_type = event['lumbermill']['event_type']
            else:
                event_type = 'Unknown'
            doc_id = mapDynamicValue(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." % (self.getConfigurationValue("doc_id"), event))
                continue
            # The bulk header is keyed by the action, so meta fields must be
            # added under that same key ('index' or 'update').
            action_meta = {'_index': index_name, '_type': event_type, '_id': doc_id}
            if self.routing_pattern:
                action_meta['_routing'] = mapDynamicValue(self.routing_pattern, use_strftime=True)
            if self.ttl:
                action_meta['_ttl'] = self.ttl
            header = {self.action: action_meta}
            document = {'doc': event} if self.action == 'update' else event
            try:
                json_data.append("\n".join((json.dumps(header), json.dumps(document), "\n")))
            except UnicodeDecodeError:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not json encode %s. Exception: %s, Error: %s." % (document, etype, evalue))
        return "".join(json_data)

    def storeData(self, events):
        """
        Send the events to elasticsearch in a single bulk request.

        Returns True on success and False if the request failed. A lost
        connection triggers a reconnect.
        """
        json_data = self.dataToElasticSearchJson(events)
        if not json_data:
            # Nothing serializable in this batch; skip the request.
            return True
        try:
            self.es.bulk(body=json_data, consistency=self.consistency)
            return True
        except elasticsearch.exceptions.ConnectionError:
            self.logger.warning("Lost connection to %s. Trying to reconnect." % self.es_nodes)
            try:
                self.es = self.connect()
            except Exception:
                time.sleep(.5)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
            self.logger.debug("Payload: %s" % json_data)
            # Exception objects do not support "in"; inspect the message text instead.
            error_message = str(evalue)
            if "Broken pipe" in error_message or "Connection reset by peer" in error_message:
                self.es = self.connect()
        return False

    def shutDown(self):
        """Flush pending events (best effort) before the parent shutdown runs."""
        try:
            self.buffer.flush()
        except Exception:
            pass
        BaseThreadedModule.shutDown(self)
class ZabbixSink(BaseThreadedModule):
    """
    Send events to zabbix.

    hostname: Hostname for which the metrics should be stored.
    fields: Event fields to send.
    field_prefix: Prefix to prepend to field names. For e.g. cpu_count field with default lumbermill_ prefix, the Zabbix key is lumbermill_cpu_count.
    timestamp_field: Field to provide timestamp. If not provided, current timestamp is used.
    agent_conf: Path to zabbix_agent configuration file. If set to True defaults to /etc/zabbix/zabbix_agentd.conf.
    server: Address of zabbix server. If port differs from default it can be set by appending it, e.g. 127.0.0.1:10052.
    store_interval_in_secs: sending data to zabbix in x seconds intervals.
    batch_size: sending data to zabbix if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - ZabbixSink:
       hostname:                        # <type: string; is: required>
       fields:                          # <type: list; is: required>
       field_prefix:                    # <default: "lumbermill_"; type: string; is: optional>
       timestamp_field:                 # <default: "timestamp"; type: string; is: optional>
       agent_conf:                      # <default: True; type: boolean||string; is: optional>
       server:                          # <default: False; type: boolean||string; is: required if agent_conf is False else optional>
       store_interval_in_secs:          # <default: 10; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read the sink settings and create the zabbix sender from agent config or server address."""
        BaseThreadedModule.configure(self, configuration)
        self.hostname = self.getConfigurationValue("hostname")
        self.fields = self.getConfigurationValue("fields")
        self.field_prefix = self.getConfigurationValue("field_prefix")
        self.timestamp_field = self.getConfigurationValue("timestamp_field")
        self.batch_size = self.getConfigurationValue('batch_size')
        self.backlog_size = self.getConfigurationValue('backlog_size')
        self.agent_conf = self.getConfigurationValue("agent_conf")
        if self.agent_conf:
            if self.agent_conf is True:
                self.agent_conf = "/etc/zabbix/zabbix_agentd.conf"
            if not os.path.isfile(self.agent_conf):
                self.logger.error("%s does not point to an existing file." % self.agent_conf)
                self.lumbermill.shutDown()
            self.zabbix_sender = ZabbixSender(use_config=self.agent_conf)
        else:
            server = self.getConfigurationValue("server")
            port = 10051
            if ":" in server:
                # Split on the last colon only; the port arrives as text and must be an integer.
                server, port = server.rsplit(":", 1)
            self.zabbix_sender = ZabbixSender(zabbix_server=server, port=int(port))
        self.buffer = Buffer(self.getConfigurationValue('batch_size'), self.storeData, self.getConfigurationValue('store_interval_in_secs'), maxsize=self.getConfigurationValue('backlog_size'))

    def getStartMessage(self):
        if self.agent_conf:
            return "Config: %s. Max buffer size: %d" % (self.agent_conf, self.getConfigurationValue('backlog_size'))
        else:
            return "Server: %s. Max buffer size: %d" % (self.getConfigurationValue("server"), self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Recreate the send buffer inside the forked process."""
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(self.getConfigurationValue('batch_size'), self.storeData, self.getConfigurationValue('store_interval_in_secs'), maxsize=self.getConfigurationValue('backlog_size'))

    def handleEvent(self, event):
        self.buffer.append(event)
        yield None

    def storeData(self, events):
        """
        Build one zabbix metric per configured field of every event and send them in one packet.

        Fields missing from an event are skipped. If no metric could be built
        nothing is sent.
        """
        packet = []
        for event in events:
            # Without a configured (or present) timestamp field no timestamp is passed on.
            timestamp = None
            if self.timestamp_field:
                try:
                    timestamp = event[self.timestamp_field]
                except KeyError:
                    pass
            hostname = mapDynamicValue(self.hostname, mapping_dict=event, use_strftime=True)
            for field_name in self.fields:
                try:
                    packet.append(ZabbixMetric(hostname, "%s%s" % (self.field_prefix, field_name), event[field_name], timestamp))
                except KeyError:
                    pass
        if not packet:
            return
        response = self.zabbix_sender.send(packet)
        if response.failed != 0:
            self.logger.warning("%d of %d metrics were not processed correctly." % (response.total-response.processed, response.total))

    def shutDown(self):
        self.buffer.flush()
class ZmqSink(BaseThreadedModule):
    """
    Sends events to zeromq.

    server: Server to connect to. Pattern: hostname:port.
    pattern: Either push or pub.
    mode: Whether to run a server or client. If running as server, pool size is restricted to a single process.
    topic: The channels topic.
    hwm: Highwatermark for sending socket.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send msgpacked.
    store_interval_in_secs: Send data to zeromq in x seconds intervals.
    batch_size: Send data to zeromq if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - ZmqSink:
       server:                          # <default: 'localhost:5570'; type: string; is: optional>
       pattern:                         # <default: 'push'; type: string; values: ['push', 'pub']; is: optional>
       mode:                            # <default: 'connect'; type: string; values: ['connect', 'bind']; is: optional>
       topic:                           # <default: None; type: None||string; is: optional>
       hwm:                             # <default: None; type: None||integer; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    # NOTE(review): was "input"; every other sink in this file declares "output" — confirm against the module loader.
    module_type = "output"
    """Set module type"""
    can_run_forked = True

    def configure(self, configuration):
        """Read the sink settings; a binding socket cannot be shared across forked processes."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        # Kept for the start message.
        self.server = self.getConfigurationValue('server')
        self.topic = self.getConfigurationValue('topic')
        self.format = self.getConfigurationValue('format')
        self.mode = self.getConfigurationValue('mode')
        if self.mode == "bind":
            self.can_run_forked = False

    def initZmqContext(self):
        """Create the zmq context and the sending socket, then connect or bind it."""
        self.zmq_context = zmq.Context()
        if self.getConfigurationValue('pattern') == 'push':
            self.client = self.zmq_context.socket(zmq.PUSH)
        else:
            self.client = self.zmq_context.socket(zmq.PUB)
        if self.getConfigurationValue('hwm'):
            try:
                self.client.setsockopt(zmq.SNDHWM, self.getConfigurationValue('hwm'))
            except Exception:
                # Fall back to the generic high water mark option.
                self.client.setsockopt(zmq.HWM, self.getConfigurationValue('hwm'))
        server_name, server_port = self.getConfigurationValue('server').split(":")
        try:
            server_addr = socket.gethostbyname(server_name)
        except socket.gaierror:
            server_addr = server_name
        try:
            if self.getConfigurationValue('mode') == 'connect':
                self.client.connect('tcp://%s:%s' % (server_addr, server_port))
            else:
                self.client.bind('tcp://%s:%s' % (server_addr, server_port))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to zeromq at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
            self.lumbermill.shutDown()

    def getStartMessage(self):
        return "%s. Max buffer size: %d" % (self.server, self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the zmq socket and the send buffer inside the forked process."""
        BaseThreadedModule.initAfterFork(self)
        self.initZmqContext()
        self.buffer = Buffer(self.getConfigurationValue('batch_size'), self.storeData, self.getConfigurationValue('store_interval_in_secs'), maxsize=self.getConfigurationValue('backlog_size'))

    def storeData(self, buffered_data):
        """
        Send every buffered message over the zmq socket.

        Returns True when all messages were sent, False otherwise.
        """
        try:
            for data in buffered_data:
                self.client.send("%s" % data)
            return True
        except zmq.error.ContextTerminated:
            # The context is gone; nothing to report.
            return False
        except Exception:
            exc_type, exc_value, exc_tb = sys.exc_info()
            # Compare the message text; the exception object itself never equals a string.
            if str(exc_value) in ['Interrupted system call', 'Socket operation on non-socket']:
                return False
            self.logger.error("Could not add events to zmq. Exception: %s, Error: %s." % (exc_type, exc_value))
            return False

    def handleEvent(self, event):
        """Queue the formatted (or msgpacked) event, prefixed with the topic if one is set."""
        if self.format:
            publish_data = mapDynamicValue(self.format, event)
        else:
            publish_data = msgpack.packb(event)
        if self.topic:
            publish_data = "%s %s" % (self.topic, publish_data)
        self.buffer.append(publish_data)
        yield None

    def shutDown(self):
        """Flush pending messages (best effort), close the socket and terminate the context."""
        try:
            self.buffer.flush()
        except Exception:
            pass
        try:
            self.client.close()
            self.zmq_context.term()
        except AttributeError:
            # Socket or context were never created.
            pass
        # Call parent shutDown method.
        BaseThreadedModule.shutDown(self)
class MongoDbSink(BaseThreadedModule):
    """
    Store incoming events in a mongodb.

    host: Mongodb server.
    database: Mongodb database name.
    collection: Mongodb collection name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    optinonal_connection_params: Other optional parameters as documented in https://api.mongodb.org/python/current/api/pymongo/mongo_client.html
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
            If not set the whole event dict is send.
    doc_id: Sets the document id for the committed event data.
    store_interval_in_secs: Send data to mongodb in x seconds intervals.
    batch_size: Sending data to mongodb if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - MongoDbSink:
       host:                            # <default: 'localhost:27017'; type: string; is: optional>
       database:                        # <default: 'lumbermill'; type: string; is: optional>
       collection:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       optinonal_connection_params:     # <default: {'serverSelectionTimeoutMS': 5}; type: dictionary; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.collection = self.getConfigurationValue('collection')
        self.database = self.getConfigurationValue('database')
        self.doc_id_pattern = self.getConfigurationValue("doc_id")

    def getStartMessage(self):
        return "DB: %s. Max buffer size: %d" % (self.getConfigurationValue('database'), self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the mongodb client and the send buffer inside the forked process."""
        BaseThreadedModule.initAfterFork(self)
        # Init mongodb client after fork.
        self.mongodb = self.connect()
        if self.mongodb is None:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create the mongodb client and verify the server answers.

        Returns the client, or None if the client could not be created or the
        server_info() check failed (a shutdown is requested in that case).
        """
        mongodb_client = None
        try:
            mongodb_client = pymongo.MongoClient(self.getConfigurationValue('host'), **self.getConfigurationValue('optinonal_connection_params'))
            # server_info() talks to the server, so it doubles as a connection check.
            self.logger.debug(str(mongodb_client.server_info()))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.warning("Connection to %s failed. Exception: %s, Error: %s." % (self.getConfigurationValue('host'), etype, evalue))
            # A client whose server check failed must not be reported as connected.
            mongodb_client = None
        if mongodb_client is None:
            self.logger.error("Connection to %s failed. Shutting down." % self.getConfigurationValue('host'))
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % self.getConfigurationValue('host'))
        return mongodb_client

    def handleEvent(self, event):
        """Queue either the formatted event string or the whole event for the next bulk insert."""
        if self.format:
            publish_data = self.getConfigurationValue('format', event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def storeData(self, events):
        """
        Insert the events via one ordered bulk operation per target collection.

        Events without a resolvable doc_id are logged and skipped.
        """
        mongo_db = self.mongodb[self.database]
        bulk_objects = {}
        for event in events:
            collection_name = mapDynamicValueInString(self.collection, event, use_strftime=True).lower()
            doc_id = mapDynamicValue(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." % (self.doc_id_pattern, event))
                continue
            event['_id'] = doc_id
            if collection_name not in bulk_objects:
                bulk_objects[collection_name] = mongo_db[collection_name].initialize_ordered_bulk_op()
            try:
                bulk_objects[collection_name].insert(event)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
                self.logger.debug("Payload: %s" % event)
                # Exception objects do not support "in"; inspect the message text instead.
                error_message = str(evalue)
                if "Broken pipe" in error_message or "Connection reset by peer" in error_message:
                    self.mongodb = self.connect()
        for collection_name, bulk_object in bulk_objects.items():
            try:
                result = bulk_object.execute()
                self.logger.debug(str(result))
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))

    def shutDown(self):
        """Flush pending events (best effort) before the parent shutdown runs."""
        try:
            self.buffer.flush()
        except Exception:
            pass
        BaseThreadedModule.shutDown(self)
class FileSink(BaseThreadedModule):
    """
    Store all received events in a file.

    file_name: absolute path to file. String may contain pythons strftime directives and event fields, e.g. %Y-%m-%d.
    format: Which event fields to use in the logline, e.g. '$(@timestamp) - $(url) - $(country_code)'
    store_interval_in_secs: writing data to file in x seconds intervals.
    batch_size: writing data to file if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: maximum count of events waiting for transmission. Events above count will be dropped.
    compress: Compress output as gzip or snappy file. For this to be effective, the chunk size should not be too small.

    Configuration template:

    - FileSink:
       file_name:                       # <type: string; is: required>
       format:                          # <default: '$(data)'; type: string; is: optional>
       store_interval_in_secs:          # <default: 10; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
       compress:                        # <default: None; type: None||string; values: [None,'gzip','snappy']; is: optional>
    """

    module_type = "output"
    """Set module type"""
    can_run_forked = False

    def configure(self, configuration):
        """Read the sink settings, load the compression module if needed and start buffering."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.batch_size = self.getConfigurationValue('batch_size')
        self.backlog_size = self.getConfigurationValue('backlog_size')
        self.file_name = self.getConfigurationValue('file_name')
        self.format = self.getConfigurationValue('format')
        self.compress = self.getConfigurationValue('compress')
        self.file_handles = {}
        if self.compress == 'gzip':
            try:
                # Import module into namespace of object. Otherwise it will not be accessible when process was forked.
                self.gzip_module = __import__('gzip')
            except ImportError:
                self.logger.error('Gzip compression selected but gzip module could not be loaded.')
                self.lumbermill.shutDown()
        if self.compress == 'snappy':
            try:
                self.snappy_module = __import__('snappy')
            except ImportError:
                self.logger.error('Snappy compression selected but snappy module could not be loaded.')
                self.lumbermill.shutDown()
        self.buffer = Buffer(self.batch_size, self.storeData, self.getConfigurationValue('store_interval_in_secs'), maxsize=self.backlog_size)
        TimedFunctionManager.startTimedFunction(self.closeStaleFileHandles)

    def getStartMessage(self):
        return "File: %s. Max buffer size: %d" % (self.file_name, self.getConfigurationValue('backlog_size'))

    @setInterval(60)
    def closeStaleFileHandles(self):
        """
        Close and delete file handles that are unused since 5 minutes.
        """
        # Iterate over a snapshot; entries are removed from the dict inside the loop.
        for path, file_handle_data in list(self.file_handles.items()):
            last_used_time_ago = time.time() - file_handle_data['lru']
            if last_used_time_ago < 300:
                continue
            self.logger.info('Closing stale file handle for %s.' % (path))
            file_handle_data['handle'].close()
            self.file_handles.pop(path)

    def closeAllFileHandles(self):
        """Close and forget every cached file handle."""
        # Iterate over a snapshot; entries are removed from the dict inside the loop.
        for path, file_handle_data in list(self.file_handles.items()):
            self.logger.info('Closing file handle for %s.' % path)
            file_handle_data['handle'].close()
            self.file_handles.pop(path)

    def ensurePathExists(self, path):
        """Create the directory part of path if it is missing."""
        dirpath = os.path.dirname(path)
        # A bare file name has no directory part; there is nothing to create then.
        if dirpath and not os.path.exists(dirpath):
            os.makedirs(dirpath)

    def handleEvent(self, event):
        self.buffer.append(event)
        yield None

    def getOrCreateFileHandle(self, path, mode):
        """
        Return the cached handle for path, opening the file with mode on first use.

        Returns None if the file could not be opened.
        """
        file_handle = None
        try:
            file_handle = self.file_handles[path]['handle']
            self.file_handles[path]['lru'] = time.time()
        except KeyError:
            try:
                file_handle = open(path, mode)
                self.file_handles[path] = {'handle': file_handle, 'lru': time.time()}
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error('Could no open %s for writing. Exception: %s, Error: %s.' % (path, etype, evalue))
        return file_handle

    def storeData(self, events):
        """
        Group the formatted events by target file and append each group to its file.

        Returns True if every group was written, False if at least one target
        could not be created or written.
        """
        write_data = collections.defaultdict(list)
        for event in events:
            path = mapDynamicValue(self.file_name, mapping_dict=event, use_strftime=True)
            line = mapDynamicValue(self.format, mapping_dict=event)
            write_data["%s" % path].append(line + "\n")
        all_written = True
        for path, lines in write_data.items():
            data = "".join(lines)
            try:
                self.ensurePathExists(path)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error('Could no create path %s. Events could not be written. Exception: %s, Error: %s.' % (path, etype, evalue))
                all_written = False
                # Keep going; the remaining target files may still be writable.
                continue
            mode = "a+"
            if self.compress == 'gzip':
                path += ".gz"
                mode += "b"
                data = self.compressGzip(data)
            elif self.compress == 'snappy':
                path += ".snappy"
                mode += "b"
                data = self.compressSnappy(data)
            try:
                fh = self.getOrCreateFileHandle(path, mode)
                fh.write(data)
                fh.flush()
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error('Could no write event data to %s. Exception: %s, Error: %s.' % (path, etype, evalue))
                all_written = False
        return all_written

    def shutDown(self):
        """Write out pending events, close all files and run the parent shutdown."""
        self.buffer.flush()
        self.closeAllFileHandles()
        BaseThreadedModule.shutDown(self)

    def compressGzip(self, data):
        """Return data compressed as a complete gzip member."""
        compressed = StringIO()
        compressor = self.gzip_module.GzipFile(mode='wb', fileobj=compressed)
        try:
            compressor.write(data)
        finally:
            # Closing the compressor writes the gzip trailer into the in-memory buffer.
            compressor.close()
        return compressed.getvalue()

    def compressSnappy(self, data):
        return self.snappy_module.compress(data)
class ElasticSearchSink(BaseThreadedModule):
    """
    Store the data dictionary in an elasticsearch index.

    The elasticsearch module takes care of discovering all nodes of the elasticsearch cluster.
    Requests will then be load balanced via round robin.

    action: Either index or update. If update be sure to provide the correct doc_id.
    fields: Which event fields to send on, e.g. [timestamp, url, country_code]. If not set the whole event dict is send.
    nodes: Configures the elasticsearch nodes.
    read_timeout: Set number of seconds to wait until requests to elasticsearch will time out.
    connection_type: One of: 'urllib3', 'requests'.
    http_auth: 'user:password'.
    use_ssl: One of: True, False.
    index_name: Sets the index name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    doc_id: Sets the es document id for the committed event data.
    routing: Sets a routing value (@see: http://www.elasticsearch.org/blog/customizing-your-document-routing/)
             Timepatterns like %Y.%m.%d are allowed here.
    ttl: When set, documents will be automatically deleted after ttl expired.
         Can either set time in milliseconds or elasticsearch date format, e.g.: 1d, 15m etc.
         This feature needs to be enabled for the index.
         @See: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-ttl-field.html
    sniff_on_start: The client can be configured to inspect the cluster state to get a list of nodes upon startup.
                    Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    sniff_on_connection_fail: The client can be configured to inspect the cluster state to get a list of nodes upon failure.
                              Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    store_interval_in_secs: Send data to es in x seconds intervals.
    batch_size: Sending data to es if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - ElasticSearchSink:
       action:                          # <default: 'index'; type: string; is: optional; values: ['index', 'update']>
       fields:                          # <default: None; type: None||list; is: optional>
       nodes:                           # <type: string||list; is: required>
       read_timeout:                    # <default: 10; type: integer; is: optional>
       connection_type:                 # <default: 'urllib3'; type: string; values: ['urllib3', 'requests']; is: optional>
       http_auth:                       # <default: None; type: None||string; is: optional>
       use_ssl:                         # <default: False; type: boolean; is: optional>
       index_name:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       doc_type:                        # <default: '$(lumbermill.event_type)'; type: string; is: optional>
       routing:                         # <default: None; type: None||string; is: optional>
       ttl:                             # <default: None; type: None||integer||string; is: optional>
       sniff_on_start:                  # <default: False; type: boolean; is: optional>
       sniff_on_connection_fail:        # <default: False; type: boolean; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read the sink settings and pick the elasticsearch connection class."""
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        for module_name in ['elasticsearch', 'urllib3', 'requests']:
            if self.getConfigurationValue('log_level') == 'info':
                logging.getLogger(module_name).setLevel(logging.WARN)
            else:
                # Set log level for elasticsearch library if configured to other than default.
                logging.getLogger(module_name).setLevel(self.logger.level)
        self.action = self.getConfigurationValue('action')
        self.fields = self.getConfigurationValue('fields')
        self.ttl = self.getConfigurationValue("ttl")
        self.index_name = self.getConfigurationValue("index_name")
        self.routing_pattern = self.getConfigurationValue("routing")
        self.doc_id_pattern = self.getConfigurationValue("doc_id")
        self.doc_type_pattern = self.getConfigurationValue("doc_type")
        self.doc_type_is_dynamic = self.isDynamicConfigurationValue("doc_type")
        self.es_nodes = self.getConfigurationValue("nodes")
        self.read_timeout = self.getConfigurationValue("read_timeout")
        if not isinstance(self.es_nodes, list):
            self.es_nodes = [self.es_nodes]
        if self.getConfigurationValue("connection_type") == 'urllib3':
            self.connection_class = elasticsearch.connection.Urllib3HttpConnection
        elif self.getConfigurationValue("connection_type") == 'requests':
            self.connection_class = elasticsearch.connection.RequestsHttpConnection

    def getStartMessage(self):
        return "Idx: %s. Max buffer size: %d" % (self.index_name, self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the es client and the send buffer inside the forked process."""
        BaseThreadedModule.initAfterFork(self)
        # Init es client after fork as mentioned in https://elasticsearch-py.readthedocs.org/en/master/
        self.es = self.connect()
        if not self.es:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create the elasticsearch client, retrying up to five times.

        Returns the client, or False after the last failed attempt (in which
        case a shutdown has been requested).
        """
        es = False
        tries = 0
        while tries < 5 and not es:
            try:
                # Connect to es node and round-robin between them.
                self.logger.debug("Connecting to %s." % self.es_nodes)
                es = elasticsearch.Elasticsearch(self.es_nodes,
                                                 connection_class=self.connection_class,
                                                 timeout=self.read_timeout,
                                                 sniff_on_start=self.getConfigurationValue('sniff_on_start'),
                                                 sniff_on_connection_fail=self.getConfigurationValue('sniff_on_connection_fail'),
                                                 sniff_timeout=5,
                                                 maxsize=20,
                                                 use_ssl=self.getConfigurationValue('use_ssl'),
                                                 http_auth=self.getConfigurationValue('http_auth'))
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.warning("Connection to %s failed. Exception: %s, Error: %s." % (self.es_nodes, etype, evalue))
                self.logger.warning("Waiting %s seconds before retring to connect." % ((4 + tries)))
                # Back off a little longer after every failed attempt.
                time.sleep(4 + tries)
                tries += 1
        if not es:
            self.logger.error("Connection to %s failed. Shutting down." % self.es_nodes)
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % self.es_nodes)
        return es

    def handleEvent(self, event):
        """Queue the event, reduced to the configured fields if any are set."""
        if self.fields:
            publish_data = {}
            for field in self.fields:
                try:
                    # Copy the field under its own name; fields missing from the event are skipped.
                    publish_data[field] = event[field]
                except KeyError:
                    continue
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def dataToElasticSearchJson(self, events):
        """
        Format data for elasticsearch bulk update.

        Returns one string holding a header line and a document line per event.
        Events without a resolvable doc_id or that cannot be json encoded are
        logged and left out.
        """
        json_data = []
        for event in events:
            index_name = mapDynamicValueInString(self.index_name, event, use_strftime=True).lower()
            doc_type = mapDynamicValueInString(self.doc_type_pattern, event)
            doc_id = mapDynamicValueInString(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." % (self.getConfigurationValue("doc_id"), event))
                continue
            # The bulk header is keyed by the action, so meta fields must be
            # added under that same key ('index' or 'update').
            action_meta = {'_index': index_name, '_type': doc_type, '_id': doc_id}
            if self.routing_pattern:
                action_meta['_routing'] = mapDynamicValue(self.routing_pattern, use_strftime=True)
            if self.ttl:
                action_meta['_ttl'] = self.ttl
            header = {self.action: action_meta}
            document = {'doc': event} if self.action == 'update' else event
            try:
                json_data.append("\n".join((json.dumps(header), json.dumps(document), "\n")))
            except UnicodeDecodeError:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not json encode %s. Exception: %s, Error: %s." % (document, etype, evalue))
        return "".join(json_data)

    def storeData(self, events):
        """
        Send the events to elasticsearch in a single bulk request.

        Returns True on success and False if the request failed. A lost
        connection triggers a reconnect.
        """
        json_data = self.dataToElasticSearchJson(events)
        if not json_data:
            # Nothing serializable in this batch; skip the request.
            return True
        try:
            self.es.bulk(body=json_data)
            return True
        except elasticsearch.exceptions.ConnectionError:
            self.logger.warning("Lost connection to %s. Trying to reconnect." % self.es_nodes)
            try:
                self.es = self.connect()
            except Exception:
                time.sleep(.5)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
            self.logger.debug("Payload: %s" % json_data)
            # Exception objects do not support "in"; inspect the message text instead.
            error_message = str(evalue)
            if "Broken pipe" in error_message or "Connection reset by peer" in error_message:
                self.es = self.connect()
        return False

    def shutDown(self):
        """Flush pending events (best effort) before the parent shutdown runs."""
        try:
            self.buffer.flush()
        except Exception:
            pass
        BaseThreadedModule.shutDown(self)
class Cache(BaseThreadedModule):
    """
    A simple wrapper around the python simplekv module.

    It can be used to store results of modules in all simplekv supported backends.

    When set, the following options cause RedisStore to use a buffer for setting values.
    Multiple values are set via the pipe command, which speeds up storage. Still this comes at a price.
    Buffered values, that have not yet been sent to redis, will be lost when LumberMill crashes.

    backend: backends supported by [simplekv](http://pythonhosted.org//simplekv/)
    store_interval_in_secs: Sending data to redis in x seconds intervals.
    batch_size: Sending data to redis if count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of values waiting for transmission. Values above count will be dropped.

    Configuration template:

    - Cache:
       backend:                         # <default: 'DictStore'; type: string; values:['DictStore', 'RedisStore', 'MemcacheStore']; is: optional>
       server:                          # <default: None; type: None||string; is: required if backend in ['RedisStore', 'MemcacheStore'] and cluster is None else optional>
       cluster:                         # <default: None; type: None||dictionary; is: required if backend == 'RedisStore' and server is None else optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       socket_timeout:                  # <default: 10; type: integer; is: optional>
       charset:                         # <default: 'utf-8'; type: string; is: optional>
       errors:                          # <default: 'strict'; type: string; is: optional>
       decode_responses:                # <default: False; type: boolean; is: optional>
       unix_socket_path:                # <default: None; type: None||string; is: optional>
       batch_size:                      # <default: None; type: None||integer; is: optional>
       store_interval_in_secs:          # <default: None; type: None||integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "stand_alone"
    """Set module type"""

    def configure(self, configuration):
        """
        Create the simplekv store for the configured backend and, if
        'store_interval_in_secs' or 'batch_size' is set, switch the public
        set/get/delete/pop methods to their buffered variants.
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.backend = self.getConfigurationValue('backend')
        self.backend_client = None
        self.kv_store = None
        self.set_buffer = None
        if self.backend == 'DictStore':
            import simplekv.memory
            self.kv_store = simplekv.memory.DictStore()
        elif self.backend == 'RedisStore':
            import simplekv.memory.redisstore
            self.backend_client = self._getRedisClient()
            self.kv_store = simplekv.memory.redisstore.RedisStore(self.backend_client)
        elif self.backend == 'MemcacheStore':
            import simplekv.memory.memcachestore
            self.backend_client = self._getMemcacheClient()
            self.kv_store = simplekv.memory.memcachestore.MemcacheStore(self.backend_client)
        else:
            self.logger.error("Unknown backend type %s. Please check." % self.backend)
            self.lumbermill.shutDown()
            # Without a store there is nothing to buffer for.
            return
        if self.getConfigurationValue('store_interval_in_secs') or self.getConfigurationValue('batch_size'):
            # Redis gets a dedicated callback that writes a whole batch through one pipeline.
            if self.backend == 'RedisStore':
                flush_callback = self._setRedisBufferedCallback
            else:
                flush_callback = self._setBufferedCallback
            self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                                     flush_callback,
                                     self.getConfigurationValue('store_interval_in_secs'),
                                     maxsize=self.getConfigurationValue('backlog_size'))
            # Keep the direct implementations reachable under a leading underscore
            # and expose the buffer-aware variants under the public names.
            self._set = self.set
            self.set = self._setBuffered
            self._get = self.get
            self.get = self._getBuffered
            self._delete = self.delete
            self.delete = self._deleteBuffered
            self._pop = self.pop
            self.pop = self._popBuffered

    def _getRedisClient(self):
        """
        Return a redis client (cluster client if 'cluster' is configured,
        simple client otherwise). Shuts LumberMill down if the ping fails.
        """
        cluster = self.getConfigurationValue('cluster')
        if not cluster:
            redis_store = self.getConfigurationValue('server')
            client = self._getSimpleRedisClient()
        else:
            redis_store = cluster
            client = self._getClusterRedisClient()
        try:
            # Also fails (AttributeError) when client creation returned None.
            client.ping()
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (redis_store, etype, evalue))
            self.lumbermill.shutDown()
        return client

    def _getMemcacheClient(self):
        """Placeholder: no memcache client is implemented yet, returns None."""
        client = None
        # TODO: implement memcache client
        return client

    def _getSimpleRedisClient(self):
        """Return a redis.StrictRedis client built from the module configuration, or None on failure."""
        try:
            client = redis.StrictRedis(host=self.getConfigurationValue('server'),
                                       port=self.getConfigurationValue('port'),
                                       db=self.getConfigurationValue('db'),
                                       password=self.getConfigurationValue('password'),
                                       socket_timeout=self.getConfigurationValue('socket_timeout'),
                                       charset=self.getConfigurationValue('charset'),
                                       errors=self.getConfigurationValue('errors'),
                                       decode_responses=self.getConfigurationValue('decode_responses'),
                                       unix_socket_path=self.getConfigurationValue('unix_socket_path'))
            return client
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
            self.lumbermill.shutDown()
        return None

    def _getClusterRedisClient(self):
        """
        Return a rediscluster.StrictRedisCluster client built from the
        'cluster' configuration (masters as keys, slave(s) as values),
        or None if the module is missing or the client cannot be created.
        """
        try:
            import rediscluster
        except ImportError:
            self.logger.error("Could not import rediscluster module. To install follow instructions @https://github.com/salimane/rediscluster-py")
            self.lumbermill.shutDown()
            return None
        # TODO: Implement a locking mechanism for the cluster client.
        # Some modules like Facet depend on this.
        cluster = {'nodes': {}, 'master_of': {}}
        # Running counter: every node (master or slave) gets its own unique "node_<n>" key.
        counter = 0
        for master_node, slave_nodes in self.getConfigurationValue('cluster').items():
            counter += 1
            master_node_key = "node_%d" % counter
            node_name_or_ip, node_port = self._parseRedisServerAddress(master_node)
            cluster['nodes'][master_node_key] = {'host': node_name_or_ip, 'port': node_port}
            if 'default_node' not in cluster:
                # NOTE(review): this stores the configured address, not the generated node key;
                # confirm which of the two rediscluster expects for 'default_node'.
                cluster['default_node'] = master_node
            if isinstance(slave_nodes, str):
                slave_nodes = [slave_nodes]
            for slave_node in slave_nodes:
                counter += 1
                slave_node_key = "node_%d" % counter
                node_name_or_ip, node_port = self._parseRedisServerAddress(slave_node)
                cluster['nodes'][slave_node_key] = {'host': node_name_or_ip, 'port': node_port}
                cluster['master_of'][master_node_key] = slave_node_key
        client = None
        try:
            client = rediscluster.StrictRedisCluster(cluster=cluster, db=self.getConfigurationValue('db'))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('cluster'), etype, evalue))
            self.lumbermill.shutDown()
        return client

    def _parseRedisServerAddress(self, node_address):
        """Split 'host:port' into (host, port); without a single ':' the configured 'port' is used."""
        try:
            node_name_or_ip, node_port = node_address.split(":")
        except ValueError:
            node_name_or_ip = node_address
            node_port = self.getConfigurationValue('port')
        return (node_name_or_ip, node_port)

    def getBackendName(self):
        """Return the configured backend name."""
        return self.backend

    def iterKeys(self):
        """Yield all keys known to the underlying store."""
        for key in self.kv_store.iter_keys():
            yield key

    def getClient(self):
        """Return the raw backend client (None for backends without one)."""
        return self.backend_client

    def getLock(self, name, timeout=None, sleep=0.1):
        """Return a lock object from the backend client, or False if the client offers no lock()."""
        lock = False
        try:
            lock = self.backend_client.lock(name, timeout, sleep)
        except AttributeError:
            pass
        return lock

    def set(self, key, value, ttl=0, pickle=True):
        """
        Store value under key, pickled first unless pickle is not True.
        ttl is only passed on when a backend client exists.
        Re-raises any pickling error after logging it.
        """
        if pickle is True:
            try:
                value = cPickle.dumps(value)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not store %s:%s in redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
                raise
        # Only backend clients support ttl.
        if self.backend_client and ttl:
            self.kv_store.put(key, value, ttl_secs=ttl)
        else:
            self.kv_store.put(key, value)

    def _setBuffered(self, key, value, ttl=0, pickle=True):
        """Queue a set operation; the value is stored unpickled until the buffer is flushed."""
        self.set_buffer.append({'key': key, 'value': value, 'ttl': ttl, 'pickle': pickle})

    def _setBufferedCallback(self, values):
        """Buffer flush callback for non-redis backends: store each queued entry directly."""
        for value in values:
            self._set(value['key'], value['value'], value['ttl'], value['pickle'])

    def _setRedisBufferedCallback(self, values):
        """
        Buffer flush callback for redis: write all queued entries through one pipeline.
        Returns True on success, None if executing the pipeline failed.
        """
        pipe = self.backend_client.pipeline()
        for value in values:
            # Pickle into a local so the buffered entry itself stays untouched.
            payload = value['value']
            if value['pickle'] is True:
                try:
                    payload = cPickle.dumps(payload)
                except Exception:
                    etype, evalue, etb = sys.exc_info()
                    self.logger.error("Could not store %s:%s in redis. Exception: %s, Error: %s." % (value['key'], value['value'], etype, evalue))
                    raise
            if value['ttl']:
                pipe.setex(value['key'], value['ttl'], payload)
            else:
                pipe.set(value['key'], payload)
        try:
            pipe.execute()
            return True
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))

    def get(self, key, unpickle=True):
        """Return the stored value for key, unpickled unless unpickle is false or the value is empty."""
        value = self.kv_store.get(key)
        if unpickle and value:
            try:
                value = cPickle.loads(value)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not unpickle %s:%s from redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
                raise
        return value

    def _getBuffered(self, key, unpickle=True):
        """Return the still-buffered value for key if there is one, else read from the store."""
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            return self.set_buffer.buffer[value_idx]['value']
        except Exception:
            # Not buffered (StopIteration) or any other lookup failure: ask the store.
            return self._get(key, unpickle)

    def delete(self, key):
        """Delete key from the store."""
        self.kv_store.delete(key)

    def _deleteBuffered(self, key):
        """Drop the buffered entry for key if there is one, else delete from the store."""
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            self.set_buffer.buffer.pop(value_idx)
            return
        except Exception:
            self._delete(key)

    def pop(self, key, unpickle=True):
        """Return the value for key and delete it from the store if the value is truthy."""
        value = self.get(key, unpickle)
        if value:
            self.delete(key)
        return value

    def _popBuffered(self, key, unpickle=True):
        """Remove and return the buffered value for key if there is one, else pop from the store."""
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            return self.set_buffer.buffer.pop(value_idx)['value']
        except Exception:
            return self._pop(key, unpickle)

    def shutDown(self):
        """Flush pending buffered values (best effort), then run the parent shutdown."""
        if getattr(self, 'set_buffer', None):
            try:
                self.set_buffer.flush()
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))
        BaseThreadedModule.shutDown(self)
class KeyValueStore(BaseThreadedModule):
    """
    A simple wrapper around the python simplekv module.

    It can be used to store results of modules in all simplekv supported backends.

    When set, the following options cause RedisStore to use a buffer for setting values.
    Multiple values are set via the pipe command, which speeds up storage. Still this comes at a price.
    Buffered values, that have not yet been sent to redis, will be lost when LumberMill crashes.

    backend: backends supported by [simplekv](http://pythonhosted.org//simplekv/)
    store_interval_in_secs: Sending data to redis in x seconds intervals.
    batch_size: Sending data to redis if count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of values waiting for transmission. Values above count will be dropped.

    Configuration template:

    - KeyValueStore:
       backend:                         # <default: 'DictStore'; type: string; values:['DictStore', 'RedisStore', 'MemcacheStore']; is: optional>
       server:                          # <default: None; type: None||string; is: required if backend in ['RedisStore', 'MemcacheStore'] and cluster is None else optional>
       cluster:                         # <default: None; type: None||dictionary; is: required if backend == 'RedisStore' and server is None else optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       socket_timeout:                  # <default: 10; type: integer; is: optional>
       charset:                         # <default: 'utf-8'; type: string; is: optional>
       errors:                          # <default: 'strict'; type: string; is: optional>
       decode_responses:                # <default: False; type: boolean; is: optional>
       unix_socket_path:                # <default: None; type: None||string; is: optional>
       batch_size:                      # <default: None; type: None||integer; is: optional>
       store_interval_in_secs:          # <default: None; type: None||integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "stand_alone"
    """Set module type"""

    def configure(self, configuration):
        """
        Create the simplekv store for the configured backend and, if
        'store_interval_in_secs' or 'batch_size' is set, switch the public
        set/get/delete/pop methods to their buffered variants.
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        backend = self.getConfigurationValue('backend')
        self.backend_client = None
        self.kv_store = None
        self.set_buffer = None
        if backend == 'DictStore':
            import simplekv.memory
            self.kv_store = simplekv.memory.DictStore()
        elif backend == 'RedisStore':
            import simplekv.memory.redisstore
            self.backend_client = self.getRedisClient()
            self.kv_store = simplekv.memory.redisstore.RedisStore(self.backend_client)
        elif backend == 'MemcacheStore':
            import simplekv.memory.memcachestore
            self.backend_client = self.getMemcacheClient()
            self.kv_store = simplekv.memory.memcachestore.MemcacheStore(self.backend_client)
        else:
            # Previously this fell through silently and left kv_store undefined.
            self.logger.error("Unknown backend type %s. Please check." % backend)
            self.lumbermill.shutDown()
            return
        if self.getConfigurationValue('store_interval_in_secs') or self.getConfigurationValue('batch_size'):
            # Redis gets a dedicated callback that writes a whole batch through one pipeline.
            if backend == 'RedisStore':
                flush_callback = self.setRedisBufferedCallback
            else:
                flush_callback = self.setBufferedCallback
            self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                                     flush_callback,
                                     self.getConfigurationValue('store_interval_in_secs'),
                                     maxsize=self.getConfigurationValue('backlog_size'))
            # Keep the direct implementations reachable under a leading underscore
            # and expose the buffer-aware variants under the public names.
            self._set = self.set
            self.set = self.setBuffered
            self._get = self.get
            self.get = self.getBuffered
            self._delete = self.delete
            self.delete = self.deleteBuffered
            self._pop = self.pop
            self.pop = self.popBuffered

    def getRedisClient(self):
        """
        Return a redis client (cluster client if 'cluster' is configured,
        simple client otherwise). Shuts LumberMill down if the ping fails.
        """
        cluster = self.getConfigurationValue('cluster')
        if not cluster:
            redis_store = self.getConfigurationValue('server')
            client = self.getSimpleRedisClient()
        else:
            redis_store = cluster
            client = self.getClusterRedisClient()
        try:
            # Also fails (AttributeError) when client creation returned None.
            client.ping()
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (redis_store, etype, evalue))
            self.lumbermill.shutDown()
        return client

    def getMemcacheClient(self):
        """Placeholder: no memcache client is implemented yet, returns None."""
        client = None
        # TODO: implement memcache client
        return client

    def getSimpleRedisClient(self):
        """Return a redis.StrictRedis client built from the module configuration, or None on failure."""
        try:
            client = redis.StrictRedis(host=self.getConfigurationValue('server'),
                                       port=self.getConfigurationValue('port'),
                                       db=self.getConfigurationValue('db'),
                                       password=self.getConfigurationValue('password'),
                                       socket_timeout=self.getConfigurationValue('socket_timeout'),
                                       charset=self.getConfigurationValue('charset'),
                                       errors=self.getConfigurationValue('errors'),
                                       decode_responses=self.getConfigurationValue('decode_responses'),
                                       unix_socket_path=self.getConfigurationValue('unix_socket_path'))
            return client
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
            self.lumbermill.shutDown()
        return None

    def getClusterRedisClient(self):
        """
        Return a rediscluster.StrictRedisCluster client built from the
        'cluster' configuration (masters as keys, slave(s) as values),
        or None if the module is missing or the client cannot be created.
        """
        try:
            import rediscluster
        except ImportError:
            self.logger.error("Could not import rediscluster module. To install follow instructions @https://github.com/salimane/rediscluster-py")
            self.lumbermill.shutDown()
            return None
        # TODO: Implement a locking mechanism for the cluster client.
        # Some modules like Facet depend on this.
        cluster = {'nodes': {}, 'master_of': {}}
        # Running counter: every node (master or slave) gets its own unique "node_<n>" key.
        counter = 0
        for master_node, slave_nodes in self.getConfigurationValue('cluster').items():
            counter += 1
            master_node_key = "node_%d" % counter
            node_name_or_ip, node_port = self._parseRedisServerAddress(master_node)
            cluster['nodes'][master_node_key] = {'host': node_name_or_ip, 'port': node_port}
            if 'default_node' not in cluster:
                # NOTE(review): this stores the configured address, not the generated node key;
                # confirm which of the two rediscluster expects for 'default_node'.
                cluster['default_node'] = master_node
            if isinstance(slave_nodes, str):
                slave_nodes = [slave_nodes]
            for slave_node in slave_nodes:
                counter += 1
                slave_node_key = "node_%d" % counter
                node_name_or_ip, node_port = self._parseRedisServerAddress(slave_node)
                cluster['nodes'][slave_node_key] = {'host': node_name_or_ip, 'port': node_port}
                cluster['master_of'][master_node_key] = slave_node_key
        client = None
        try:
            client = rediscluster.StrictRedisCluster(cluster=cluster, db=self.getConfigurationValue('db'))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('cluster'), etype, evalue))
            self.lumbermill.shutDown()
        return client

    def _parseRedisServerAddress(self, node_address):
        """Split 'host:port' into (host, port); without a single ':' the configured 'port' is used."""
        try:
            node_name_or_ip, node_port = node_address.split(":")
        except ValueError:
            node_name_or_ip = node_address
            node_port = self.getConfigurationValue('port')
        return (node_name_or_ip, node_port)

    def iterKeys(self):
        """Yield all keys known to the underlying store."""
        for key in self.kv_store.iter_keys():
            yield key

    def getClient(self):
        """Return the raw backend client (None for backends without one)."""
        return self.backend_client

    def getLock(self, name, timeout=None, sleep=0.1):
        """Return a lock object from the backend client, or False if none could be obtained."""
        lock = False
        try:
            lock = self.backend_client.lock(name, timeout, sleep)
        except Exception:
            pass
        return lock

    def _pickleValue(self, key, value):
        """Return value pickled with cPickle; logs and re-raises on failure."""
        try:
            return cPickle.dumps(value)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not store %s:%s in redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
            raise

    def _unpickleValue(self, key, value):
        """Return value unpickled with cPickle; logs and re-raises on failure."""
        try:
            return cPickle.loads(value)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not unpickle %s:%s from redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
            raise

    def set(self, key, value, ttl=0, pickle=True):
        """
        Store value under key, pickled first unless pickle is not True.
        ttl is only passed on when a backend client exists.
        """
        if pickle is True:
            value = self._pickleValue(key, value)
        # Only backend clients support ttl.
        if self.backend_client and ttl:
            self.kv_store.put(key, value, ttl_secs=ttl)
        else:
            self.kv_store.put(key, value)

    def setBuffered(self, key, value, ttl=0, pickle=True):
        """Queue a set operation. The value is pickled now (if requested), so the buffer holds final payloads."""
        if pickle is True:
            value = self._pickleValue(key, value)
        if ttl:
            self.set_buffer.append({'key': key, 'ttl': ttl, 'value': value})
        else:
            self.set_buffer.append({'key': key, 'value': value})

    def setBufferedCallback(self, values):
        """Buffer flush callback for non-redis backends: store each queued entry directly."""
        for value in values:
            # Buffered values were already pickled in setBuffered; pickling again
            # would make get() return a pickled string instead of the object.
            self._set(value['key'], value['value'], value.get('ttl', 0), pickle=False)

    def setRedisBufferedCallback(self, values):
        """
        Buffer flush callback for redis: write all queued entries through one pipeline.
        Returns True on success, None if executing the pipeline failed.
        """
        pipe = self.backend_client.pipeline()
        for value in values:
            if 'ttl' in value:
                pipe.setex(value['key'], value['ttl'], value['value'])
            else:
                pipe.set(value['key'], value['value'])
        try:
            pipe.execute()
            return True
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))

    def get(self, key, unpickle=True):
        """Return the stored value for key, unpickled unless unpickle is false or the value is empty."""
        value = self.kv_store.get(key)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def getBuffered(self, key, unpickle=True):
        """
        Return the still-buffered value for key if there is one, else read from the store.
        Buffered payloads are handled like stored ones: unpickled when unpickle is true.
        """
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            value = self.set_buffer.buffer[value_idx]['value']
        except Exception:
            # Not buffered (StopIteration) or any other lookup failure: ask the store.
            return self._get(key, unpickle)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def delete(self, key):
        """Delete key from the store."""
        self.kv_store.delete(key)

    def deleteBuffered(self, key):
        """Drop the buffered entry for key if there is one, else delete from the store."""
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            self.set_buffer.buffer.pop(value_idx)
            return
        except Exception:
            self._delete(key)

    def pop(self, key, unpickle=True):
        """Return the value for key and delete it from the store if the value is truthy."""
        value = self.get(key, unpickle)
        if value:
            self.delete(key)
        return value

    def popBuffered(self, key, unpickle=True):
        """
        Remove and return the buffered value for key if there is one, else pop from the store.
        Buffered payloads are handled like stored ones: unpickled when unpickle is true.
        """
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            value = self.set_buffer.buffer.pop(value_idx)['value']
        except Exception:
            return self._pop(key, unpickle)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def shutDown(self):
        """Flush pending buffered values (best effort), then run the parent shutdown."""
        if getattr(self, 'set_buffer', None):
            try:
                self.set_buffer.flush()
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))
        BaseThreadedModule.shutDown(self)
class GraphiteSink(BaseThreadedModule):
    """
    Send metrics to graphite server.

    server: Graphite server to connect to.
    port: Port carbon-cache is listening on.
    formats: Format of messages to send to graphite, e.g.: ['lumbermill.stats.event_rate_$(interval)s $(event_rate)'].
    store_interval_in_secs: Send data to graphite in x seconds intervals.
    batch_size: Send data to graphite if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Here a simple example to send http_status statistics to graphite:

    ...

    - Statistics:
       interval: 10
       fields: ['http_status']

    - GraphiteSink:
       filter: if $(field_name) == "http_status"
       server: 127.0.0.1
       batch_size: 1
       formats: ['lumbermill.stats.http_200_$(interval)s $(field_counts.200)',
                 'lumbermill.stats.http_400_$(interval)s $(field_counts.400)',
                 'lumbermill.stats.http_total_$(interval)s $(total_count)']

    ...

    Configuration template:

    - GraphiteSink:
       server:                          # <default: 'localhost'; type: string; is: optional>
       port:                            # <default: 2003; type: integer; is: optional>
       formats:                         # <type: list; is: required>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 50; type: integer; is: optional>
       backlog_size:                    # <default: 50; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read formats and the (server, port) address from the configuration; no connection is opened yet."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.formats = self.getConfigurationValue('formats')
        self.connection_data = (self.getConfigurationValue('server'),
                                self.getConfigurationValue('port'))
        self.connection = None

    def connect(self):
        """Open a socket to the configured server. Returns the socket, or False on failure."""
        connection = socket.socket()
        try:
            connection.connect(self.connection_data)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Failed to connect to %s. Exception: %s, Error: %s." % (self.connection_data, etype, evalue))
            # Do not leak the half-initialised socket.
            connection.close()
            return False
        return connection

    def getStartMessage(self):
        """Return a short description of the target and buffer size for the startup log."""
        return "%s:%s. Max buffer size: %d" % (self.connection_data[0],
                                               self.connection_data[1],
                                               self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the event buffer and connect to graphite; shuts LumberMill down if the connect fails."""
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))
        self.connection = self.connect()
        if not self.connection:
            self.lumbermill.shutDown()
            return
        # NOTE(review): the parent hook is invoked a second time here, as in the
        # existing code; confirm whether this second call is actually required.
        BaseThreadedModule.initAfterFork(self)

    def handleEvent(self, event):
        """Render every configured format for the event and queue the non-empty results, timestamped."""
        for format_string in self.formats:
            mapped_data = self.mapDynamicValue(format_string, event)
            if mapped_data:
                self.buffer.append("%s %s" % (mapped_data, int(time.time())))
        yield None

    def storeData(self, events):
        """
        Send all events of a batch to graphite, one newline-terminated line each.

        On a send error the connection is re-established and sending continues
        with the next event. Returns True only if every event was sent.
        """
        all_sent = True
        for event in events:
            if not event.endswith("\n"):
                event += "\n"
            try:
                # sendall keeps writing until the whole line is out; send may stop early.
                self.connection.sendall(event)
            except Exception:
                all_sent = False
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
                if not self._reconnect():
                    # Shutdown was already triggered; nothing more can be sent.
                    return None
        return True if all_sent else None

    def _reconnect(self):
        """Drop the current connection and try up to 5 times to open a new one. Returns True on success."""
        try:
            self.connection.close()
        except Exception:
            # connection may be None/False when an earlier connect attempt failed.
            pass
        self.connection = None
        tries = 0
        while tries < 5 and not self.connection:
            time.sleep(5)
            # connection_data is a tuple, so it must be wrapped to be formatted as one value.
            self.logger.warning("Trying to reconnect to %s." % (self.connection_data,))
            # Try to reconnect.
            self.connection = self.connect()
            tries += 1
        if not self.connection:
            self.logger.error("Reconnect failed. Shutting down.")
            self.lumbermill.shutDown()
            return False
        self.logger.info("Reconnection to %s successful." % (self.connection_data,))
        return True

    def shutDown(self):
        """Close the graphite connection, ignoring errors from a missing or already closed socket."""
        # NOTE(review): unlike other modules in this file, pending buffered events are
        # not flushed and the parent shutDown is not called here; confirm this is intended.
        try:
            self.connection.close()
        except Exception:
            pass
class RedisStore(BaseThreadedModule):
    """
    A simple wrapper around the redis python module.

    It can be used to store results of modules in a redis key/value store.

    server: Redis server to connect to.
    cluster: Dictionary of redis masters as keys and pack_followers as values, e.g.: {'172.16.0.1:6379': '172.16.0.2:6379'}
    port: Port redis server is listening on.
    db: Redis db.
    password: Redis password.
    socket_timeout: Socket timeout in seconds.
    charset: Charset to use.
    errors: tbd.
    decode_responses: specifies whether return values from Redis commands get decoded automatically using the client's charset value.
    unix_socket_path: Path to unix socket file.

    When set, the following options cause RedisStore to use a buffer for setting values.
    Multiple values are set via the pipe command, which speeds up storage. Still this comes at a price.
    Buffered values, that have not yet been sent to redis, will be lost when LumberMill crashes.

    store_interval_in_secs: Sending data to redis in x seconds intervals.
    batch_size: Sending data to redis if count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of values waiting for transmission. Values above count will be dropped.

    Configuration template:

    - RedisStore:
       server:                          # <default: 'localhost'; type: string; is: optional>
       cluster:                         # <default: {}; type: dictionary; is: optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       socket_timeout:                  # <default: 10; type: integer; is: optional>
       charset:                         # <default: 'utf-8'; type: string; is: optional>
       errors:                          # <default: 'strict'; type: string; is: optional>
       decode_responses:                # <default: False; type: boolean; is: optional>
       unix_socket_path:                # <default: None; type: None||string; is: optional>
       batch_size:                      # <default: None; type: None||integer; is: optional>
       store_interval_in_secs:          # <default: None; type: None||integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "stand_alone"
    """Set module type"""

    def configure(self, configuration):
        """
        Create the redis client (cluster client if 'cluster' is configured) and,
        if 'store_interval_in_secs' or 'batch_size' is set, switch the public
        set/get/delete/pop methods to their buffered variants.
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        cluster = self.getConfigurationValue('cluster')
        if not cluster:
            redis_store = self.getConfigurationValue('server')
            self.client = self.getRedisClient()
        else:
            redis_store = cluster
            self.client = self.getClusterRedisClient()
        try:
            # Also fails (AttributeError) when client creation returned None.
            self.client.ping()
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (redis_store, etype, evalue))
            self.lumbermill.shutDown()
        self.set_buffer = None
        if self.getConfigurationValue('store_interval_in_secs') or self.getConfigurationValue('batch_size'):
            self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                                     self.setBufferedCallback,
                                     self.getConfigurationValue('store_interval_in_secs'),
                                     maxsize=self.getConfigurationValue('backlog_size'))
            # Keep the direct implementations reachable under a leading underscore
            # and expose the buffer-aware variants under the public names.
            self._set = self.set
            self.set = self.setBuffered
            self._get = self.get
            self.get = self.getBuffered
            self._delete = self.delete
            self.delete = self.deleteBuffered
            self._pop = self.pop
            self.pop = self.popBuffered

    def getRedisClient(self):
        """Return a redis.StrictRedis client built from the module configuration, or None on failure."""
        try:
            client = redis.StrictRedis(host=self.getConfigurationValue('server'),
                                       port=self.getConfigurationValue('port'),
                                       db=self.getConfigurationValue('db'),
                                       password=self.getConfigurationValue('password'),
                                       socket_timeout=self.getConfigurationValue('socket_timeout'),
                                       charset=self.getConfigurationValue('charset'),
                                       errors=self.getConfigurationValue('errors'),
                                       decode_responses=self.getConfigurationValue('decode_responses'),
                                       unix_socket_path=self.getConfigurationValue('unix_socket_path'))
            return client
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
        return None

    def getClusterRedisClient(self):
        """
        Return a rediscluster.StrictRedisCluster client built from the
        'cluster' configuration (masters as keys, slave(s) as values).
        """
        import rediscluster
        # TODO: Implement a locking mechanism for the cluster client.
        # Some modules like Facet depend on this.
        # NOTE: 'master_of' and 'default_node' are not populated; every configured
        # node is only registered under 'nodes'.
        cluster = {'nodes': {}, 'master_of': {}}
        # Running counter: every node (master or slave) gets its own unique "node_<n>" key.
        counter = 0
        for master_node, slave_nodes in self.getConfigurationValue('cluster').items():
            counter += 1
            master_node_key = "node_%d" % counter
            node_name_or_ip, node_port = self._parseRedisServerAddress(master_node)
            cluster['nodes'][master_node_key] = {'host': node_name_or_ip, 'port': node_port}
            if isinstance(slave_nodes, str):
                slave_nodes = [slave_nodes]
            for slave_node in slave_nodes:
                counter += 1
                slave_node_key = "node_%d" % counter
                node_name_or_ip, node_port = self._parseRedisServerAddress(slave_node)
                cluster['nodes'][slave_node_key] = {'host': node_name_or_ip, 'port': node_port}
        client = rediscluster.StrictRedisCluster(cluster=cluster, db=self.getConfigurationValue('db'))
        return client

    def _parseRedisServerAddress(self, node_address):
        """Split 'host:port' into (host, port); without a single ':' the configured 'port' is used."""
        try:
            node_name_or_ip, node_port = node_address.split(":")
        except ValueError:
            node_name_or_ip = node_address
            node_port = self.getConfigurationValue('port')
        return (node_name_or_ip, node_port)

    def getClient(self):
        """Return the raw redis client."""
        return self.client

    def getLock(self, name, timeout=None, sleep=0.1):
        """Return a lock object from the redis client."""
        return self.client.lock(name, timeout, sleep)

    def _pickleValue(self, key, value):
        """Return value pickled with cPickle; logs and re-raises on failure."""
        try:
            return cPickle.dumps(value)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not store %s:%s in redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
            raise

    def _unpickleValue(self, key, value):
        """Return value unpickled with cPickle; logs and re-raises on failure."""
        try:
            return cPickle.loads(value)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not unpickle %s:%s from redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
            raise

    def set(self, key, value, ttl=0, pickle=True):
        """Store value under key (pickled unless pickle is not True); a truthy ttl sets an expiry via setex."""
        if pickle is True:
            value = self._pickleValue(key, value)
        if ttl:
            self.client.setex(key, ttl, value)
        else:
            self.client.set(key, value)

    def setBuffered(self, key, value, ttl=0, pickle=True):
        """Queue a set operation. The value is pickled now (if requested), so the buffer holds final payloads."""
        if pickle is True:
            value = self._pickleValue(key, value)
        if ttl:
            self.set_buffer.append({'key': key, 'ttl': ttl, 'value': value})
        else:
            self.set_buffer.append({'key': key, 'value': value})

    def setBufferedCallback(self, values):
        """
        Buffer flush callback: write all queued entries through one pipeline.
        Returns True on success, None if executing the pipeline failed.
        """
        pipe = self.client.pipeline()
        for value in values:
            if 'ttl' in value:
                pipe.setex(value['key'], value['ttl'], value['value'])
            else:
                pipe.set(value['key'], value['value'])
        try:
            pipe.execute()
            return True
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))

    def get(self, key, unpickle=True):
        """Return the stored value for key, unpickled unless unpickle is false or the value is empty."""
        value = self.client.get(key)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def getBuffered(self, key, unpickle=True):
        """
        Return the still-buffered value for key if there is one, else read from redis.
        Buffered payloads are handled like stored ones: unpickled when unpickle is true.
        """
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            value = self.set_buffer.buffer[value_idx]['value']
        except Exception:
            # Not buffered (StopIteration) or any other lookup failure: ask redis.
            return self._get(key, unpickle)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def delete(self, key):
        """Delete key from redis."""
        self.client.delete(key)

    def deleteBuffered(self, key):
        """Drop the buffered entry for key if there is one, else delete from redis."""
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            self.set_buffer.buffer.pop(value_idx)
            return
        except Exception:
            self._delete(key)

    def pop(self, key, unpickle=True):
        """Return the value for key and delete it from redis if the value is truthy."""
        value = self.get(key, unpickle)
        if value:
            self.delete(key)
        return value

    def popBuffered(self, key, unpickle=True):
        """
        Remove and return the buffered value for key if there is one, else pop from redis.
        Buffered payloads are handled like stored ones: unpickled when unpickle is true.
        """
        try:
            value_idx = next(index for (index, entry) in enumerate(self.set_buffer.buffer) if entry["key"] == key)
            value = self.set_buffer.buffer.pop(value_idx)['value']
        except Exception:
            return self._pop(key, unpickle)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def shutDown(self):
        """Flush pending buffered values (best effort), then run the parent shutdown."""
        if getattr(self, 'set_buffer', None):
            try:
                self.set_buffer.flush()
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))
        BaseThreadedModule.shutDown(self)
class RedisStore(BaseThreadedModule):
    """
    A simple wrapper around the redis python module.

    It can be used to store results of modules in a redis key/value store.

    server: Redis server to connect to.
    cluster: Dictionary of redis masters as keys and their slave nodes as values, e.g.: {'172.16.0.1:6379': '172.16.0.2:6379'}
    port: Port redis server is listening on.
    db: Redis db.
    password: Redis password.
    socket_timeout: Socket timeout in seconds.
    charset: Charset to use.
    errors: tbd.
    decode_responses: specifies whether return values from Redis commands get decoded automatically using the client's charset value.
    unix_socket_path: Path to unix socket file.

    When set, the following options cause RedisStore to use a buffer for setting values.
    Multiple values are set via the pipe command, which speeds up storage. Still this comes at a price.
    Buffered values, that have not yet been sent to redis, will be lost when LumberMill crashes.

    store_interval_in_secs: Sending data to redis in x seconds intervals.
    batch_size: Sending data to redis if count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of values waiting for transmission. Values above count will be dropped.

    Configuration template:

    - RedisStore:
       server:                          # <default: 'localhost'; type: string; is: optional>
       cluster:                         # <default: {}; type: dictionary; is: optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       socket_timeout:                  # <default: 10; type: integer; is: optional>
       charset:                         # <default: 'utf-8'; type: string; is: optional>
       errors:                          # <default: 'strict'; type: string; is: optional>
       decode_responses:                # <default: False; type: boolean; is: optional>
       unix_socket_path:                # <default: None; type: None||string; is: optional>
       batch_size:                      # <default: None; type: None||integer; is: optional>
       store_interval_in_secs:          # <default: None; type: None||integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    # Set module type
    module_type = "stand_alone"

    def configure(self, configuration):
        """
        Create the redis client and, if requested, switch to buffered writes.

        A cluster client is used when the 'cluster' option is non-empty, a
        plain client otherwise. When 'store_interval_in_secs' or 'batch_size'
        is set, the public set/get/delete/pop methods are replaced by their
        buffered counterparts; the unbuffered originals stay reachable as
        _set/_get/_delete/_pop.
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        # Defined first so shutDown can rely on it whatever happens below.
        self.set_buffer = None
        if not self.getConfigurationValue('cluster'):
            redis_store = self.getConfigurationValue('server')
            self.client = self.getRedisClient()
        else:
            redis_store = self.getConfigurationValue('cluster')
            self.client = self.getClusterRedisClient()
        try:
            # Also fails (AttributeError) when client creation returned None.
            self.client.ping()
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (redis_store, etype, evalue))
            self.lumbermill.shutDown()
        if self.getConfigurationValue('store_interval_in_secs') or self.getConfigurationValue('batch_size'):
            self.set_buffer = Buffer(self.getConfigurationValue('batch_size'),
                                     self.setBufferedCallback,
                                     self.getConfigurationValue('store_interval_in_secs'),
                                     maxsize=self.getConfigurationValue('backlog_size'))
            self._set = self.set
            self.set = self.setBuffered
            self._get = self.get
            self.get = self.getBuffered
            self._delete = self.delete
            self.delete = self.deleteBuffered
            self._pop = self.pop
            self.pop = self.popBuffered

    def getRedisClient(self):
        """
        Build a redis.StrictRedis client from the module configuration.

        Returns the client, or None (after logging) if it could not be created.
        """
        try:
            return redis.StrictRedis(host=self.getConfigurationValue('server'),
                                     port=self.getConfigurationValue('port'),
                                     db=self.getConfigurationValue('db'),
                                     password=self.getConfigurationValue('password'),
                                     socket_timeout=self.getConfigurationValue('socket_timeout'),
                                     charset=self.getConfigurationValue('charset'),
                                     errors=self.getConfigurationValue('errors'),
                                     decode_responses=self.getConfigurationValue('decode_responses'),
                                     unix_socket_path=self.getConfigurationValue('unix_socket_path'))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            # getConfigurationValue is a method; it used to be subscripted
            # here, which raised a TypeError from inside this handler.
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
            return None

    def getClusterRedisClient(self):
        """
        Build a rediscluster.StrictRedisCluster client from the 'cluster' option.

        Every master and slave address is registered under its own
        "node_<n>" key. The 'master_of' mapping is passed empty.
        """
        import rediscluster
        # TODO: Implement a locking mechnism for the cluster client.
        # Some modules like Facet depend on this.
        nodes = {}

        def register_node(address):
            # Number nodes by how many are already registered, so that a
            # master can never reuse the key of the previous group's slave.
            node_key = "node_%d" % (len(nodes) + 1)
            node_name_or_ip, node_port = self._parseRedisServerAddress(address)
            nodes[node_key] = {'host': node_name_or_ip, 'port': node_port}
            return node_key

        for master_node, slave_nodes in self.getConfigurationValue('cluster').items():
            register_node(master_node)
            if not slave_nodes:
                # Master configured without slaves.
                slave_nodes = []
            elif not isinstance(slave_nodes, (list, tuple, set)):
                # A single address given as a scalar instead of a list.
                slave_nodes = [slave_nodes]
            for slave_node in slave_nodes:
                register_node(slave_node)
        cluster = {'nodes': nodes, 'master_of': {}}
        return rediscluster.StrictRedisCluster(cluster=cluster, db=self.getConfigurationValue('db'))

    def _parseRedisServerAddress(self, node_address):
        """
        Split "host:port" into a (host, port) tuple with an integer port.

        If the address does not consist of exactly one host and one numeric
        port, the whole string is used as host together with the configured
        default port.
        """
        try:
            node_name_or_ip, node_port = node_address.split(":")
            # Same type as the configured default port.
            node_port = int(node_port)
        except ValueError:
            node_name_or_ip = node_address
            node_port = self.getConfigurationValue('port')
        return (node_name_or_ip, node_port)

    def getClient(self):
        """Return the underlying redis client."""
        return self.client

    def getLock(self, name, timeout=None, sleep=0.1):
        """Return a lock object obtained from the client for the given name."""
        return self.client.lock(name, timeout, sleep)

    def _pickleValue(self, key, value):
        """Pickle value for storage; log and re-raise if that fails."""
        try:
            return cPickle.dumps(value)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not store %s:%s in redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
            raise

    def _unpickleValue(self, key, value):
        """Unpickle a stored value; log and re-raise if that fails."""
        # NOTE(review): unpickling is only safe while the redis instance is trusted.
        try:
            return cPickle.loads(value)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not unpickle %s:%s from redis. Exception: %s, Error: %s." % (key, value, etype, evalue))
            raise

    def set(self, key, value, ttl=0, pickle=True):
        """
        Store value under key, pickled unless pickle is not True.

        A truthy ttl stores the value with an expiry of ttl seconds.
        """
        if pickle is True:
            value = self._pickleValue(key, value)
        if ttl:
            self.client.setex(key, ttl, value)
        else:
            self.client.set(key, value)

    def setBuffered(self, key, value, ttl=0, pickle=True):
        """Like set, but queue the write in the buffer instead of sending it."""
        if pickle is True:
            value = self._pickleValue(key, value)
        if ttl:
            self.set_buffer.append({'key': key, 'ttl': ttl, 'value': value})
        else:
            self.set_buffer.append({'key': key, 'value': value})

    def setBufferedCallback(self, values):
        """
        Send the buffered writes to redis through a single pipeline.

        Returns True on success, False (after logging) on failure.
        """
        pipe = self.client.pipeline()
        for value in values:
            if 'ttl' in value:
                pipe.setex(value['key'], value['ttl'], value['value'])
            else:
                pipe.set(value['key'], value['value'])
        try:
            pipe.execute()
            return True
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))
            return False

    def get(self, key, unpickle=True):
        """Return the value stored under key, unpickled unless unpickle is false."""
        value = self.client.get(key)
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def _getBufferedEntries(self, key):
        """Return the pending buffer entries for key, oldest first."""
        return [entry for entry in self.set_buffer.buffer if entry["key"] == key]

    def _dropBufferedEntries(self, key):
        """Remove all pending buffer entries for key and return them, oldest first."""
        pending = self.set_buffer.buffer
        dropped = [entry for entry in pending if entry["key"] == key]
        if dropped:
            # Mutate in place so the buffer keeps working on the same list.
            pending[:] = [entry for entry in pending if entry["key"] != key]
        return dropped

    def getBuffered(self, key, unpickle=True):
        """
        Like get, but prefer a write that is still waiting in the buffer.

        The newest pending write wins, since that is the value redis would
        hold once the buffer has been flushed.
        """
        entries = self._getBufferedEntries(key)
        if not entries:
            return self._get(key, unpickle)
        # Return the stored value, as get does, not the internal buffer record.
        value = entries[-1]['value']
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def delete(self, key):
        """Delete key from redis."""
        self.client.delete(key)

    def deleteBuffered(self, key):
        """Delete key from the write buffer and from redis."""
        self._dropBufferedEntries(key)
        # An earlier flush may already have written the key, so redis has to
        # be cleaned up even when a pending entry was found.
        self._delete(key)

    def pop(self, key, unpickle=True):
        """Return the value stored under key and delete the key."""
        value = self.get(key, unpickle)
        # Compare against None so stored falsy values (0, '', []) are removed too.
        if value is not None:
            self.delete(key)
        return value

    def popBuffered(self, key, unpickle=True):
        """Like pop, but take pending writes in the buffer into account."""
        dropped = self._dropBufferedEntries(key)
        if not dropped:
            return self._pop(key, unpickle)
        # Remove a value that an earlier flush may have written already.
        self._delete(key)
        value = dropped[-1]['value']
        if unpickle and value:
            value = self._unpickleValue(key, value)
        return value

    def shutDown(self):
        """Flush pending buffered writes, then shut down the module."""
        # The buffer lives in self.set_buffer. The former self.buffer lookup
        # always failed silently, so pending writes were lost on shutdown.
        set_buffer = getattr(self, 'set_buffer', None)
        if set_buffer is not None:
            try:
                set_buffer.flush()
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not flush buffer. Exception: %s, Error: %s." % (etype, evalue))
        BaseThreadedModule.shutDown(self)