Example #1
 def storeData(self, events):
     mongo_db = self.mongodb[self.database]
     bulk_objects = {}
     for event in events:
         collection_name = mapDynamicValueInString(
             self.collection, event, use_strftime=True).lower()
         doc_id = mapDynamicValue(self.doc_id_pattern, event)
         if not doc_id:
             self.logger.error("Could not find doc_id %s for event %s." %
                               (self.doc_id_pattern, event))
             continue
         event['_id'] = doc_id
         if collection_name not in bulk_objects:
             bulk_objects[collection_name] = mongo_db[
                 collection_name].initialize_ordered_bulk_op()
         try:
             bulk_objects[collection_name].insert(event)
         except Exception:
             etype, evalue, etb = sys.exc_info()
             self.logger.error(
                 "Server communication error. Exception: %s, Error: %s." %
                 (etype, evalue))
             self.logger.debug("Payload: %s" % event)
             # evalue is an exception instance, so match against its string form.
             if "Broken pipe" in str(evalue) or "Connection reset by peer" in str(evalue):
                 self.mongodb = self.connect()
     for collection_name, bulk_object in bulk_objects.iteritems():
         try:
             result = bulk_object.execute()
             self.logger.debug(str(result))
         except Exception:
             etype, evalue, etb = sys.exc_info()
             self.logger.error(
                 "Server communication error. Exception: %s, Error: %s." %
                 (etype, evalue))
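Note: initialize_ordered_bulk_op belongs to PyMongo's legacy bulk API, which was deprecated in PyMongo 3.x and removed in 4.0. A sketch of the same ordered bulk insert with the newer bulk_write API (helper name and arguments are illustrative, not the module's code):

 from pymongo import InsertOne

 def bulk_insert_sketch(mongo_db, events_by_collection):
     # One ordered bulk write per collection, mirroring storeData above.
     for collection_name, events in events_by_collection.items():
         requests = [InsertOne(event) for event in events]
         mongo_db[collection_name].bulk_write(requests, ordered=True)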
 def dataToElasticSearchJson(self, events):
     """
     Format data for elasticsearch bulk update.
     """
     json_data = []
     for event in events:
         index_name = mapDynamicValueInString(self.index_name, event, use_strftime=True).lower()
         doc_type = mapDynamicValueInString(self.doc_type_pattern, event)
         doc_id = mapDynamicValueInString(self.doc_id_pattern, event)
         routing = mapDynamicValue(self.routing_pattern, event, use_strftime=True)
         if not doc_id:
             self.logger.error("Could not find doc_id %s for event %s." % (self.getConfigurationValue("doc_id"), event))
             continue
         header = {self.action: {'_index': index_name,
                                 '_type': doc_type,
                                 '_id': doc_id}}
         if self.routing_pattern:
             header[self.action]['_routing'] = routing
         if self.ttl:
             header[self.action]['_ttl'] = self.ttl
         if self.action == 'update':
             event = {'doc': event}
         try:
             json_data.append("\n".join((json.dumps(header), json.dumps(event), "\n")))
         except UnicodeDecodeError:
             etype, evalue, etb = sys.exc_info()
             self.logger.error("Could not json encode %s. Exception: %s, Error: %s." % (event, etype, evalue))
     json_data = "".join(json_data)
     return json_data
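For reference, the Elasticsearch bulk API consumes newline-delimited action/source pairs, which is what the join above assembles. For one indexed event the payload looks roughly like this (index, type and id values are illustrative):

 {"index": {"_index": "lumbermill-2016.01.01", "_type": "doc", "_id": "715bd321b1016a442bf046682722c78e"}}
 {"bytes_send": "3395", "http_status": 200}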
Example #4
 def storeData(self, buffered_data):
     batch_messages = []
     for event in buffered_data:
         try:
             msg_id = event['lumbermill']['event_id']
         except KeyError:
             # Fall back to a random id when the event carries none.
             msg_id = "%032x%s" % (random.getrandbits(128), os.getpid())
         message = {'Id': msg_id}
         if self.format:
             event = mapDynamicValue(self.format, event)
         else:
             try:
                 event = json.dumps(event)
             except Exception:
                 etype, evalue, etb = sys.exc_info()
                 self.logger.warning(
                     "Error while encoding event data: %s to json. Exception: %s, Error: %s."
                     % (event, etype, evalue))
         message['MessageBody'] = event
         batch_messages.append(message)
         # SQS SendMessageBatch accepts at most 10 entries, so flush full batches.
         if len(batch_messages) == 10:
             self.sqs_queue.send_messages(Entries=batch_messages)
             batch_messages = []
     if len(batch_messages) > 0:
         self.sqs_queue.send_messages(Entries=batch_messages)
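A standalone sketch of the same batching idea (assuming a boto3 SQS Queue resource; the helper and queue names are illustrative):

 import boto3

 def send_in_batches(queue, bodies, batch_size=10):
     # SQS SendMessageBatch allows at most 10 entries per call.
     for start in range(0, len(bodies), batch_size):
         entries = [{'Id': str(i), 'MessageBody': body}
                    for i, body in enumerate(bodies[start:start + batch_size])]
         queue.send_messages(Entries=entries)

 # Usage sketch (queue name is an assumption):
 # queue = boto3.resource('sqs').get_queue_by_name(QueueName='events')
 # send_in_batches(queue, ['{"a": 1}', '{"b": 2}'])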
Example #5
 def handleEvent(self, event):
     if self.format:
         publish_data = mapDynamicValue(self.format, event)
     else:
         publish_data = event
     self.buffer.append(publish_data)
     yield None
 def __handleEvent(self, event):
     if self.format:
         publish_data = mapDynamicValue(self.format, event)
     else:
         publish_data = event
     self.buffer.append(publish_data)
     yield None
Example #7
 def handleEvent(self, event):
     if self.format:
         publish_data = mapDynamicValue(self.format, event)
     else:
         publish_data = msgpack.packb(event)
     if self.topic:
          publish_data = "%s %s" % (self.topic, publish_data)
     self.buffer.append(publish_data)
     yield None
 def testMapDynamicValueWithValueFormat(self):
     self.assertTrue(mapDynamicValue('%(longitude)d', self.event) == '7')
     self.assertTrue(mapDynamicValue('%(longitude)+d', self.event) == '+7')
     self.assertTrue(mapDynamicValue('%(longitude)05.2f', self.event) == '07.63')
     self.assertTrue(mapDynamicValue('%(fields.1)10s', self.event) == '   expects')
     self.assertTrue(mapDynamicValue('%(fields.1)-10s', self.event) == 'expects   ')
     self.assertTrue(mapDynamicValue('%(fields.1).5s', self.event) == 'expec')
     self.assertTrue(mapDynamicValue('%(fields.1)-10.5s', self.event) == 'expec     ')
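These tests show the %-style formatting that mapDynamicValue supports, including dotted paths into nested fields. A minimal sketch of how such a lookup could work (hypothetical; the real helper in LumberMill's utils may differ):

 def map_dynamic_value_sketch(pattern, event):
     # Resolve '%(a.b.0)s' style placeholders against nested dicts/lists.
     class Resolver(dict):
         def __missing__(self, key):
             value = event
             for part in key.split('.'):
                 value = value[int(part)] if isinstance(value, (list, tuple)) else value[part]
             return value
     try:
         return pattern % Resolver()
     except (KeyError, IndexError, ValueError):
         # Unresolvable patterns pass through unchanged, as the
         # missing-key test later in the listing expects.
         return pattern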
Example #10
 def dataToElasticSearchJson(self, events):
     """
     Format data for elasticsearch bulk update.
     """
     json_data = []
     for event in events:
         index_name = mapDynamicValueInString(self.index_name,
                                              event,
                                              use_strftime=True).lower()
         event_type = event.get('lumbermill', {}).get('event_type', 'Unknown')
         doc_id = mapDynamicValue(self.doc_id_pattern, event)
         routing = mapDynamicValue(self.routing_pattern, event, use_strftime=True)
         if not doc_id:
             self.logger.error(
                 "Could not find doc_id %s for event %s." %
                 (self.getConfigurationValue("doc_id"), event))
             continue
         header = {
             self.action: {
                 '_index': index_name,
                 '_type': event_type,
                 '_id': doc_id
             }
         }
         if self.routing_pattern:
             header[self.action]['_routing'] = routing
         if self.ttl:
             header[self.action]['_ttl'] = self.ttl
         if self.action == 'update':
             event = {'doc': event}
         try:
             json_data.append("\n".join(
                 (json.dumps(header), json.dumps(event), "\n")))
         except UnicodeDecodeError:
             etype, evalue, etb = sys.exc_info()
             self.logger.error(
                 "Could not json encode %s. Exception: %s, Error: %s." %
                 (event, etype, evalue))
     json_data = "".join(json_data)
     return json_data
Example #11
 def handleEvent(self, event):
     if self.format:
         publish_data = mapDynamicValue(self.format, event)
     else:
         publish_data = event
     try:
         self.client.publish(self.getConfigurationValue('channel', event), publish_data)
     except Exception:
         etype, evalue, etb = sys.exc_info()
         self.logger.error("Could not publish event to redis channel %s at %s. Exception: %s, Error: %s."
                           % (self.getConfigurationValue('channel', event),
                              self.getConfigurationValue('server'), etype, evalue))
     yield None
Example #12
 def storeEvents(self, events):
     """
     As a side note: synchronizing multiple processes with a lock, to ensure that only
     one process writes to a given file, does not seem to work as expected: webhdfs
     does not immediately release the lease on a file after an append.
     A better approach is to retry the write a number of times before failing.
     if len(events) == 0:
         return
     self.is_storing = True
     path = time.strftime(self.path)
     self.ensureDirExists(path)
     write_data = collections.defaultdict(str)
     for event in events:
         filename = time.strftime(
             self.getConfigurationValue('name_pattern'))
         filename = filename % event
         line = mapDynamicValue(self.format, event)
         write_data[filename] += line
     retry_sleep_time = .4
     for filename, lines in write_data.items():
         write_tries = 0  # Reset the retry counter for each file.
         if self.compress == 'gzip':
             filename += ".gz"
             lines = self.compressGzip(lines)
         elif self.compress == 'snappy':
             filename += ".snappy"
             lines = self.compressSnappy(lines)
         while write_tries < 10:
             try:
                 self.ensureFileExists('%s/%s' % (path, filename))
                 self.hdfs.append_file('%s/%s' % (path, filename), lines)
                 break
             except KeyError:
                 etype, evalue, etb = sys.exc_info()
                 self.logger.error(
                     'Could not log event %s. The format key %s was not present in the event.'
                     % (event, evalue))
                 break  # A missing key will not resolve itself; skip retries.
             except pywebhdfs.errors.PyWebHdfsException:
                 write_tries += 1
                 # Retry up to the maximum. This can resolve transient problems such as a lease still being held by another process.
                 if write_tries < 10:
                     time.sleep(retry_sleep_time * write_tries)
                     continue
                 # Issue error after max retries.
                 etype, evalue, etb = sys.exc_info()
                 self.logger.error(
                     'Max write retries reached. Could not log event %s. Exception: %s, Error: %s.'
                     % (event, etype, evalue))
     self.events_container = []
     self.is_storing = False
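The retry loop above can be factored into a generic helper; a sketch with linear backoff, mirroring the 10-try / 0.4-second constants (names are illustrative):

 import time

 def retry_write(write_func, max_tries=10, base_delay=0.4):
     # Call write_func until it succeeds or max_tries is exhausted.
     for attempt in range(1, max_tries + 1):
         try:
             return write_func()
         except Exception:
             if attempt == max_tries:
                 raise
             time.sleep(base_delay * attempt)  # Linear backoff, as above.

 # Usage sketch:
 # retry_write(lambda: hdfs.append_file('%s/%s' % (path, filename), lines))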
Example #13
 def handleEvent(self, event):
     while self.printing:
         time.sleep(.0001)
     self.printing = True
     if self.format:
         output = mapDynamicValue(self.format, event)
     else:
         output = event
     if self.pretty_print and not self.format:
         pprint.pprint(output, indent=4)
     else:
         print("%s" % output)
     self.printing = False
     yield None
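Note: the printing flag above is a busy-wait and is not race-free; two threads can both observe printing == False and interleave their output. A hypothetical rewrite with threading.Lock (not the module's actual code):

 import threading

 print_lock = threading.Lock()

 def handle_event_locked(event, formatter=None):
     # The lock serializes output without spinning on a flag.
     with print_lock:
         print("%s" % (formatter(event) if formatter else event))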
Example #14
 def storeData(self, events):
     write_data = collections.defaultdict(str)
     for event in events:
         path = mapDynamicValue(self.file_name,
                                mapping_dict=event,
                                use_strftime=True)
         line = mapDynamicValue(self.format, mapping_dict=event)
         write_data["%s" % path] += line + "\n"
     for path, lines in write_data.items():
         try:
             self.ensurePathExists(path)
         except Exception:
             etype, evalue, etb = sys.exc_info()
             self.logger.error(
                 'Could not create path %s. Events could not be written. Exception: %s, Error: %s.'
                 % (path, etype, evalue))
             continue  # Skip this path but keep writing the remaining ones.
         mode = "a+"
         if self.compress == 'gzip':
             path += ".gz"
             mode += "b"
             lines = self.compressGzip(lines)
         elif self.compress == 'snappy':
             path += ".snappy"
             lines = self.compressSnappy(lines)
             mode += "b"
         try:
             fh = self.getOrCreateFileHandle(path, mode)
             fh.write(lines)
             fh.flush()
         except Exception:
             etype, evalue, etb = sys.exc_info()
             self.logger.error(
                 'Could not write event data to %s. Exception: %s, Error: %s.'
                 % (path, etype, evalue))
     return True
Example #16
 def configure(self, configuration):
     # Call parent configure method.
     BaseThreadedModule.configure(self, configuration)
     # Set the log level for the elasticsearch library if configured to something other than the default.
     if self.getConfigurationValue('log_level') != 'info':
         logging.getLogger('elasticsearch').setLevel(self.logger.level)
         logging.getLogger('requests').setLevel(self.logger.level)
     else:
         logging.getLogger('elasticsearch').setLevel(logging.WARN)
         logging.getLogger('requests').setLevel(logging.WARN)
     self.query = self.getConfigurationValue('query')
     # Test if query is valid json.
     try:
         json.loads(self.query)
     except Exception:
         etype, evalue, etb = sys.exc_info()
         self.logger.error(
             "Parsing json query %s failed. Exception: %s, Error: %s." %
             (self.query, etype, evalue))
         self.lumbermill.shutDown()
     self.search_type = self.getConfigurationValue('search_type')
     self.batch_size = self.getConfigurationValue('batch_size')
     self.field_mappings = self.getConfigurationValue('field_mappings')
     self.es_nodes = self.getConfigurationValue('nodes')
     self.read_timeout = self.getConfigurationValue("read_timeout")
     if not isinstance(self.es_nodes, list):
         self.es_nodes = [self.es_nodes]
     self.index_name_pattern = self.getConfigurationValue('index_name')
     self.index_name = mapDynamicValue(self.index_name_pattern,
                                       use_strftime=True).lower()
     if self.getConfigurationValue("connection_type") == 'urllib3':
         self.connection_class = connection.Urllib3HttpConnection
     elif self.getConfigurationValue('connection_type') == 'requests':
         self.connection_class = connection.RequestsHttpConnection
     self.lock = Lock()
     self.manager = Manager()
     if self.search_type == 'scan':
         self.can_run_forked = True
         scroll_id = self.getInitalialScrollId()
         if not scroll_id:
             self.lumbermill.shutDown()
         self.shared_scroll_id = self.manager.Value(c_char_p, scroll_id)
     elif self.search_type == 'normal':
         self.query_from = 0
         self.query = json.loads(self.query)
         self.query['size'] = self.batch_size
         self.es = self.connect()
Example #18
 def storeData(self, events):
     packet = []
     for event in events:
         # Default to None so timestamp is always bound when building metrics.
         timestamp = None
         if self.timestamp_field:
             try:
                 timestamp = event[self.timestamp_field]
             except KeyError:
                 pass
         hostname = mapDynamicValue(self.hostname, mapping_dict=event, use_strftime=True)
         for field_name in self.fields:
             try:
                 packet.append(ZabbixMetric(hostname, "%s%s" % (self.field_prefix, field_name), event[field_name], timestamp))
             except KeyError:
                 pass
                 #self.logger.warning("Could not send metrics for %s:%s. Field not found." % (hostname, field_name))
     response = self.zabbix_sender.send(packet)
     if response.failed != 0:
         self.logger.warning("%d of %d metrics were not processed correctly." % (response.total-response.processed, response.total))
Example #19
 def configure(self, configuration):
     # Call parent configure method.
     BaseThreadedModule.configure(self, configuration)
     # Set the log level for the elasticsearch library if configured to something other than the default.
     if self.getConfigurationValue('log_level') != 'info':
         logging.getLogger('elasticsearch').setLevel(self.logger.level)
         logging.getLogger('requests').setLevel(self.logger.level)
     else:
         logging.getLogger('elasticsearch').setLevel(logging.WARN)
         logging.getLogger('requests').setLevel(logging.WARN)
     self.query = self.getConfigurationValue('query')
     # Test if query is valid json.
     try:
         json.loads(self.query)
     except Exception:
         etype, evalue, etb = sys.exc_info()
         self.logger.error("Parsing json query %s failed. Exception: %s, Error: %s." % (self.query, etype, evalue))
         self.lumbermill.shutDown()
     self.search_type = self.getConfigurationValue('search_type')
     self.batch_size = self.getConfigurationValue('batch_size')
     self.field_mappings = self.getConfigurationValue('field_mappings')
     self.es_nodes = self.getConfigurationValue('nodes')
     self.read_timeout = self.getConfigurationValue("read_timeout")
     if not isinstance(self.es_nodes, list):
         self.es_nodes = [self.es_nodes]
     self.index_name_pattern = self.getConfigurationValue('index_name')
     self.index_name = mapDynamicValue(self.index_name_pattern, use_strftime=True).lower()
     if self.getConfigurationValue("connection_type") == 'urllib3':
         self.connection_class = connection.Urllib3HttpConnection
     elif self.getConfigurationValue('connection_type') == 'requests':
         self.connection_class = connection.RequestsHttpConnection
     self.lock = Lock()
     self.manager = Manager()
     if self.search_type == 'scroll':
         self.can_run_forked = True
         scroll_id = self.getInitalialScrollId()
         if not scroll_id:
             self.lumbermill.shutDown()
         self.shared_scroll_id = self.manager.Value(c_char_p, scroll_id)
     elif self.search_type == 'normal':
         self.query_from = 0
         self.query = json.loads(self.query)
         self.query['size'] = self.batch_size
         self.es = self.connect()
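When search_type is 'scroll', the module drives the scrolling itself; the elasticsearch-py client can also do this via helpers.scan (a sketch, with node address, index and query as assumptions):

 from elasticsearch import Elasticsearch, helpers

 es = Elasticsearch(['localhost:9200'])
 for hit in helpers.scan(es, query={'query': {'match_all': {}}},
                         index='lumbermill-*', size=500):
     print(hit['_source'])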
Example #20
 def handleEvent(self, event):
     while self.printing:
         time.sleep(.0001)
     self.printing = True
     if self.format:
         output = mapDynamicValue(self.format, event)
         print("%s" % output)
     elif self.pretty_print:
         if not self.fields:
             output = event
         else:
             output = {}
             for field in self.fields:
                 try:
                     value = event[field]
                 except KeyError:
                     continue
                 output[field] = value
         pprint.pprint(output, indent=4)
     self.printing = False
     yield None
 def testMapDynamicValueWithTimePattern(self):
     timestring = datetime.datetime.utcnow().strftime('%Y.%m.%d')
     self.assertTrue(mapDynamicValue('test-%Y.%m.%d-%(lumbermill.event_id)s', self.event, use_strftime=True) == 'test-%s-715bd321b1016a442bf046682722c78e' % timestring)
 def testMapDynamicValueWithDictType(self):
     # Make sure that mapDynamicValue will work on a copy of value when passing in a list or a dict.
     mapping_dict = {'event_id': '%(lumbermill.event_id)s'}
     mapped_values = mapDynamicValue(mapping_dict, self.event)
     self.assertEquals(mapped_values['event_id'], '715bd321b1016a442bf046682722c78e')
     self.assertEquals(mapping_dict, {'event_id': '%(lumbermill.event_id)s'})
 def testMapDynamicValues(self):
     self.assertTrue(mapDynamicValue('%(bytes_send)s', self.event) == "3395")
     self.assertTrue(mapDynamicValue('%(lumbermill.event_id)s', self.event) == "715bd321b1016a442bf046682722c78e")
     self.assertTrue(mapDynamicValue('%(lumbermill.list.0)s', self.event) == "10")
     self.assertTrue(mapDynamicValue('%(lumbermill.list.2.hovercraft)s', self.event) == "eels")
     self.assertTrue(mapDynamicValue('%(params.spanish)s', self.event) == "[u'inquisition']")
Example #24
 def handleEvent(self, event):
     if self.format:
         self.syslogger.info(mapDynamicValue(self.format, event))
     else:
         self.syslogger.info(event)
     yield None
Example #26
 def handleEvent(self, event):
     throttled_event_key = mapDynamicValue(self.key, event)
     throttled_event_count = self.setAndGetEventCountByKey(throttled_event_key)
     if self.min_count <= throttled_event_count <= self.max_count:
         yield event
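For illustration, setAndGetEventCountByKey could be backed by a plain counter (a hypothetical sketch; the real implementation may use a TTL-based backend so counts age out):

 import collections

 class EventCounter(object):
     def __init__(self):
         self.counts = collections.Counter()

     def set_and_get_event_count_by_key(self, key):
         # Increment and return the running count for this throttle key.
         self.counts[key] += 1
         return self.counts[key]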
 def testMapDynamicValueWithNoneType(self):
     self.assertEquals(mapDynamicValue(None, self.event), None)
 def testMapDynamicValueWithListType(self):
     # Make sure that mapDynamicValue will work on a copy of value when passing in a list or a dict.
     mapping_list = ['%(lumbermill.event_id)s']
     mapped_values = mapDynamicValue(mapping_list, self.event)
     self.assertEquals(mapped_values[0], '715bd321b1016a442bf046682722c78e')
     self.assertEquals(mapping_list, ['%(lumbermill.event_id)s'])
 def testMapDynamicValueWithMissingKey(self):
     self.assertTrue(mapDynamicValue('%(missing_key)s', self.event) == '%(missing_key)s')