示例#1
0
 def processStatistics(self):
     """
     Log statistics about the monitored processes and optionally emit them as events.

     For every process in self.psutil_processes the attributes listed in
     self.process_statistics are read via as_dict(). Values that can be added
     to an int are summed up over all processes; every other value is logged
     per process and appended to a list in the per process stats event.
     Worker count is len(self.lumbermill.child_processes) + 1, uptime is
     measured from the creation time of the first entry in self.psutil_processes.
     If self.emit_as_event is set, one event per process and one event holding
     the aggregated metrics are passed to self.sendEvent.
     """
     first_process = self.psutil_processes[0]
     worker_count = len(self.lumbermill.child_processes) + 1
     stats_event = {"stats_type": "process_stats", "timestamp": time.time()}
     stats_event["worker_count"] = worker_count
     stats_event["uptime"] = int(time.time() - first_process.create_time())
     self.logger.info(">> Process stats")
     self.logger.info("num workers: %d" % worker_count)
     self.logger.info("started: %s" % datetime.datetime.fromtimestamp(first_process.create_time()).strftime("%Y-%m-%d %H:%M:%S"))
     aggregated_metrics = defaultdict(int)
     for psutil_process in self.psutil_processes:
         stats_event["pid"] = psutil_process.pid
         # items() instead of iteritems(), so the loop works on Python 2 and 3.
         for metric_name, metric_value in psutil_process.as_dict(self.process_statistics).items():
             # Call metric specific method if it exists.
             converter_name = "convertMetric_%s" % metric_name
             if converter_name in self.methods:
                 # Dispatch on the very name that was checked above.
                 metric_name, metric_value = getattr(self, converter_name)(metric_name, metric_value)
             try:
                 aggregated_metrics[metric_name] += metric_value
             except TypeError:
                 # Value can not be summed up. Log it per process instead.
                 # NOTE: the failed "+=" already created a 0 entry in the defaultdict.
                 try:
                     metric_value = dict(metric_value.__dict__)
                 except Exception:
                     # Best effort only: keep the value as it is.
                     pass
                 stats_event.setdefault(metric_name, []).append(metric_value)
                 self.logger.info("%s(pid: %s): %s" % (metric_name, psutil_process.pid, metric_value))
         if self.emit_as_event:
             self.sendEvent(DictUtils.getDefaultEventDict(stats_event, caller_class_name="Statistics", event_type="statistic"))
     for agg_metric_name, agg_metric_value in aggregated_metrics.items():
         self.logger.info("%s: %s" % (agg_metric_name, agg_metric_value))
     if self.emit_as_event:
         self.sendEvent(DictUtils.getDefaultEventDict(aggregated_metrics, caller_class_name="Statistics", event_type="statistic"))
 def testMd5Hash(self):
     """
     Run the 'hash' action without an explicit algorithm and compare the
     value written to the target field with a precomputed digest.
     """
     plain_text = 'Nobody inspects the spammish repetition'
     config = {'action': 'hash',
               'source_fields': ['hash_me'],
               'target_fields': ['hash_me_hashed']}
     self.test_object.configure(config)
     expected_event = DictUtils.getDefaultEventDict({'lumbermill': {'event_id': 1},
                                                     'hash_me': plain_text,
                                                     'hash_me_hashed': 'bb649c83dd1ea5c9d9dec9a18df0ffe9'})
     input_event = DictUtils.getDefaultEventDict({'lumbermill': {'event_id': 1},
                                                  'hash_me': plain_text})
     for result in self.test_object.handleEvent(input_event):
         self.assertEqual(result, expected_event)
 def testAnonymizeMd5(self):
     """
     Run the 'anonymize' action with algorithm md5 and expect the source
     field itself to hold the precomputed digest afterwards.
     """
     config = {'action': 'anonymize',
               'source_fields': ['anon_me'],
               'algorithm': 'md5'}
     self.test_object.configure(config)
     expected_event = DictUtils.getDefaultEventDict({'lumbermill': {'event_id': 1},
                                                     'anon_me': 'bb649c83dd1ea5c9d9dec9a18df0ffe9'})
     input_event = DictUtils.getDefaultEventDict({'lumbermill': {'event_id': 1},
                                                  'anon_me': 'Nobody inspects the spammish repetition'})
     for result in self.test_object.handleEvent(input_event):
         self.assertEqual(result, expected_event)
示例#4
0
 def testSha1Hash(self):
     """
     Run the 'hash' action with algorithm sha1 and compare the value written
     to the target field with a precomputed digest.
     """
     plain_text = 'Nobody inspects the spammish repetition'
     config = {
         'action': 'hash',
         'algorithm': 'sha1',
         'source_fields': ['hash_me'],
         'target_fields': ['hash_me_hashed']
     }
     self.test_object.configure(config)
     expected_event = DictUtils.getDefaultEventDict({
         'lumbermill': {'id': 1},
         'hash_me': plain_text,
         'hash_me_hashed': '531b07a0f5b66477a21742d2827176264f4bbfe2'
     })
     input_event = DictUtils.getDefaultEventDict({
         'lumbermill': {'id': 1},
         'hash_me': plain_text
     })
     for result in self.test_object.handleEvent(input_event):
         self.assertEqual(result, expected_event)
示例#5
0
 def run(self):
     """
     Emit the content of every file in self.files as events, then shut down.

     Paths that are not regular files are skipped with a warning. With
     self.line_by_line set, each line becomes one event; otherwise the whole
     file content is sent as a single event. self.lumbermill.shutDown() is
     called once all files have been processed.
     """
     for path in self.files:
         if not os.path.isfile(path):
             self.logger.warning("File %s does not exist. Skipping." % path)
             continue
         with open(path, 'r') as file_handle:
             if self.line_by_line:
                 # The file object yields one line per iteration.
                 chunks = file_handle
             else:
                 # Single chunk holding the complete file content.
                 chunks = (file_handle.read(),)
             for chunk in chunks:
                 payload = {"filename": path, "data": chunk}
                 self.sendEvent(
                     DictUtils.getDefaultEventDict(
                         dict=payload,
                         caller_class_name=self.__class__.__name__))
     self.lumbermill.shutDown()
 def testSha1Hash(self):
     """
     Run the 'hash' action with algorithm sha1 and compare the value written
     to the target field with a precomputed digest.
     """
     plain_text = 'Nobody inspects the spammish repetition'
     config = {'action': 'hash',
               'algorithm': 'sha1',
               'source_fields': ['hash_me'],
               'target_fields': ['hash_me_hashed']}
     self.test_object.configure(config)
     expected_event = DictUtils.getDefaultEventDict({'lumbermill': {'id': 1},
                                                     'hash_me': plain_text,
                                                     'hash_me_hashed': '531b07a0f5b66477a21742d2827176264f4bbfe2'})
     input_event = DictUtils.getDefaultEventDict({'lumbermill': {'id': 1},
                                                  'hash_me': plain_text})
     for result in self.test_object.handleEvent(input_event):
         self.assertEqual(result, expected_event)
示例#7
0
 def extractFieldsFromResultDocument(self, fields, document):
     """
     Return a new KeyDotNotationDict that holds only the requested fields.

     The document is wrapped in a KeyDotNotationDict for the lookups.
     Fields that are not contained in the document are left out.
     """
     source = DictUtils.KeyDotNotationDict(document)
     extracted = DictUtils.KeyDotNotationDict()
     for field_name in fields:
         if field_name in source:
             extracted[field_name] = source[field_name]
     return extracted
示例#8
0
 def extractFieldsFromResultDocumentWithMapping(self, field_mapping,
                                                document):
     """
     Return a new KeyDotNotationDict with fields copied under mapped names.

     field_mapping maps source field names to target field names. The
     document is wrapped in a KeyDotNotationDict for the lookups. Source
     fields that are not contained in the document are left out.
     """
     document = DictUtils.KeyDotNotationDict(document)
     new_document = DictUtils.KeyDotNotationDict()
     # items() exists on Python 2 and 3, iteritems() only on Python 2.
     for source_field, target_field in field_mapping.items():
         if source_field not in document:
             continue
         new_document[target_field] = document[source_field]
     return new_document
示例#9
0
 def testNewlineEndEvent(self):
     """
     Feed two events from the same source, the second one ending with a
     newline that matches the configured end-of-event pattern, and expect
     exactly one event carrying the concatenated data.
     """
     self.test_object.configure({'pattern': "\n$",
                                 'pattern_marks': 'EndOfEvent'})
     self.checkConfiguration()
     self.test_object.initAfterFork()
     source = 'TestMergeEvent_%s' % os.getpid()
     event = DictUtils.getDefaultEventDict({'data': 'No newline.'}, received_from=source)
     self.test_object.receiveEvent(event)
     event = DictUtils.getDefaultEventDict({'data': "But now: \n"}, received_from=source)
     self.test_object.receiveEvent(event)
     # Wait before collecting the output of the module.
     time.sleep(1.5)
     events = list(self.receiver.getEvent())
     # assertEqual instead of the deprecated assertEquals alias.
     self.assertEqual(len(events), 1)
     self.assertEqual(events[0]['data'], 'No newline.But now: \n')
示例#10
0
    def testFacetValuesMustBeUnique(self):
        """
        Send the same url three times for one remote_ip and expect a single
        facet event that lists this url exactly once.
        """
        rc = RedisStore.RedisStore(mock.Mock())
        rc.configure({'server': 'localhost'})
        self.test_object.lumbermill.modules = {
            'RedisStore': {
                'instances': [rc]
            }
        }
        self.test_object.configure({
            'source_field': 'url',
            'group_by': '$(remote_ip)',
            'interval': 4,
            'backend': 'RedisStore',
            'backend_ttl': 10
        })
        self.checkConfiguration()
        self.test_object.initAfterFork()
        # Three identical events, one second apart. A fresh event dict is built for each.
        for _ in range(3):
            self.test_object.receiveEvent(
                DictUtils.getDefaultEventDict({
                    'url': 'http://www.google.com',
                    'remote_ip': '127.0.0.1',
                    'user_agent': 'Eric'
                }))
            time.sleep(1)
        self.test_object.shutDown()
        # Only facet events are of interest here.
        events = [event for event in self.receiver.getEvent()
                  if event['lumbermill']['event_type'] == 'facet']
        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(len(events), 1)
        self.assertEqual(len(events[0]['facets']), 1)
        self.assertEqual(events[0]['facets'][0], 'http://www.google.com')
示例#11
0
 def handleBatchEvents(self):
     """
     Poll the configured redis lists in batches while self.alive is set.

     For each round self.batch_size blpop calls are queued on a pipeline and
     executed at once. Each non-empty result is sent as an event with the
     first element as "received_from" and the second as "data". An empty
     result ends the current batch after a short sleep.
     Errors raised by pipeline.execute() are logged and the loop carries on;
     SystemExit and KeyboardInterrupt are not caught, so the loop can be
     interrupted.
     """
     pipeline = self.client.pipeline()
     while self.alive:
         for _ in range(0, self.batch_size):
             pipeline.blpop(self.lists, timeout=self.timeout)
         try:
             events = pipeline.execute()
         except Exception:
             exc_type, exc_value, _ = sys.exc_info()
             self.logger.error(
                 "Could not read data from redis list(s) %s. Exception: %s, Error: %s."
                 % (self.lists, exc_type, exc_value))
             continue
         for event in events:
             # If batch_size is bigger than events waiting in redis queue, the remaining entries will be filled with None values.
             # So break out if a None value is found.
             if not event:
                 # Queue is exhausted. Sleep a bit and retry.
                 time.sleep(.5)
                 break
             event = DictUtils.getDefaultEventDict(
                 dict={
                     "received_from": '%s' % event[0],
                     "data": event[1]
                 },
                 caller_class_name=self.__class__.__name__)
             self.sendEvent(event)
示例#12
0
 def setUp(self):
     """Create self.event, a default event dict built from a sample httpd access log event."""
     raw_line = '192.168.2.20 - - [28/Jul/2006:10:27:10 -0300] "GET /wiki/Monty_Python/?spanish=inquisition HTTP/1.0" 200 3395\n'
     # Nested meta data, including a list that holds a dict.
     lumbermill_meta = {
         'event_id': '715bd321b1016a442bf046682722c78e',
         'event_type': 'httpd_access_log',
         'received_from': '127.0.0.1',
         'source_module': 'StdIn',
         'list': [10, 20, {'hovercraft': 'eels'}]
     }
     query_params = {u'spanish': [u'inquisition']}
     sample = {
         'bytes_send': 3395,
         'data': raw_line,
         'datetime': '28/Jul/2006:10:27:10 -0300',
         'lumbermill': lumbermill_meta,
         'http_status': 200,
         'identd': '-',
         'remote_ip': '192.168.2.20',
         'url': 'GET /wiki/Monty_Python/?spanish=inquisition HTTP/1.0',
         'fields': ['nobody', 'expects', 'the'],
         'params': query_params,
         'user': '******'
     }
     self.event = DictUtils.getDefaultEventDict(sample)
示例#13
0
 def testAddGeoInfoFromDefaultField(self):
     """
     Configure no source field, pass an event with an 'x_forwarded_for' ip
     and expect 'country_code' to be set to 'US' on the resulting event.
     """
     self.test_object.configure({'geoip_dat_path': './test_data/GeoLiteCity.dat',
                                 'geo_info_fields': ['country_code']})
     self.checkConfiguration()
     # Named input_event so the builtin dict is no longer shadowed.
     input_event = DictUtils.getDefaultEventDict({'x_forwarded_for': '99.124.167.129'})
     for event in self.test_object.handleEvent(input_event):
         self.assertEqual(event['country_code'], 'US')
示例#14
0
 def testHttpsQuery(self):
     """
     Configure an https url and expect every resulting event to carry a
     non-empty 'gambolputty_http_request' field.
     """
     self.test_object.configure({'url': 'https://www.google.com'})
     self.checkConfiguration()
     request_event = DictUtils.getDefaultEventDict({'TreeNodeID': '1'})
     for result in self.test_object.handleEvent(request_event):
         has_response = ('gambolputty_http_request' in result
                         and len(result['gambolputty_http_request']) > 0)
         self.assertTrue(has_response)
示例#15
0
 def testUnixSocket(self):
     """
     Start the module on a unix socket, write 5000 lines to it and expect
     5000 events, the last one carrying the sent line as 'data'.
     """
     socket_path = '/tmp/test.sock'
     # Remove a leftover socket file from an earlier run.
     try:
         os.remove(socket_path)
     except OSError:
         pass
     self.assertFalse(os.path.exists(socket_path))
     self.test_object.configure({'path_to_socket': socket_path})
     self.checkConfiguration()
     self.test_object.start()
     self.startTornadoEventLoop()
     time.sleep(.1)
     self.assertTrue(os.path.exists(socket_path))
     unix_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
     # Close the client socket when the test is done, whatever the outcome.
     self.addCleanup(unix_socket.close)
     try:
         unix_socket.connect(socket_path)
     except socket.error:
         # socket.error is the exception class; socket.errno is just the errno module.
         self.fail("Could not connect to unix socket.")
     for _ in range(0, 5000):
         # sendall() keeps writing until the complete line is sent, send() may stop early.
         unix_socket.sendall(
             b"http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n")
     expected_ret_val = DictUtils.getDefaultEventDict({
         'data':
         "http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n"
     })
     expected_ret_val.pop('lumbermill')
     time.sleep(.5)
     event = False
     counter = 0
     for event in self.receiver.getEvent():
         counter += 1
     self.assertTrue(event)
     self.assertEqual(counter, 5000)
     event.pop('lumbermill')
     self.assertDictEqual(event, expected_ret_val)
示例#16
0
 def run(self):
     """
     Read packets from self.sniffer while self.alive is set and emit decoded packets as events.

     Each packet is decoded with the 'eth' packet decoder and every decoded
     layer is handed to the matching parse method, which fills decoded_data.
     An event is only sent if decoded_data ends up with a non-empty 'data'
     entry. With self.target_field set, the decoded data is stored under
     that field, otherwise it is merged into the event.
     """
     while self.alive:
         packet = None
         try:
             _, packet = self.sniffer.next()
         except Exception:
             # Best effort: a failed read is treated like "no packet".
             pass
         if not packet:
             continue
         decoder = self.getPacketDecoder('eth')
         if not decoder:
             continue
         decoded_data = {'protocols': []}
         for decoded_packet in decoder.decodePacket(packet):
             packet_type = str(type(decoded_packet))
             if packet_type == "<class 'impacket.ImpactPacket.Ethernet'>":
                 self.parseEtherPacket(decoded_packet, decoded_data)
             elif packet_type == "<class 'impacket.ImpactPacket.IP'>":
                 self.parseIPPacketEvent(decoded_packet, decoded_data)
             elif packet_type == "<class 'impacket.ImpactPacket.TCP'>":
                 self.parseTCPPacketEvent(decoded_packet, decoded_data)
             elif packet_type == "<class 'impacket.ImpactPacket.Data'>":
                 self.parseDataPacketEvent(decoded_packet, decoded_data)
         # get() instead of []: decoded_data starts without a 'data' key, so a
         # packet for which no parse method sets it must not raise a KeyError.
         if decoded_data.get('data'):
             event = DictUtils.getDefaultEventDict(caller_class_name=self.__class__.__name__)
             if self.target_field:
                 event[self.target_field] = decoded_data
             else:
                 event.update(decoded_data)
             self.sendEvent(event)
示例#17
0
 def testZmqPull(self):
     """
     Start the module with the 'pull' pattern, push 1000 messages to it and
     expect 1000 events, the last one carrying the message as 'data'.
     """
     ipaddr, port = self.getFreePortoOnLocalhost()
     config = {
         'address': '%s:%s' % (ipaddr, port),
         'pattern': 'pull'
     }
     self.test_object.configure(config)
     self.checkConfiguration()
     self.test_object.start()
     message = 'A comfy chair is not an effective method of torture!'
     sender = self.getZmqSocket(ipaddr, port, 'push')
     self.assertTrue(sender is not None)
     for _ in range(1000):
         sender.send(message)
     sender.close()
     expected_event = DictUtils.getDefaultEventDict({'data': message})
     expected_event.pop('lumbermill')
     # Stays False if the receiver yields no event at all.
     last_event = False
     time.sleep(.1)
     received = 0
     for last_event in self.receiver.getEvent():
         received += 1
     self.assertTrue(last_event is not False)
     last_event.pop('lumbermill')
     self.assertDictEqual(last_event, expected_event)
     self.assertEqual(received, 1000)
示例#18
0
 def testSQSSink(self):
     """
     Send 100 events through the sink and expect 100 messages in the test
     queue, the first one carrying the same 'data' as the events sent.
     Credentials are read from the AWS_ID and AWS_KEY environment variables.
     """
     config = {'aws_access_key_id': os.environ['AWS_ID'],
               'aws_secret_access_key': os.environ['AWS_KEY'],
               'region': 'eu-west-1',
               'queue': self.queue_name}
     self.test_object.configure(config)
     self.checkConfiguration()
     self.test_object.initAfterFork()
     # Send some messages to the test queue.
     for _ in range(100):
         event = DictUtils.getDefaultEventDict({u'data': u"You get 'Gone with the Wind', 'Les Miserables' by Victor Hugo, "
                                                         u"'The French Lieutenant's Woman' and with every third book you get dung."})
         self.test_object.receiveEvent(event)
     self.test_object.shutDown()
     # Give messages some time to arrive.
     time.sleep(2)
     # Get messages from queue, at most 10 per request, until a response has none.
     received = []
     for _ in range(50):
         response = self.sqs_client.receive_message(QueueUrl=self.sqs_queue.url,
                                                    MaxNumberOfMessages=10)
         if 'Messages' not in response:
             break
         received.extend(response['Messages'])
     self.assertEqual(len(received), 100)
     first_body = json.loads(received[0]['Body'])
     self.assertEqual(first_body['data'], event['data'])
示例#19
0
 def testAddDateTimeCustomFormat(self):
     """
     Configure a custom format and expect '@timestamp' to look like
     2013/08/29 10.25.26.
     """
     # %m (month) in the date part; the former %M would have put the minutes there.
     self.test_object.configure({'format': '%Y/%m/%d %H.%M.%S'})
     # Raw string with escaped dots, so only literal dots separate the time parts.
     timestamp_pattern = re.compile(r'^\d+/\d+/\d+ \d+\.\d+\.\d+$')
     for event in self.test_object.handleEvent(
             DictUtils.getDefaultEventDict({})):
         # assertTrue instead of the deprecated assert_ alias.
         self.assertTrue(timestamp_pattern.match(event['@timestamp']))
示例#20
0
 def testUnixSocket(self):
     """
     Currently skipped: the test raises unittest.SkipTest right after
     configuring the module.

     The remaining body is unreachable while the skip is in place. Once
     enabled it writes 5000 lines to the unix socket and expects 5000 events,
     the last one carrying the sent line as 'data'.
     """
     socket_path = '/tmp/test.sock'
     self.test_object.configure({'path_to_socket': socket_path})
     raise unittest.SkipTest('Skipping test because UnixSocket input is currently broken.')
     # Remove a leftover socket file from an earlier run.
     try:
         os.remove(socket_path)
     except OSError:
         pass
     self.assertFalse(os.path.exists(socket_path))
     self.checkConfiguration()
     self.test_object.start()
     self.startTornadoEventLoop()
     time.sleep(.1)
     self.assertTrue(os.path.exists(socket_path))
     unix_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
     # Close the client socket when the test is done, whatever the outcome.
     self.addCleanup(unix_socket.close)
     try:
         unix_socket.connect(socket_path)
     except socket.error:
         # socket.error is the exception class; socket.errno is just the errno module.
         self.fail("Could not connect to unix socket.")
     for _ in range(0, 5000):
         # sendall() keeps writing until the complete line is sent, send() may stop early.
         unix_socket.sendall(b"http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n")
     expected_ret_val = DictUtils.getDefaultEventDict({'data': "http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n"})
     expected_ret_val.pop('lumbermill')
     time.sleep(.5)
     event = False
     counter = 0
     for event in self.receiver.getEvent():
         counter += 1
     self.assertTrue(event)
     self.assertEqual(counter, 5000)
     event.pop('lumbermill')
     self.assertDictEqual(event, expected_ret_val)
示例#21
0
 def requeueEvents(self):
     """
     Requeue all events found in the persistence backend.

     The first instance of every module with module_type "input" is
     registered under its class name. Every backend key that starts with
     self.key_prefix is popped and the event is handed to the sendEvent
     method of the input module named in event["lumbermill"]["source_module"].
     Events without source module info, or whose source module is not
     registered, are logged and dropped. The number of requeued events is
     logged at the end.
     """
     input_modules = {}
     for module_info in self.lumbermill.modules.values():
         instance = module_info['instances'][0]
         if instance.module_type == "input":
             input_modules[instance.__class__.__name__] = instance
     self.logger.warning("Found unfinished events. Requeing...")
     # Initialised once, before the loop: the summary below needs a value
     # even when the backend has no keys, and must count across all events.
     requeue_counter = 0
     for key in self.persistence_backend.iterKeys():
         if not key.startswith("%s" % self.key_prefix):
             continue
         event = self.persistence_backend.pop(key)
         if not event:
             continue
         if "source_module" not in event.get("lumbermill", {}):
             self.logger.warning(
                 "Could not requeue event. Source module info not found in event data."
             )
             continue
         source_module = event["lumbermill"]["source_module"]
         if source_module not in input_modules:
             self.logger.error(
                 "Could not requeue event. Module %s not found." %
                 (source_module))
             continue
         requeue_counter += 1
         input_modules[source_module].sendEvent(
             DictUtils.KeyDotNotationDict(event))
     self.logger.warning("Done. Requeued %s events." % (requeue_counter))
     self.logger.warning(
         "Note: If more than one gp instance is running, requeued events count may differ from total events."
     )
 def __testStorageTTL(self):
     """
     Store one event with a ttl, check that it can be fetched and expect it
     to be gone two seconds later.

     Does not seem to be testable without waiting for at least 60 seconds.
     That seems to be the smallest interval the purger thread is running, no matter what ttl.interval is set to.
     The documentation @http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-indices.html#indices-ttl
     does not mention a lower limit, but testing suggests that 60s is the lowest limit.
     """
     index = self.test_index_name
     config = {'nodes': [self.es_server],
               'index_name': index,
               'ttl': 100,
               'sniff_on_start': False,
               'store_interval_in_secs': 1}
     self.test_object.configure(config)
     self.checkConfiguration()
     self.test_object.initAfterFork()
     # Enable ttl mapping.
     self.es.indices.close(index=index)
     self.es.indices.put_settings(index=index, body='{"ttl": {"interval" : "1s"}}')
     self.es.indices.open(index=index)
     self.es.indices.put_mapping(index=index, doc_type='Unknown', body='{"_ttl" : { "enabled" : true }}')
     stored_event = DictUtils.getDefaultEventDict({'McTeagle': "But it was with more simple, homespun verses that McTeagle's unique style first flowered."})
     doc_id = stored_event['lumbermill']['event_id']
     self.test_object.receiveEvent(stored_event)
     self.test_object.shutDown()
     # The document has to exist right after storing.
     try:
         found = self.es.get(index=index, doc_type='Unknown', id=doc_id)
     except elasticsearch.NotFoundError:
         self.fail("Document was not found.")
     self.assertEqual(type(found), dict)
     self.assertDictContainsSubset(stored_event, found['_source'])
     time.sleep(2)
     # After the wait the lookup has to fail.
     try:
         self.es.get(index=index, doc_type='Unknown', id=doc_id)
     except elasticsearch.NotFoundError:
         pass
     else:
         self.fail("Document was not deleted after ttl.")
示例#23
0
 def test(self):
     """
     Start the module with its default configuration, send 100 udp
     datagrams to the configured port on 127.0.0.1 and expect 100 events,
     the last one carrying the sent text as 'data'.
     """
     message = "Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever."
     self.test_object.configure({})
     self.checkConfiguration()
     self.test_object.start()
     # Give server process time to startup.
     time.sleep(.1)
     udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
     udp_socket.settimeout(1)
     for _ in range(100):
         target = ('127.0.0.1', self.test_object.getConfigurationValue('port'))
         udp_socket.sendto(message, target)
     udp_socket.close()
     expected_event = DictUtils.getDefaultEventDict({'data': message})
     expected_event.pop('lumbermill')
     # Stays False if the receiver yields no event at all.
     last_event = False
     time.sleep(2)
     received = 0
     for last_event in self.receiver.getEvent():
         received += 1
     self.assertTrue(last_event != False)
     self.assertEqual(received, 100)
     last_event.pop('lumbermill')
     self.assertDictEqual(last_event, expected_event)
示例#24
0
 def run(self):
     """
     Consume messages from self.consumer while self.alive is set.

     Every message is sent as an event holding its topic and value. With
     self.auto_commit_enable set, the message is marked as done on the
     consumer after the event has been sent.
     """
     while self.alive:
         for message in self.consumer:
             payload = {"topic": message.topic, "data": message.value}
             self.sendEvent(DictUtils.getDefaultEventDict(dict=payload, caller_class_name=self.__class__.__name__))
             if not self.auto_commit_enable:
                 continue
             self.consumer.task_done(message)
示例#25
0
 def testCustomDocId(self):
     """
     Configure doc_id as '$(event_doc_id)', store one event and expect to
     find it under the value of its 'event_doc_id' field in the collection
     named lumbermill-<current utc date>. The database is dropped afterwards.
     """
     verse = "But it was with more simple, homespun verses that McTeagle's unique style first flowered."
     config = {
         'host': self.mongodb_server,
         'doc_id': '$(event_doc_id)',
         'optinonal_connection_params': {
             'serverSelectionTimeoutMS': 1
         }
     }
     self.test_object.configure(config)
     self.checkConfiguration()
     self.test_object.initAfterFork()
     today = datetime.datetime.utcnow().strftime('%Y.%m.%d')
     collection = 'lumbermill-%s' % today
     database = self.test_object.getConfigurationValue('database')
     stored_event = DictUtils.getDefaultEventDict({
         'McTeagle': verse,
         'event_doc_id': 'Ewan'
     })
     self.test_object.receiveEvent(stored_event)
     self.test_object.shutDown()
     query = {'_id': stored_event['event_doc_id']}
     found = self.mongodb[database][collection].find_one(query)
     self.assertEqual(type(found), dict)
     self.assertEqual(found['McTeagle'], verse)
     self.mongodb.drop_database(database)
示例#26
0
 def handleFileChange(self, callback_data):
     """
     Send one event per pending line of a changed file.

     Lines are taken from the left of callback_data['lines'] until popleft()
     raises an IndexError. Each event carries callback_data['filename'] and
     the line itself.
     """
     while True:
         try:
             next_line = callback_data['lines'].popleft()
         except IndexError:
             # Nothing left to send.
             return
         payload = {"filename": callback_data['filename'], "data": next_line}
         line_event = DictUtils.getDefaultEventDict(dict=payload, caller_class_name=self.__class__.__name__)
         self.sendEvent(line_event)
示例#27
0
 def testCustomDatabaseAndCustomCollection(self):
     """
     Configure a custom database and a collection name built from the
     'target_collection' field, store one event and expect to find it by its
     event id in 'lumbermill-mcteagles'. The database is dropped afterwards.
     """
     verse = "But it was with more simple, homespun verses that McTeagle's unique style first flowered."
     config = {
         'host': self.mongodb_server,
         'database': 'my_test_database',
         'collection': 'lumbermill-$(target_collection)',
         'optinonal_connection_params': {
             'serverSelectionTimeoutMS': 1
         }
     }
     self.test_object.configure(config)
     self.checkConfiguration()
     self.test_object.initAfterFork()
     collection = 'lumbermill-mcteagles'
     database = self.test_object.getConfigurationValue('database')
     stored_event = DictUtils.getDefaultEventDict({
         'McTeagle': verse,
         'target_collection': 'mcteagles'
     })
     self.test_object.receiveEvent(stored_event)
     self.test_object.shutDown()
     query = {'_id': stored_event['lumbermill']['event_id']}
     found = self.mongodb[database][collection].find_one(query)
     self.assertEqual(type(found), dict)
     self.assertEqual(found['McTeagle'], verse)
     self.mongodb.drop_database(database)
 def testUserAgentTargetField(self):
     """
     Configure a custom target field, pass a Googlebot user agent and expect
     the device family "Spider" below that target field.
     """
     self.test_object.configure({'source_fields': 'user_agent',
                                 'target_field': 'http_user_agent_data'})
     self.checkConfiguration()
     input_event = DictUtils.getDefaultEventDict({'user_agent': "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"})
     # Separate name for the results, so the input event is not rebound by the loop.
     for result in self.test_object.handleEvent(input_event):
         # assertTrue instead of the deprecated assert_ alias.
         self.assertTrue('http_user_agent_data' in result and result['http_user_agent_data']['device']['family'] == "Spider")
示例#29
0
 def _on_read_line(self, data):
     """
     Send the received data as an event through self.gp_module and, unless
     the stream is closed, register this method again for the next line.
     """
     line_event = DictUtils.getDefaultEventDict({"data": data},
                                                caller_class_name='UnixSocket',
                                                received_from=self.address)
     self.gp_module.sendEvent(line_event)
     if self.stream.closed():
         return
     self.stream.read_until_regex(b'\r?\n', self._on_read_line)
 def __testStorageTTL(self):
     """
     Store one event with a ttl, check that it can be fetched and expect it
     to be gone two seconds later.

     Does not seem to be testable without waiting for at least 60 seconds.
     That seems to be the smallest interval the purger thread is running, no matter what ttl.interval is set to.
     The documentation @http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-indices.html#indices-ttl
     does not mention a lower limit, but testing suggests that 60s is the lowest limit.
     """
     index = self.test_index_name
     config = {'index_name': index,
               'nodes': [self.es_server],
               'ttl': 100,
               'sniff_on_start': False,
               'store_interval_in_secs': 1}
     self.test_object.configure(config)
     self.checkConfiguration()
     self.test_object.initAfterFork()
     # Enable ttl mapping.
     self.es.indices.close(index=index)
     self.es.indices.put_settings(index=index, body='{"ttl": {"interval" : "1s"}}')
     self.es.indices.open(index=index)
     self.es.indices.put_mapping(index=index, doc_type='Unknown', body='{"_ttl" : { "enabled" : true }}')
     stored_event = DictUtils.getDefaultEventDict({'McTeagle': "But it was with more simple, homespun verses that McTeagle's unique style first flowered."})
     doc_id = stored_event['lumbermill']['event_id']
     self.test_object.receiveEvent(stored_event)
     self.test_object.shutDown()
     # The document has to exist right after storing.
     try:
         found = self.es.get(index=index, id=doc_id)
     except elasticsearch.NotFoundError:
         self.fail("Document was not found.")
     self.assertEqual(type(found), dict)
     self.assertDictContainsSubset(stored_event, found['_source'])
     time.sleep(2)
     # After the wait the lookup has to fail.
     try:
         self.es.get(index=index, id=doc_id)
     except elasticsearch.NotFoundError:
         pass
     else:
         self.fail("Document was not deleted after ttl.")
示例#31
0
 def TestATcpConnection(self):
     """
     Start the module on port 5353 with a newline separator, send 1500
     lines over one tcp connection and expect 1500 events, the last one
     carrying the line without the trailing newline as 'data'.

     NOTE(review): the capitalised name does not match unittest's default
     "test" method prefix - confirm that keeping it out of default discovery
     is intended.
     """
     print("testTcpConnection")
     self.test_object.configure({'port': 5353,
                                 'simple_separator': '\n'})
     self.checkConfiguration()
     self.test_object.initAfterFork()
     self.startTornadoEventLoop()
     # Give server process time to startup.
     time.sleep(.1)
     try:
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         s.settimeout(1)
         s.connect(('localhost', self.test_object.getConfigurationValue('port')))
         for _ in range(0, 1500):
             s.sendall("Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever.\n")
         s.shutdown(socket.SHUT_RDWR)
         s.close()
         connection_succeeded = True
     except Exception:
         etype, evalue, _ = sys.exc_info()
         # print() with a single argument is valid on Python 2 and 3.
         print("Could not connect to %s:%s. Exception: %s, Error: %s" % ('localhost', self.test_object.getConfigurationValue("port"), etype, evalue))
         connection_succeeded = False
     self.assertTrue(connection_succeeded)
     expected_ret_val = DictUtils.getDefaultEventDict({'data': "Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever."})
     expected_ret_val.pop('lumbermill')
     # Stays False if the receiver yields no event at all.
     event = False
     time.sleep(2)
     counter = 0
     for event in self.receiver.getEvent():
         counter += 1
     self.assertTrue(event is not False)
     self.assertEqual(counter, 1500)
     event.pop('lumbermill')
     self.assertDictEqual(event, expected_ret_val)
     self.tearDown()
 def testSelectedFields(self):
     """
     Configure the sink to store only the 'sheep' field, with doc id and doc
     type taken from the event, and expect the document to be retrievable
     with doc type 'pirate' under the id found in 'sheep.id'.
     """
     self.test_object.configure({'nodes': [self.es_server],
                                 'fields': ['sheep'],
                                 'doc_id': '$(id)',
                                 'doc_type': '$(type)',
                                 'batch_size': 1})
     self.checkConfiguration()
     self.test_object.initAfterFork()
     timestring = datetime.datetime.utcnow().strftime('%Y.%m.%d')
     index_name = 'lumbermill-%s' % timestring
     # Best effort: start from a fresh index, whether or not one exists already.
     try:
         self.es.indices.delete(index=index_name, ignore=[400, 404])
     except Exception:
         pass
     self.es.indices.create(index=index_name)
     event = DictUtils.getDefaultEventDict({'McTeagle': "But it was with more simple, homespun verses that McTeagle's unique style first flowered.",
                                            'sheep': {'flying': 'scotsman',
                                                      'id': '12345',
                                                      'type': 'pirate'}})
     doc_id = event['sheep.id']
     self.test_object.receiveEvent(event)
     self.test_object.shutDown()
     time.sleep(1)
     try:
         self.es.get(index=index_name, doc_type='pirate', id=doc_id)
     except elasticsearch.exceptions.NotFoundError as e:
         # "as" form of the except clause is valid on Python 2.6+ and 3.
         self.fail(e)
示例#33
0
 def onReceive(self, data):
     """
     Turn a received message into an event and send it.

     Only the first element of data is used. If self.separator is set, the
     part up to the first separator is dropped and the remainder becomes the
     event data. A ValueError is raised if the separator is set but missing
     from the message.
     """
     data = data[0]
     if self.separator:
         # Split once only: the payload itself may contain the separator.
         _topic, data = data.split(self.separator, 1)
     event = DictUtils.getDefaultEventDict({"data": data},
                                           caller_class_name="ZmqTornado")
     self.sendEvent(event)
示例#34
0
 def testAddGeoInfoFromListField(self):
     """The country code must be found when the source field holds a list with one address."""
     config = {'geo_info_fields': ['country_code'],
               'source_fields': ['x_forwarded_for']}
     self.test_object.configure(config)
     self.checkConfiguration()
     input_event = DictUtils.getDefaultEventDict({'x_forwarded_for': ['99.124.167.129']})
     for geo_event in self.test_object.handleEvent(input_event):
         country_code = geo_event['geo_info']['country_code']
         self.assertEqual(country_code, 'US')
示例#35
0
 def testTcpConnection(self):
     """Send 1500 newline terminated lines over one tcp connection and expect exactly 1500 events."""
     self.test_object.configure({})
     self.checkConfiguration()
     self.test_object.initAfterFork()
     self.startTornadoEventLoop()
     # Give server process time to startup.
     time.sleep(.1)
     message = "Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever."
     port = self.test_object.getConfigurationValue('port')
     connection_succeeded = False
     s = None
     try:
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         s.settimeout(1)
         s.connect(('localhost', port))
         for _ in range(0, 1500):
             s.sendall(message + "\n")
         connection_succeeded = True
     except Exception:
         etype, evalue = sys.exc_info()[:2]
         # Parenthesised form prints the same text under Python 2 and 3.
         print("Could not connect to %s:%s. Exception: %s, Error: %s" % ('localhost', port, etype, evalue))
     finally:
         # Close the socket on the failure path as well.
         if s is not None:
             s.close()
     self.assertTrue(connection_succeeded)
     expected_ret_val = DictUtils.getDefaultEventDict({'data': message})
     expected_ret_val.pop('lumbermill')
     # False marks "no event seen"; it is overwritten by the loop variable.
     event = False
     time.sleep(2)
     counter = 0
     for event in self.receiver.getEvent():
         counter += 1
     self.assertTrue(event is not False)
     self.assertEqual(counter, 1500)
     # Compare the last event without its 'lumbermill' entry.
     event.pop('lumbermill')
     self.assertDictEqual(event, expected_ret_val)
示例#36
0
 def testNoop(self):
     """An event handled by the unconfigured module must come back equal to the event passed in."""
     self.test_object.configure()
     self.checkConfiguration()
     event = DictUtils.getDefaultEventDict({})
     event_received = None
     for event_received in self.test_object.handleEvent(event):
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(event, event_received)
     # Fails if handleEvent yielded nothing at all.
     self.assertIsNotNone(event_received)
示例#37
0
 def testTarpit(self):
     """With 'delay' set to 1, an event must reach the receiver about one second after it was handed in."""
     self.test_object.configure({'delay': 1})
     self.checkConfiguration()
     before = time.time()
     # NOTE(review): the return value is discarded. If handleEvent is a generator
     # in this version, nothing runs here and the loop below never executes - confirm.
     self.test_object.handleEvent(DictUtils.getDefaultEventDict({}))
     for event in self.receiver.getEvent():
         after = time.time()
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(1, int(after - before))
示例#38
0
 def testDefaultValues(self):
     """An access log line in self.raw_data must be split by the pattern so that 'bytes_send' is '3395'."""
     # Raw string with the same pattern text as before; it no longer depends on
     # unknown escape sequences being passed through unchanged.
     http_access_log = r'(?P<remote_ip>\d+\.\d+\.\d+\.\d+)\s+(?P<identd>\w+|-)\s+(?P<user>\w+|-)\s+\[(?P<datetime>\d+\/\w+\/\d+:\d+:\d+:\d+\s.\d+)\]\s+"(?P<url>.*)"\s+(?P<http_status>\d+)\s+(?P<bytes_send>\d+)'
     self.test_object.configure({'field_extraction_patterns': [{'http_access_log': http_access_log}]})
     self.checkConfiguration()
     data = DictUtils.getDefaultEventDict({'data': self.raw_data})
     event = None
     for event in self.test_object.handleEvent(data):
         # Two focused assertions replace the deprecated assert_ alias.
         self.assertIn('bytes_send', event)
         self.assertEqual(event['bytes_send'], '3395')
     # Fails if handleEvent yielded nothing at all.
     self.assertIsNotNone(event)
示例#39
0
 def test(self):
     """An event handed to the module must not show up at the receiver."""
     self.test_object.configure({})
     self.checkConfiguration()
     spam_event = DictUtils.getDefaultEventDict({'/dev/null': '"Spam! Spam! Spam! Lovely Spam! Spam! Spam!"'})
     self.test_object.receiveEvent(spam_event)
     # Drain the receiver completely; a single forwarded event is a failure.
     forwarded = [received for received in self.receiver.getEvent()]
     self.assertFalse(len(forwarded) > 0)
 def eventsInQueuesStatistics(self):
     """
     Log the number of events waiting in each module queue.

     If self.emit_as_event is set, one statistic event per queue is sent as well.
     Returns immediately when there are no module queues.
     """
     if len(self.module_queues) == 0:
         return
     self.logger.info(">> Queue stats")
     for module_name, queue in sorted(self.module_queues.items()):
         # Read the size once so the logged and the emitted value cannot disagree.
         queue_size = queue.qsize()
         self.logger.info("Events in %s queue: %s%s%s" % (module_name, AnsiColors.YELLOW, queue_size, AnsiColors.ENDC))
         if self.emit_as_event:
             self.sendEvent(DictUtils.getDefaultEventDict({"queue_count": queue_size,  "field_name": "queue_counts", "interval": self.interval }, caller_class_name="Statistics", event_type="statistic"))
示例#41
0
 def testHandleEvent(self):
     """After handling, event['uri'] must expose the query string of the url under 'query'."""
     self.test_object.configure({'source_field': 'uri'})
     self.checkConfiguration()
     data = DictUtils.getDefaultEventDict(
         {'uri': 'http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty'})
     event = None
     for event in self.test_object.handleEvent(data):
         # Two focused assertions replace the deprecated assert_ alias.
         self.assertIn('uri', event)
         self.assertEqual(event['uri']['query'], 'gambol=putty')
     # Without this the test passes when handleEvent yields nothing.
     self.assertIsNotNone(event)
示例#42
0
 def testAddGeoInfo(self):
     """The country code for the address in 'f2' must be stored under the 'geoip' target field."""
     config = {'source_fields': ['f1', 'f2'],
               'target_field': 'geoip',
               'geo_info_fields': ['country_code']}
     self.test_object.configure(config)
     self.checkConfiguration()
     input_event = DictUtils.getDefaultEventDict({'f2': '99.124.167.129'})
     for geo_event in self.test_object.handleEvent(input_event):
         country_code = geo_event['geoip']['country_code']
         self.assertEqual(country_code, 'US')
示例#43
0
 def testDynamicQueryTargetField(self):
     """With a url given as '$(schema)://$(host)', the target field must be present and non-empty."""
     target_field = 'Johann Gambolputty'
     self.test_object.configure({'url': '$(schema)://$(host)',
                                 'target_field': target_field})
     self.checkConfiguration()
     # presumably these fields fill the placeholders of the url - confirm
     url_parts = {'schema': 'http',
                  'host': 'www.google.com'}
     request_event = DictUtils.getDefaultEventDict(url_parts)
     for result_event in self.test_object.handleEvent(request_event):
         has_result = target_field in result_event and len(result_event[target_field]) > 0
         self.assertTrue(has_result)
示例#44
0
 def sendFacetEventToReceivers(self, facet_data):
     """
     Build an event of type 'facet' from facet_data and send it.

     facet_data must provide the keys 'facets' and 'other_event_fields'.
     """
     facets = facet_data['facets']
     facet_fields = {'facet_field': self.source_field,
                     'facet_count': len(facets),
                     'facets': facets}
     facet_event = DictUtils.getDefaultEventDict(facet_fields,
                                                 caller_class_name=self.__class__.__name__,
                                                 event_type='facet')
     # Added after creation, exactly as a plain key of the finished event.
     facet_event['other_event_fields'] = facet_data['other_event_fields']
     self.sendEvent(facet_event)
示例#45
0
 def testDecodeOfNestedSourceField(self):
     """Json stored in the nested field 'swallow.json_data' must be decoded so that its keys appear in the event."""
     self.test_object.configure({'source_fields': ['swallow.json_data']})
     self.checkConfiguration()
     json_payload = '{"South African": "Fast", "unladen": "swallow"}'
     nested_event = DictUtils.getDefaultEventDict({'swallow': {'json_data': json_payload}})
     decoded = None
     for decoded in self.test_object.handleEvent(nested_event):
         is_decoded = 'South African' in decoded and decoded['South African'] == "Fast"
         self.assertTrue(is_decoded)
     # Fails if handleEvent yielded nothing at all.
     self.assertIsNotNone(decoded)
示例#46
0
 def testIsTimeStamp(self):
     """The '@timestamp' field of a handled event must look like 2013-08-29T10:25:26."""
     self.test_object.configure({})
     self.checkConfiguration()
     event = None
     for event in self.test_object.handleEvent(
             DictUtils.getDefaultEventDict({})):
         # Raw string keeps the pattern text identical; assertTrue replaces
         # the deprecated assert_ alias.
         self.assertTrue(
             re.match(r'^\d+-\d+-\d+T\d+:\d+:\d+$',
                      event['@timestamp']))  # 2013-08-29T10:25:26
     # Without this the test passes when handleEvent yields nothing.
     self.assertIsNotNone(event)
示例#47
0
 def testBase64Decode(self):
     """With action 'decode', the base64 text in 'data' must be replaced by its plain text."""
     self.test_object.configure({'action': 'decode'})
     self.checkConfiguration()
     encoded = 'SSBjdXQgZG93biB0cmVlcywgSSBza2lwIGFuZCBqdW1wLCBJIGxpa2UgdG8gcHJlc3Mgd2lsZCBmbG93ZXJzLiBJIHB1dCBvbiB3b21lbidzIGNsb3RoaW5nIGFuZCBoYW5nIGFyb3VuZCBpbiBiYXJzLg=='
     expected = "I cut down trees, I skip and jump, I like to press wild flowers. I put on women's clothing and hang around in bars."
     encoded_event = DictUtils.getDefaultEventDict({'data': encoded})
     for decoded_event in self.test_object.handleEvent(encoded_event):
         self.assertTrue(decoded_event['data'] == expected)
示例#48
0
 def run(self):
     """
     Send the entries of self.events as events, over and over, while self.alive is set.

     Strings are wrapped as {'data': <string>}, dicts are used as the event
     content directly. Entries of any other type are skipped. If
     self.max_events_count is not 0, sending stops as soon as that many events
     were sent. self.lumbermill.shutDown() is called when the loop ends.
     """
     counter = 0
     while self.alive:
         for event_data in self.events:
             if isinstance(event_data, str):
                 event_fields = {'data': event_data}
             elif isinstance(event_data, dict):
                 event_fields = event_data
             else:
                 # Neither str nor dict: skip it instead of hitting an unbound
                 # name or silently re-sending the previous event.
                 continue
             event = DictUtils.getDefaultEventDict(event_fields, caller_class_name=self.__class__.__name__)
             self.sendEvent(event)
             if self.sleep > 0:
                 time.sleep(self.sleep)
             # A max_events_count of 0 means no limit, so nothing is counted.
             if self.max_events_count == 0:
                 continue
             counter += 1
             if counter == self.max_events_count:
                 # NOTE(review): presumably gives other modules time to process
                 # the events before shutdown - confirm.
                 time.sleep(2)
                 self.alive = False
                 # Stop right here; finishing the pass over self.events would
                 # send more events than max_events_count allows.
                 break
     self.lumbermill.shutDown()
示例#49
0
 def testAddGeoInfo(self):
     """Using the GeoLiteCity data file, the country code for the address in 'f2' must end up in 'geoip'."""
     config = {'source_fields': ['f1','f2'],
               'geoip_dat_path': './test_data/GeoLiteCity.dat',
               'target_field': 'geoip',
               'geo_info_fields': ['country_code']}
     self.test_object.configure(config)
     self.checkConfiguration()
     input_event = DictUtils.getDefaultEventDict({'f2': '99.124.167.129'})
     for geo_event in self.test_object.handleEvent(input_event):
         country_code = geo_event['geoip']['country_code']
         self.assertEqual(country_code, 'US')
示例#50
0
 def testTarpit(self):
     """With 'delay' set to 1, handling an event must take about one second."""
     self.test_object.configure({'delay': 1})
     self.checkConfiguration()
     before = time.time()
     event = None
     for event in self.test_object.handleEvent(DictUtils.getDefaultEventDict({})):
         after = time.time()
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(1, int(after - before))
     # Fails if handleEvent yielded nothing at all.
     self.assertIsNotNone(event)
示例#51
0
 def testGetMetaData(self):
     """With get_metadata enabled, 'http_request_result' must be non-empty and contain non-empty headers."""
     config = {'url': 'http://www.google.com',
               'get_metadata': True}
     self.test_object.configure(config)
     self.checkConfiguration()
     result_key = 'http_request_result'
     event = None
     for event in self.test_object.handleEvent(DictUtils.getDefaultEventDict({})):
         has_result = result_key in event and len(event[result_key]) > 0
         self.assertTrue(has_result)
     # Fails if handleEvent yielded nothing at all.
     self.assertIsNotNone(event)
     # Checked on the last event only.
     headers = event[result_key]['headers']
     self.assertTrue(len(headers) > 0)
 def receiveRateStatistics(self):
     """
     Log the value of the 'events_received' counter and reset it.

     The rate is the counter divided by self.interval. If self.emit_as_event
     is set, the same numbers are sent as a statistic event.
     """
     self.logger.info(">> Receive rate stats")
     # A counter that is missing or zero is reported as 0.
     events_received = MultiProcessStatisticCollector().getCounter('events_received') or 0
     MultiProcessStatisticCollector().resetCounter('events_received')
     events_per_second = events_received/self.interval
     log_line = "Received events in %ss: %s%s (%s/eps)%s" % (self.getConfigurationValue('interval'), AnsiColors.YELLOW, events_received, events_per_second, AnsiColors.ENDC)
     self.logger.info(log_line)
     if not self.emit_as_event:
         return
     rate_stats = {"total_count": events_received,
                   "count_per_sec": events_per_second,
                   "field_name": "all_events",
                   "interval": self.interval}
     self.sendEvent(DictUtils.getDefaultEventDict(rate_stats, caller_class_name="Statistics", event_type="statistic"))
 def testDateTimeParser(self):
     """A date given as '13/Sep/2017' must be rewritten to '13-Sep-2017' using the configured patterns."""
     self.test_object.configure({'source_field': 'date',
                                 'source_date_pattern': '%d/%b/%Y',
                                 'target_date_pattern': '%d-%b-%Y'})
     self.checkConfiguration()
     source_event = DictUtils.getDefaultEventDict({'date': '13/Sep/2017'})
     for parsed_event in self.test_object.handleEvent(source_event):
         self.assertTrue(parsed_event['date'] == "13-Sep-2017")
示例#54
0
 def handleEvent(self, event):
     """
     Yield one event for an incoming 'message' item, nothing for anything else.

     event is indexed as a sequence: event[0] is the kind, event[1] is stored
     as string under 'received_from' and event[2] under 'data'.
     """
     if event[0] == 'message':
         message_fields = {
             "received_from": '%s' % event[1],
             "data": event[2]
         }
         yield DictUtils.getDefaultEventDict(
             dict=message_fields,
             caller_class_name=self.__class__.__name__)
示例#55
0
 def testQueryTargetField(self):
     """With a fixed url, the configured target field must be present and non-empty."""
     target_field = 'Johann Gambolputty'
     config = {'url': 'http://www.google.com',
               'target_field': target_field}
     self.test_object.configure(config)
     self.checkConfiguration()
     request_event = DictUtils.getDefaultEventDict({'TreeNodeID': '1'})
     for result_event in self.test_object.handleEvent(request_event):
         has_result = target_field in result_event and len(result_event[target_field]) > 0
         self.assertTrue(has_result)
示例#56
0
 def testDecodeLineMode(self):
     """A msgpack packed payload in 'data' must be decoded in 'line' mode so that 'spam' appears in the event."""
     self.test_object.configure({'mode': 'line'})
     self.checkConfiguration()
     data = {'spam': 'spam' * 16384}
     msg_packed_data = msgpack.packb(data)
     # Not named "dict", so the builtin stays usable inside this test.
     packed_event = DictUtils.getDefaultEventDict({'data': msg_packed_data})
     event = None
     for event in self.test_object.handleEvent(packed_event):
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(event['spam'], data['spam'])
     # Fails if handleEvent yielded nothing at all.
     self.assertIsNotNone(event)
示例#57
0
 def printIntervalStatistics(self):
     """
     Send one statistic event per field with the counts collected for its values.

     Only counters keyed by a (field_name, field_value) tuple whose field_name
     is listed in self.fields are reported; each reported counter is reset.
     All other counters are left untouched.
     """
     def sendFieldStatistic(field_name, total_count, field_counts):
         # Emit the collected counts of a single field as one statistic event.
         self.sendEvent(
             DictUtils.getDefaultEventDict(
                 {
                     "total_count": total_count,
                     "field_name": field_name,
                     "field_counts": field_counts,
                     "interval": self.interval
                 },
                 caller_class_name="Statistics",
                 event_type="statistic"))

     last_field_name = None
     field_counts = {}
     total_count = 0
     for field_name_value, field_count in sorted(
             self.stats_collector.getAllCounters().items()):
         if not isinstance(field_name_value, tuple):
             continue
         field_name, field_value = field_name_value
         if field_name not in self.fields:
             continue
         self.stats_collector.resetCounter(field_name_value)
         if not last_field_name:
             last_field_name = field_name
         if field_name != last_field_name:
             # Counters arrive sorted, so a new name means the previous
             # field is complete and can be sent.
             sendFieldStatistic(last_field_name, total_count, field_counts)
             last_field_name = field_name
             field_counts = {}
             total_count = 0
         field_counts[field_value] = field_count
         total_count += field_count
     # Send remaining.
     if last_field_name:
         # last_field_name, not the loop variable: field_name may hold the name
         # of a counter that was skipped because it is not in self.fields.
         sendFieldStatistic(last_field_name, total_count, field_counts)
 def testDefaultMappedSyslogPrivalFields(self):
     """With map_values enabled, prival '13' must give severity 'Notice' and facility 'user-level'."""
     self.test_object.configure({'source_field': 'syslog_prival',
                                 'map_values': True})
     self.checkConfiguration()
     syslog_event = DictUtils.getDefaultEventDict({'syslog_prival': '13', 'data': 'This is an ex parrot!'})
     # False marks "no event seen"; it is overwritten by the loop variable.
     event = False
     for event in self.test_object.handleEvent(syslog_event):
         has_severity = 'syslog_severity' in event and event['syslog_severity'] == "Notice"
         self.assertTrue(has_severity)
         has_facility = 'syslog_facility' in event and event['syslog_facility'] == "user-level"
         self.assertTrue(has_facility)
     self.assertTrue(event != False)
示例#59
0
 def testUserAgentSingleSourceField(self):
     """With source_fields given as a plain string, the user agent family must be reported as 'Firefox'."""
     self.test_object.configure({'source_fields': 'user_agent'})
     self.checkConfiguration()
     # Kept under its own name so the loop variable does not overwrite the input.
     data = DictUtils.getDefaultEventDict({
         'user_agent':
         "Mozilla/5.0 (Windows NT 6.0; rv:33.0) Gecko/20100101 Firefox/33.0"
     })
     event = None
     for event in self.test_object.handleEvent(data):
         # Two focused assertions replace the deprecated assert_ alias.
         self.assertIn('user_agent_info', event)
         self.assertEqual(event['user_agent_info']['user_agent']['family'],
                          "Firefox")
     # Without this the test passes when handleEvent yields nothing.
     self.assertIsNotNone(event)
示例#60
0
 def testBase64Decode(self):
     """With action 'decode', the base64 text in 'data' must be replaced by its plain text."""
     self.test_object.configure({'action': 'decode'})
     self.checkConfiguration()
     encoded = 'SSBjdXQgZG93biB0cmVlcywgSSBza2lwIGFuZCBqdW1wLCBJIGxpa2UgdG8gcHJlc3Mgd2lsZCBmbG93ZXJzLiBJIHB1dCBvbiB3b21lbidzIGNsb3RoaW5nIGFuZCBoYW5nIGFyb3VuZCBpbiBiYXJzLg=='
     expected = "I cut down trees, I skip and jump, I like to press wild flowers. I put on women's clothing and hang around in bars."
     encoded_event = DictUtils.getDefaultEventDict({'data': encoded})
     for decoded_event in self.test_object.handleEvent(encoded_event):
         is_decoded = decoded_event['data'] == expected
         self.assertTrue(is_decoded)