class TestRuleSource(unittest.TestCase):
    """
    Unit tests covering `RuleSource`.
    """
    def setUp(self):
        # Every test gets a fresh source rooted at "/"
        self.source = RuleSource("/")

    def test_is_data_file_when_is(self):
        # A path ending in ".rule.py" is expected to be accepted as a data file
        rule_file_path = "/my/file.rule.py"
        self.assertTrue(self.source.is_data_file(rule_file_path))

    def test_is_data_file_when_is_not(self):
        # A path ending in just ".py" is expected to be rejected
        non_rule_file_path = "/my/file.py"
        self.assertFalse(self.source.is_data_file(non_rule_file_path))
def setUp(self):
    """
    Creates the temporary directories, sources, mock cookie jar and processor manager used by each test,
    then registers a cookie jar listener that triggers processing.
    """
    test_name = TestIntegration.__name__
    self.rules_directory = mkdtemp(prefix="rules", suffix=test_name)
    self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=test_name)
    self.resource_accessor = StubContext()

    # Enrichment loader source, reading from its own temporary directory
    loader_source = EnrichmentLoaderSource(self.enrichment_loaders_directory, self.resource_accessor)
    self.enrichment_loader_source = loader_source
    loader_source.start()

    # Mocked cookie jar
    self.cookie_jar = create_magic_mock_cookie_jar()

    # Rule source, reading from its own temporary directory
    rule_source = RuleSource(self.rules_directory, self.resource_accessor)
    self.rules_source = rule_source
    rule_source.start()

    # Processor manager tying the jar and both sources together
    self.processor_manager = BasicProcessorManager(
        self.cookie_jar, self.rules_source, self.enrichment_loader_source)

    def process_on_notification(*_):
        # Listener arguments are ignored; any notification triggers processing
        self.processor_manager.process_any_cookies()

    self.cookie_jar.add_listener(process_on_notification)
def run(config_location):
    """
    Builds and starts every component of the system from a configuration directory.

    The configuration is loaded from `setup.conf` inside `config_location`. Retrieval resumes from the
    timestamp stored in the `since` file in the same directory when that file can be read and parsed;
    otherwise it starts from the time of invocation.

    :param config_location: path of the directory containing `setup.conf` (and, optionally, `since`)
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port,
                                               config.influxdb.username, config.influxdb.password,
                                               config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second,
                                       config.cookie_jar.url,
                                       config.cookie_jar.database,
                                       config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency,
                                       verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # Slack client is currently disabled; the context receives `None` in its place
    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # Message queue client is currently disabled; the context receives `None` in its place
    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders has access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar,
                                             config.cookie_jar.max_requests_per_second, logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    try:
        with open(os.path.join(config_location, "since"), "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError, OverflowError):
        # Missing/unreadable file (OSError), non-integer contents (ValueError) or a timestamp that
        # cannot be converted (ValueError/OverflowError/OSError): fall back to now. Deliberately not a
        # bare `except`, so that KeyboardInterrupt and SystemExit still propagate.
        since_time = datetime.now()

    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
def run(config_location):
    """
    Loads the configuration at `config_location`, wires together the logger, cookie jar, retrieval
    manager, rule/enrichment sources, processor manager and HTTP API, then starts them.

    :param config_location: location passed unchanged to `load_config`
    """
    settings = load_config(config_location)

    # Measurement logging (InfluxDB)
    buffer_latency = timedelta(seconds=settings.influxdb.buffer_latency)
    influxdb_connection = InfluxDBConnectionConfig(
        settings.influxdb.host,
        settings.influxdb.port,
        settings.influxdb.username,
        settings.influxdb.password,
        settings.influxdb.database)
    measurement_logger = InfluxDBLogger(influxdb_connection, buffer_latency=buffer_latency)

    # HTTP connection pool size (for CouchDB); read from the environment rather than the
    # configuration, as it is not expected to need much tweaking
    patch_http_connection_pool(maxsize=int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16)))

    # Cookie jar
    jar = RateLimitedBiscuitTin(
        settings.cookie_jar.max_requests_per_second,
        settings.cookie_jar.url,
        settings.cookie_jar.database)
    add_cookie_jar_logging(jar, measurement_logger)

    # Data retrieval manager
    mapper = BatonUpdateMapper(settings.baton.binaries_location, zone=settings.baton.zone)
    retriever = PeriodicRetrievalManager(settings.retrieval.period, mapper, measurement_logger)

    # The Slack client and message queue (e.g. RabbitMQ) client are not set up here, so the
    # context is given `None` for each of them
    slack_client = None
    queue_client = None

    # Context made available to rules and enrichment loaders
    shared_context = HgiContext(jar, settings, slack_client, queue_client)

    # Rule source
    rule_source = RuleSource(settings.processing.rules_location, shared_context)
    rule_source.start()

    # Enrichment loader source
    loader_source = EnrichmentLoaderSource(settings.processing.enrichment_loaders_location, shared_context)
    loader_source.start()

    # Data processor manager
    manager = BasicProcessorManager(
        jar, rule_source, loader_source, settings.processing.max_threads, measurement_logger)

    # Hook the retrieval and processor managers up to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retriever, jar, settings.cookie_jar.max_requests_per_second, measurement_logger)
    _connect_processor_manager_to_cookie_jar(manager, jar)

    # HTTP API
    http_api = HTTP_API()
    http_api.inject(APIDependency.CookieJar, jar)
    http_api.listen(settings.api.port)

    # Begin retrieval from the configured starting point
    retriever.start(settings.retrieval.since)

    # Deal with any cookies that are already waiting to be processed
    manager.process_any_cookies()

    # Monitors
    threads_monitor = ThreadsMonitor(measurement_logger, buffer_latency)
    threads_monitor.start()
    cookie_jar_monitor = CookieJarMonitor(measurement_logger, buffer_latency, jar)
    cookie_jar_monitor.start()
class TestIntegration(unittest.TestCase): """ Integration tests for processor. """ _NUMBER_OF_COOKIES = 250 _NUMBER_OF_PROCESSORS = 10 def setUp(self): self.rules_directory = mkdtemp(prefix="rules", suffix=TestIntegration.__name__) self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=TestIntegration.__name__) self.resource_accessor = StubContext() # Setup enrichment self.enrichment_loader_source = EnrichmentLoaderSource( self.enrichment_loaders_directory, self.resource_accessor) self.enrichment_loader_source.start() # Setup cookie jar self.cookie_jar = create_magic_mock_cookie_jar() # Setup rules source self.rules_source = RuleSource(self.rules_directory, self.resource_accessor) self.rules_source.start() # Setup the data processor manager self.processor_manager = BasicProcessorManager( self.cookie_jar, self.rules_source, self.enrichment_loader_source) def cookie_jar_connector(*args): self.processor_manager.process_any_cookies() self.cookie_jar.add_listener(cookie_jar_connector) def tearDown(self): shutil.rmtree(self.rules_directory) shutil.rmtree(self.enrichment_loaders_directory) def test_with_no_rules_or_enrichments(self): cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES) block_until_processed(self.cookie_jar, cookie_ids, TestIntegration._NUMBER_OF_COOKIES) self.assertEqual(self.cookie_jar.mark_as_complete.call_count, len(cookie_ids)) self.cookie_jar.mark_as_failed.assert_not_called() # TODO: Call if no rules match and no further enrichments? 
@unittest.skip("Flaky test") def test_with_no_rules_but_enrichments(self): add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS) TestIntegration._NUMBER_OF_COOKIES = 1 cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES) cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE) expected_number_of_times_processed = len(cookie_ids) * 2 block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed) self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed) self.cookie_jar.mark_as_failed.assert_not_called() enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all()) enrichment_loader_checker.assert_call_counts( NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0) enrichment_loader_checker.assert_call_counts( HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids)) enrichment_loader_checker.assert_call_counts( NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1) # TODO: Call if no rules match and no further enrichments? 
@unittest.skip("Flaky test") def test_with_rules_but_no_enrichments(self): add_data_files(self.rules_source, _RULE_FILE_LOCATIONS) cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES) cookie_ids.append(NAME_RULE_MATCH_COOKIE) expected_number_of_times_processed = len(cookie_ids) block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed) self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed) self.cookie_jar.mark_as_failed.assert_not_called() rule_checker = RuleChecker(self, self.rules_source.get_all()) rule_checker.assert_call_counts( ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed) rule_checker.assert_call_counts( NO_MATCH_RULE_ID, expected_number_of_times_processed, 0) rule_checker.assert_call_counts( NAME_MATCH_RULE_ID, expected_number_of_times_processed, 1) rule_checker.assert_call_counts( HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, 0) @unittest.skip("Flaky test") def test_with_rules_and_enrichments(self): add_data_files(self.rules_source, _RULE_FILE_LOCATIONS) assert len(self.rules_source.get_all()) == len(_RULE_FILE_LOCATIONS) add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS) assert len(self.enrichment_loader_source.get_all()) == len(_ENRICHMENT_LOADER_LOCATIONS) cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES) cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE) cookie_ids.append(NAME_RULE_MATCH_COOKIE) expected_number_of_times_processed = len(cookie_ids) * 2 + 1 block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed) self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed) self.cookie_jar.mark_as_failed.assert_not_called() rule_checker = RuleChecker(self, self.rules_source.get_all()) rule_checker.assert_call_counts( ALL_MATCH_RULE_ID, expected_number_of_times_processed, 
expected_number_of_times_processed) rule_checker.assert_call_counts( NO_MATCH_RULE_ID, expected_number_of_times_processed, 0) rule_checker.assert_call_counts( NAME_MATCH_RULE_ID, expected_number_of_times_processed, len(_ENRICHMENT_LOADER_LOCATIONS) - 1) rule_checker.assert_call_counts( HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, len(cookie_ids)) enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all()) enrichment_loader_checker.assert_call_counts( NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0) enrichment_loader_checker.assert_call_counts( HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids)) enrichment_loader_checker.assert_call_counts( NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)
def setUp(self):
    """
    Gives each test a fresh `RuleSource` constructed with "/" as its argument.
    """
    root_rule_source = RuleSource("/")
    self.source = root_rule_source