def test_process_any_cookies_when_no_processing_resources(self):
    """
    Checks that a cookie queued while a processor is blocked is still processed afterwards.

    The first cookie's rule blocks inside its matching criteria. A second cookie is then marked for processing and
    the rule list is swapped before the block is lifted. Both cookies must end up marked as complete and the
    monitor given as the second callable of the replacement rule must have been called exactly once.
    """
    processor_manager = BasicProcessorManager(
        self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))

    # Released once per `mark_as_complete` call so the test can wait for completions
    complete = Semaphore(0)

    def on_complete(*args):
        complete.release()

    self.cookie_jar.mark_as_complete = MagicMock(side_effect=on_complete)

    rule_lock = Semaphore(0)
    match_lock = Lock()
    match_lock.acquire()

    def matching_criteria(cookie: Cookie, _: Context) -> bool:
        # Signal that matching has started, then block until the test releases `rule_lock`
        match_lock.release()
        rule_lock.acquire()
        return True

    self.rules.append(Rule(matching_criteria, lambda *args: True, RULE_IDENTIFIER))
    self.cookie_jar.mark_for_processing(self.cookie.identifier)
    processor_manager.process_any_cookies()
    match_lock.acquire()
    # Processor should have locked at this point - i.e. 0 free processors

    self.cookie_jar.mark_for_processing("/other/cookie")
    processor_manager.process_any_cookies()
    # The fact that there are more cookies should be "remembered" by the processor manager

    # Change the rules for the next cookie to be processed
    self.rules.pop()
    rule_execute_monitor = MagicMock(return_value=False)
    self.rules.append(Rule(lambda *args: True, rule_execute_monitor, RULE_IDENTIFIER))

    # Free the processor to complete the first cookie
    rule_lock.release()
    rule_lock.release()

    # Wait for both cookies to be processed
    for _ in range(2):
        complete.acquire()

    self.cookie_jar.mark_as_complete.assert_has_calls([call(self.cookie.identifier), call("/other/cookie")])
    # unittest assertions are used rather than bare `assert`, which is stripped under `python -O`
    self.assertEqual(len(self.rules), 1)
    self.assertEqual(rule_execute_monitor.call_count, 1)
def setUp(self):
    """
    Creates temporary rule and enrichment loader directories, starts a source on each, and connects a
    `BasicProcessorManager` to the cookie jar so that it is asked to process cookies whenever a listener fires.
    """
    name_suffix = TestIntegration.__name__
    self.rules_directory = mkdtemp(prefix="rules", suffix=name_suffix)
    self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=name_suffix)
    self.resource_accessor = StubContext()

    # Enrichment loaders come from their own temporary directory
    loader_source = EnrichmentLoaderSource(self.enrichment_loaders_directory, self.resource_accessor)
    self.enrichment_loader_source = loader_source
    loader_source.start()

    # Cookie jar used by the processor manager
    self.cookie_jar = create_magic_mock_cookie_jar()

    # Rules come from the temporary rules directory
    rule_source = RuleSource(self.rules_directory, self.resource_accessor)
    self.rules_source = rule_source
    rule_source.start()

    # Processor manager built on the cookie jar and both sources
    self.processor_manager = BasicProcessorManager(self.cookie_jar, rule_source, loader_source)

    def trigger_processing(*_):
        # Looked up via `self` on every call, exactly as the listener is invoked
        self.processor_manager.process_any_cookies()

    self.cookie_jar.add_listener(trigger_processing)
def setUp(self):
    """
    Creates the fixtures shared by the tests: a cookie jar, empty rule and enrichment loader lists, two
    notifications, one cookie, and a `BasicProcessorManager` reading from those lists.
    """
    self.cookie_jar = create_magic_mock_cookie_jar()
    # The lists are wrapped (not copied here) in `ListDataSource`, and tests append to them afterwards
    self.rules = []
    self.enrichment_loaders = []
    self.notifications = [Notification("a", "b"), Notification("c", "d")]
    self.cookie = Cookie(COOKIE_IDENTIFIER)
    self.processor_manager = BasicProcessorManager(
        self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))
class TestBasicProcessorManager(unittest.TestCase):
    """
    Tests for `BasicProcessorManager`.
    """
    def setUp(self):
        """
        Creates the fixtures shared by the tests: a cookie jar, empty rule and enrichment loader lists, two
        notifications, one cookie, and a `BasicProcessorManager` reading from those lists.
        """
        self.cookie_jar = create_magic_mock_cookie_jar()
        self.rules = []
        self.enrichment_loaders = []
        self.notifications = [Notification("a", "b"), Notification("c", "d")]
        self.cookie = Cookie(COOKIE_IDENTIFIER)
        self.processor_manager = BasicProcessorManager(
            self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))

    def test_init_with_less_than_one_thread(self):
        """
        Constructing a manager with 0 as the fourth argument must raise `ValueError`.
        """
        # The loaders are wrapped in `ListDataSource`, as in every other construction in this class, so the
        # only unusual argument is the trailing 0
        self.assertRaises(
            ValueError, BasicProcessorManager, self.cookie_jar, ListDataSource(self.rules),
            ListDataSource(self.enrichment_loaders), 0)

    def test_process_any_cookies_when_no_jobs(self):
        """
        With nothing marked for processing, the jar is asked once for the next cookie and nothing is completed.
        """
        complete = Lock()
        complete.acquire()

        def on_get_next_for_processing(*args):
            complete.release()

        self.cookie_jar.get_next_for_processing = MagicMock(side_effect=on_get_next_for_processing)

        self.processor_manager.process_any_cookies()

        # Blocks until `get_next_for_processing` has been called
        complete.acquire()
        self.cookie_jar.get_next_for_processing.assert_called_once_with()
        self.cookie_jar.mark_as_complete.assert_not_called()

    def test_process_any_cookies_when_jobs(self):
        """
        Marks 100 cookies for processing, triggering processing from a new thread each time, and waits until
        `mark_as_complete` has been called 100 times.
        """
        complete = Semaphore(0)

        def on_complete(*args):
            complete.release()

        self.cookie_jar.mark_as_complete = MagicMock(side_effect=on_complete)

        self.rules.append(Rule(lambda *args: True, lambda *args: True, RULE_IDENTIFIER))

        number_to_process = 100
        for i in range(number_to_process):
            self.cookie_jar.mark_for_processing("%s/%s" % (COOKIE_IDENTIFIER, i))
            Thread(target=self.processor_manager.process_any_cookies).start()

        # One semaphore permit is released per completed cookie
        for _ in range(number_to_process):
            complete.acquire()

    def test_process_any_cookies_when_no_processing_resources(self):
        """
        Checks that a cookie queued while a processor is blocked is still processed afterwards.

        The first cookie's rule blocks inside its matching criteria. A second cookie is then marked for
        processing and the rule list is swapped before the block is lifted. Both cookies must end up marked as
        complete and the monitor given as the second callable of the replacement rule must have been called
        exactly once.
        """
        processor_manager = BasicProcessorManager(
            self.cookie_jar, ListDataSource(self.rules), ListDataSource(self.enrichment_loaders))

        complete = Semaphore(0)

        def on_complete(*args):
            complete.release()

        self.cookie_jar.mark_as_complete = MagicMock(side_effect=on_complete)

        rule_lock = Semaphore(0)
        match_lock = Lock()
        match_lock.acquire()

        def matching_criteria(cookie: Cookie, _: Context) -> bool:
            # Signal that matching has started, then block until the test releases `rule_lock`
            match_lock.release()
            rule_lock.acquire()
            return True

        self.rules.append(Rule(matching_criteria, lambda *args: True, RULE_IDENTIFIER))
        self.cookie_jar.mark_for_processing(self.cookie.identifier)
        processor_manager.process_any_cookies()
        match_lock.acquire()
        # Processor should have locked at this point - i.e. 0 free processors

        self.cookie_jar.mark_for_processing("/other/cookie")
        processor_manager.process_any_cookies()
        # The fact that there are more cookies should be "remembered" by the processor manager

        # Change the rules for the next cookie to be processed
        self.rules.pop()
        rule_execute_monitor = MagicMock(return_value=False)
        self.rules.append(Rule(lambda *args: True, rule_execute_monitor, RULE_IDENTIFIER))

        # Free the processor to complete the first cookie
        rule_lock.release()
        rule_lock.release()

        # Wait for both cookies to be processed
        for _ in range(2):
            complete.acquire()

        self.cookie_jar.mark_as_complete.assert_has_calls([call(self.cookie.identifier), call("/other/cookie")])
        # unittest assertions are used rather than bare `assert`, which is stripped under `python -O`
        self.assertEqual(len(self.rules), 1)
        self.assertEqual(rule_execute_monitor.call_count, 1)
def run(config_location):
    """
    Builds every component from the configuration, connects them together and starts them.

    :param config_location: directory containing `setup.conf`; the `since` file is also looked for here
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port,
                                               config.influxdb.username, config.influxdb.password,
                                               config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second, config.cookie_jar.url,
                                       config.cookie_jar.database, config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency, verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders has access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar,
                                             config.cookie_jar.max_requests_per_second, logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    try:
        with open(os.path.join(config_location, "since"), "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError, OverflowError):
        # Missing/unreadable file (OSError), non-integer content (ValueError) or a timestamp outside the
        # supported range (OverflowError/OSError): fall back to "now". Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        since_time = datetime.now()

    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
def run(config_location):
    """
    Builds every component from the configuration, connects them together and starts them.

    :param config_location: passed unchanged to `load_config`
    """
    config = load_config(config_location)

    # Measurement logging
    influx_settings = config.influxdb
    logging_buffer_latency = timedelta(seconds=influx_settings.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(
        influx_settings.host,
        influx_settings.port,
        influx_settings.username,
        influx_settings.password,
        influx_settings.database,
    )
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # HTTP connection pool size (for CouchDB); read from the environment rather than
    # the configuration as it is not expected to need much tweaking
    http_pool_size = int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16))
    patch_http_connection_pool(maxsize=http_pool_size)

    # Cookie jar
    jar_settings = config.cookie_jar
    cookie_jar = RateLimitedBiscuitTin(jar_settings.max_requests_per_second, jar_settings.url, jar_settings.database)
    add_cookie_jar_logging(cookie_jar, logger)

    # Data retrieval manager
    mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, mapper, logger)

    # The Slack and message queue (e.g. RabbitMQ) clients are not constructed; both are None:
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    slack = None
    message_queue = None

    # Context made available to rules and enrichment loaders
    context = HgiContext(cookie_jar, config, slack, message_queue)

    # Rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Data processor manager
    processor_manager = BasicProcessorManager(
        cookie_jar, rules_source, enrichment_loader_source, config.processing.max_threads, logger)

    # Hook the retrieval and processor managers up to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retrieval_manager, cookie_jar, config.cookie_jar.max_requests_per_second, logger)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # HTTP API
    http_api = HTTP_API()
    http_api.inject(APIDependency.CookieJar, cookie_jar)
    http_api.listen(config.api.port)

    # Begin retrieval, then deal with any cookies that are still unprocessed
    retrieval_manager.start(config.retrieval.since)
    processor_manager.process_any_cookies()

    # Monitors
    threads_monitor = ThreadsMonitor(logger, logging_buffer_latency)
    threads_monitor.start()
    cookie_jar_monitor = CookieJarMonitor(logger, logging_buffer_latency, cookie_jar)
    cookie_jar_monitor.start()
class TestIntegration(unittest.TestCase):
    """
    Integration tests for processor.
    """
    _NUMBER_OF_COOKIES = 250
    _NUMBER_OF_PROCESSORS = 10

    def setUp(self):
        """
        Creates temporary rule and enrichment loader directories, starts a source on each, and connects a
        `BasicProcessorManager` to the cookie jar so that it is asked to process cookies whenever a listener fires.
        """
        self.rules_directory = mkdtemp(prefix="rules", suffix=TestIntegration.__name__)
        self.enrichment_loaders_directory = mkdtemp(prefix="enrichment_loaders", suffix=TestIntegration.__name__)
        self.resource_accessor = StubContext()

        # Setup enrichment
        self.enrichment_loader_source = EnrichmentLoaderSource(
            self.enrichment_loaders_directory, self.resource_accessor)
        self.enrichment_loader_source.start()

        # Setup cookie jar
        self.cookie_jar = create_magic_mock_cookie_jar()

        # Setup rules source
        self.rules_source = RuleSource(self.rules_directory, self.resource_accessor)
        self.rules_source.start()

        # Setup the data processor manager
        self.processor_manager = BasicProcessorManager(
            self.cookie_jar, self.rules_source, self.enrichment_loader_source)

        def cookie_jar_connector(*args):
            self.processor_manager.process_any_cookies()

        self.cookie_jar.add_listener(cookie_jar_connector)

    def tearDown(self):
        """
        Removes the temporary directories created in `setUp`.
        """
        shutil.rmtree(self.rules_directory)
        shutil.rmtree(self.enrichment_loaders_directory)

    def test_with_no_rules_or_enrichments(self):
        """
        With no rules or enrichment loaders, every cookie is marked complete once and none is marked failed.
        """
        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        block_until_processed(self.cookie_jar, cookie_ids, TestIntegration._NUMBER_OF_COOKIES)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, len(cookie_ids))
        self.cookie_jar.mark_as_failed.assert_not_called()
        # TODO: Call if no rules match and no further enrichments?

    @unittest.skip("Flaky test")
    def test_with_no_rules_but_enrichments(self):
        """
        With enrichment loaders but no rules, checks completion counts and per-loader call counts.
        """
        add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS)

        # Kept local: overwriting `TestIntegration._NUMBER_OF_COOKIES` would change the number of cookies
        # used by every test that runs after this one
        number_of_cookies = 1
        cookie_ids = _generate_cookie_ids(number_of_cookies)
        cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids) * 2
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all())
        enrichment_loader_checker.assert_call_counts(
            NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0)
        enrichment_loader_checker.assert_call_counts(
            HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids))
        enrichment_loader_checker.assert_call_counts(
            NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)
        # TODO: Call if no rules match and no further enrichments?

    @unittest.skip("Flaky test")
    def test_with_rules_but_no_enrichments(self):
        """
        With rules but no enrichment loaders, checks completion counts and per-rule call counts.
        """
        add_data_files(self.rules_source, _RULE_FILE_LOCATIONS)

        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        cookie_ids.append(NAME_RULE_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids)
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        rule_checker = RuleChecker(self, self.rules_source.get_all())
        rule_checker.assert_call_counts(
            ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed)
        rule_checker.assert_call_counts(
            NO_MATCH_RULE_ID, expected_number_of_times_processed, 0)
        rule_checker.assert_call_counts(
            NAME_MATCH_RULE_ID, expected_number_of_times_processed, 1)
        rule_checker.assert_call_counts(
            HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, 0)

    @unittest.skip("Flaky test")
    def test_with_rules_and_enrichments(self):
        """
        With both rules and enrichment loaders, checks completion counts and per-rule/per-loader call counts.
        """
        add_data_files(self.rules_source, _RULE_FILE_LOCATIONS)
        # Precondition checks use unittest assertions; bare `assert` is stripped under `python -O`
        self.assertEqual(len(self.rules_source.get_all()), len(_RULE_FILE_LOCATIONS))
        add_data_files(self.enrichment_loader_source, _ENRICHMENT_LOADER_LOCATIONS)
        self.assertEqual(len(self.enrichment_loader_source.get_all()), len(_ENRICHMENT_LOADER_LOCATIONS))

        cookie_ids = _generate_cookie_ids(TestIntegration._NUMBER_OF_COOKIES)
        cookie_ids.append(NAME_ENRICHMENT_LOADER_MATCH_COOKIE)
        cookie_ids.append(NAME_RULE_MATCH_COOKIE)
        expected_number_of_times_processed = len(cookie_ids) * 2 + 1
        block_until_processed(self.cookie_jar, cookie_ids, expected_number_of_times_processed)

        self.assertEqual(self.cookie_jar.mark_as_complete.call_count, expected_number_of_times_processed)
        self.cookie_jar.mark_as_failed.assert_not_called()

        rule_checker = RuleChecker(self, self.rules_source.get_all())
        rule_checker.assert_call_counts(
            ALL_MATCH_RULE_ID, expected_number_of_times_processed, expected_number_of_times_processed)
        rule_checker.assert_call_counts(
            NO_MATCH_RULE_ID, expected_number_of_times_processed, 0)
        rule_checker.assert_call_counts(
            NAME_MATCH_RULE_ID, expected_number_of_times_processed, len(_ENRICHMENT_LOADER_LOCATIONS) - 1)
        rule_checker.assert_call_counts(
            HASH_ENRICHED_MATCH_RULE_ID, expected_number_of_times_processed, len(cookie_ids))

        enrichment_loader_checker = EnrichmentLoaderChecker(self, self.enrichment_loader_source.get_all())
        enrichment_loader_checker.assert_call_counts(
            NO_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 0)
        enrichment_loader_checker.assert_call_counts(
            HASH_ENRICHMENT_LOADER_ID, expected_number_of_times_processed - 1, len(cookie_ids))
        enrichment_loader_checker.assert_call_counts(
            NAME_MATCH_LOADER_ENRICHMENT_LOADER_ID, expected_number_of_times_processed, 1)