def setUp(self):
    """
    Runs the parent set up and then creates the retrieval manager under test.
    """
    super().setUp()

    # Retrieval manager built from `self.update_mapper` and `self.logger`
    manager = PeriodicRetrievalManager(RETRIEVAL_PERIOD, self.update_mapper, self.logger)
    self.retrieval_manager = manager
def run(config_location):
    """
    Builds every component of the application, connects them together and starts them.

    :param config_location: directory containing the "setup.conf" configuration file and, optionally, a "since" file
    holding the integer timestamp from which retrieval should start
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port, config.influxdb.username,
                                               config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second, config.cookie_jar.url,
                                       config.cookie_jar.database, config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency, verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders have access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar,
                                             config.cookie_jar.max_requests_per_second, logger)
    # NOTE(review): presumably keeps the "since" file read below up to date - confirm against the helper
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    since_location = os.path.join(config_location, "since")
    try:
        with open(since_location, "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError, OverflowError):
        # Best effort: a missing, unreadable, non-integer or out-of-range "since" file falls back to "now". Only
        # these failures are caught so that KeyboardInterrupt/SystemExit and programming errors still propagate
        since_time = datetime.now()
    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
class TestPeriodicRetrievalManager(_BaseRetrievalManagerTest):
    """
    Test cases for `PeriodicRetrievalManager`.
    """
    def setUp(self):
        super().setUp()

        # Create the retrieval manager under test
        self.retrieval_manager = PeriodicRetrievalManager(
            RETRIEVAL_PERIOD, self.update_mapper, self.logger)

    @unittest.skip("Flaky test")
    def test_run(self):
        expected_cycles = 10
        update_listener = MagicMock()
        self.logger.add = MagicMock()
        self.retrieval_manager.add_listener(update_listener)

        self._setup_to_do_n_cycles(expected_cycles, self.updates)

        self.assertEqual(self.logger.record.call_count, expected_cycles)
        expected_calls = [call(self.updates) for _ in range(expected_cycles)]
        update_listener.assert_has_calls(expected_calls)

    def test_run_if_running(self):
        # First run happens in the background; the second, direct run is expected to be refused
        background_run = Thread(target=self.retrieval_manager.run)
        background_run.start()
        with self.assertRaises(RuntimeError):
            self.retrieval_manager.run()

    def test_stop_and_then_restart(self):
        manager = self.retrieval_manager
        manager.start()
        manager.stop()
        manager.start()

    def _setup_to_do_n_cycles(self, number_of_cycles: int, updates_each_cycle: UpdateCollection=None):
        """
        Drives the retrieval manager through exactly n retrieval cycles and then stops it.
        :param number_of_cycles: the number of cycles to do
        :param updates_each_cycle: the updates the update mapper returns on every cycle (an empty collection if `None`)
        """
        if updates_each_cycle is None:
            updates_each_cycle = UpdateCollection([])

        # Released by the retrieval side every time a cycle asks the mapper for updates
        cycle_entered = Semaphore(0)
        # Held by this method; the retrieval side blocks on it until the cycle has been counted
        count_gate = Lock()
        count_gate.acquire()

        def block_until_counted(*args) -> UpdateCollection:
            cycle_entered.release()
            count_gate.acquire()
            return updates_each_cycle

        mapper = self.retrieval_manager.update_mapper
        mapper.get_all_since.side_effect = block_until_counted
        self.retrieval_manager.start()

        for completed_cycles in range(1, number_of_cycles + 1):
            cycle_entered.acquire()
            count_gate.release()
            if completed_cycles == number_of_cycles:
                self.retrieval_manager.stop()

        self.retrieval_manager.update_mapper.get_all_since.side_effect = None

    def tearDown(self):
        self.retrieval_manager.stop()
def run(config_location):
    """
    Builds every component of the application, connects them together and starts them.

    :param config_location: location handed to `load_config` to load the configuration from
    """
    # Configuration
    settings = load_config(config_location)

    # Measurement logging
    influx = settings.influxdb
    logging_buffer_latency = timedelta(seconds=influx.buffer_latency)
    logger = InfluxDBLogger(
        InfluxDBConnectionConfig(influx.host, influx.port, influx.username, influx.password, influx.database),
        buffer_latency=logging_buffer_latency)

    # HTTP connection pool size (for CouchDB); read from the environment
    # rather than the configuration, as it should rarely need tweaking
    raw_pool_size = environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16)
    patch_http_connection_pool(maxsize=int(raw_pool_size))

    # Cookie jar
    jar_settings = settings.cookie_jar
    cookie_jar = RateLimitedBiscuitTin(jar_settings.max_requests_per_second, jar_settings.url,
                                       jar_settings.database)
    add_cookie_jar_logging(cookie_jar, logger)

    # Data retrieval manager
    baton_mapper = BatonUpdateMapper(settings.baton.binaries_location, zone=settings.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(settings.retrieval.period, baton_mapper, logger)

    # The Slack client and message queue (e.g. RabbitMQ) client are currently disabled:
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel, config.slack.default_username)
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    slack = None
    message_queue = None

    # Context made available to rules and enrichment loaders
    context = HgiContext(cookie_jar, settings, slack, message_queue)

    # Rules source
    rules_source = RuleSource(settings.processing.rules_location, context)
    rules_source.start()

    # Enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(settings.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              settings.processing.max_threads, logger)

    # Connect the managers to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar,
                                             settings.cookie_jar.max_requests_per_second, logger)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.listen(settings.api.port)

    # Begin retrieval from the configured starting point
    retrieval_manager.start(settings.retrieval.since)

    # Process any cookies that are still unprocessed
    processor_manager.process_any_cookies()

    # Monitors
    threads_monitor = ThreadsMonitor(logger, logging_buffer_latency)
    threads_monitor.start()
    cookie_jar_monitor = CookieJarMonitor(logger, logging_buffer_latency, cookie_jar)
    cookie_jar_monitor.start()