def setUp(self):
    """
    Test set up:

    * Build, if necessary, and start a CouchDB container and connect as a
      BiscuitTin instance
    * Start the HTTP API service on a free port, with the necessary
      dependencies injected
    * Create an HTTP client connection to the API service

    Raises ConnectionError (after invoking tearDown, so the container is
    cleaned up) if the API service does not respond within five seconds.
    """
    self.couchdb_container = CouchDBContainer()

    # Configuration for Cookie Jar
    self.HOST = self.couchdb_container.couchdb_fqdn
    self.DB = 'elmo-test'
    self.jar = BiscuitTin(self.HOST, self.DB, 1, timedelta(0))

    # Configuration for HTTP service
    self.API_PORT = get_open_port()
    self.api = HTTP_API()
    self.api.inject(APIDependency.CookieJar, self.jar)
    self.api.inject(APIDependency.System, None)
    self.api.listen(self.API_PORT)

    self.http = HTTPConnection('localhost', self.API_PORT)
    self.REQ_HEADER = {'Accept': 'application/json'}

    # Block until service is up (or timeout)
    start_time = finish_time = datetime.now()
    service_up = False
    while finish_time - start_time < timedelta(seconds=5):
        response = None
        try:
            self.http.request('HEAD', '/')
            response = self.http.getresponse()
        except Exception:
            # Connection refused/reset while the service is still starting;
            # back off briefly and retry until the deadline.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit.)
            sleep(0.1)
        finally:
            self.http.close()
            finish_time = datetime.now()

        if isinstance(response, HTTPResponse):
            service_up = True
            break

    if not service_up:
        self.tearDown()
        raise ConnectionError('Couldn\'t start API service in a reasonable amount of time')
def run(config_location):
    """
    Assemble and start all Cookie Monster components.

    @param config_location  Directory containing "setup.conf" (and,
                            optionally, a "since" timestamp file recording
                            the last successful retrieval)
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(
        config.influxdb.host, config.influxdb.port, config.influxdb.username,
        config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(
        config.cookie_jar.max_requests_per_second, config.cookie_jar.url,
        config.cookie_jar.database, config.cookie_jar.buffer_capacity,
        config.cookie_jar.buffer_latency, verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location,
                                      zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period,
                                                 update_mapper, logger)

    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel,
    #                          config.slack.default_username)
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    message_queue = None

    # Define the context that rules and enrichment loaders have access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(
        config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(
        cookie_jar, rules_source, enrichment_loader_source,
        config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retrieval_manager, cookie_jar,
        config.cookie_jar.max_requests_per_second, logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    try:
        with open(os.path.join(config_location, "since"), "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError):
        # No "since" file, or unreadable/non-integer contents: fall back to
        # now. (Was a bare `except:`, which also hid KeyboardInterrupt and
        # genuine programming errors.)
        since_time = datetime.now()
    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
class TestElmo(unittest.TestCase): def setUp(self): """ Test set up: * Build, if necessary, and start a CouchDB container and connect as a BiscuitTin instance * Start the HTTP API service on a free port, with the necessary dependencies injected * Create an HTTP client connection to the API service """ self.couchdb_container = CouchDBContainer() # Configuration for Cookie Jar self.HOST = self.couchdb_container.couchdb_fqdn self.DB = 'elmo-test' self.jar = BiscuitTin(self.HOST, self.DB, 1, timedelta(0)) # Configuration for HTTP service self.API_PORT = get_open_port() self.api = HTTP_API() self.api.inject(APIDependency.CookieJar, self.jar) self.api.inject(APIDependency.System, None) self.api.listen(self.API_PORT) self.http = HTTPConnection('localhost', self.API_PORT) self.REQ_HEADER = {'Accept': 'application/json'} # Block until service is up (or timeout) start_time = finish_time = datetime.now() service_up = False while finish_time - start_time < timedelta(seconds=5): response = None try: self.http.request('HEAD', '/') response = self.http.getresponse() except: sleep(0.1) finally: self.http.close() finish_time = datetime.now() if isinstance(response, HTTPResponse): service_up = True break if not service_up: self.tearDown() raise ConnectionError('Couldn\'t start API service in a reasonable amount of time') def tearDown(self): """ Tear down test set up """ self.http.close() self.api.stop() self.couchdb_container.tear_down() def test_queue(self): """ HTTP API: GET /queue """ self.http.request('GET', '/queue', headers=self.REQ_HEADER) r = self.http.getresponse() self.assertEqual(r.status, 200) self.assertEqual(r.headers.get_content_type(), 'application/json') data = _decode_json_response(r) self.assertIn('queue_length', data) self.assertEqual(data['queue_length'], self.jar.queue_length()) # Should be 0 self.http.close() # Add item to the queue self.jar.mark_for_processing('/foo') self.http.request('GET', '/queue', headers=self.REQ_HEADER) data = 
_decode_json_response(self.http.getresponse()) self.assertEqual(data['queue_length'], self.jar.queue_length()) # Should be 1 def test_reprocess(self): """ HTTP API: POST /queue/reprocess """ # Add mocked update notifier to Cookie Jar dirty_cookie_listener = MagicMock() self.jar.add_listener(dirty_cookie_listener) cookie_identifier = '/foo' request = {'identifier': cookie_identifier} self.http.request('POST', '/queue/reprocess', body=json.dumps(request), headers=self.REQ_HEADER) r = self.http.getresponse() self.assertEqual(r.status, 200) self.assertEqual(r.headers.get_content_type(), 'application/json') data = _decode_json_response(r) self.assertEqual(data, request) self.http.close() # Check queue has been updated self.assertEqual(self.jar.queue_length(), 1) self.assertEqual(dirty_cookie_listener.call_count, 1) @staticmethod def _url_for_identifier(identifier:str): """ URL for identifier """ if identifier[0] == "/": return '/cookiejar?identifier={}'.format(identifier) else: return '/cookiejar/{}'.format(identifier) def _fetch_test(self, identifier:str): """ Generic fetch test """ source = 'foobar' timestamp = datetime.now().replace(microsecond=0, tzinfo=timezone.utc) metadata = Metadata({'foo': 123, 'bar': 'quux'}) enrichment = Enrichment(source, timestamp, metadata) self.jar.enrich_cookie(identifier, enrichment) self.http.request('GET', TestElmo._url_for_identifier(identifier), headers=self.REQ_HEADER) r = self.http.getresponse() self.assertEqual(r.status, 200) self.assertEqual(r.headers.get_content_type(), 'application/json') data = _decode_json_response(r) fetched_identifier = data['identifier'] fetched_enrichment = json.loads(json.dumps(data['enrichments']), cls=EnrichmentJSONDecoder)[0] self.assertEqual(fetched_identifier, identifier) self.assertEqual(fetched_enrichment, enrichment) def test_fetch_by_qs(self): """ HTTP API: GET /cookiejar?identifier=<identifier> """ self._fetch_test('/path/to/foo') def test_fetch_by_route(self): """ HTTP API: GET 
/cookiejar/<identifier> """ self._fetch_test('foo_bar') def _delete_test(self, identifier:str): """ Generic delete test """ self.jar.mark_for_processing(identifier) self.jar.mark_as_complete(identifier) cookie = self.jar.fetch_cookie(identifier) self.assertIsInstance(cookie, Cookie) self.http.request('DELETE', TestElmo._url_for_identifier(identifier), headers=self.REQ_HEADER) r = self.http.getresponse() self.assertEqual(r.status, 200) self.assertEqual(r.headers.get_content_type(), 'application/json') data = _decode_json_response(r) self.assertEqual(data, {'deleted':identifier}) deleted_cookie = self.jar.fetch_cookie(identifier) self.assertIsNone(deleted_cookie) def test_delete_by_qs(self): """ HTTP API: DELETE /cookiejar?identifier=<identifier> """ self._delete_test('/path/to/foo') def test_delete_by_route(self): """ HTTP API: DELETE /cookiejar/<identifier> """ self._delete_test('foo_bar') def test_thread_dump(self): """ HTTP API: GET /debug/threads Note: This test only proves that the endpoint returns an OK response and JSON data. TODO At least validate the returned data's schema """ self.http.request('GET', '/debug/threads', headers=self.REQ_HEADER) r = self.http.getresponse() self.assertEqual(r.status, 200) self.assertEqual(r.headers.get_content_type(), 'application/json')
def run(config_location):
    """
    Bootstrap and launch every Cookie Monster component from the
    configuration found at `config_location`.
    """
    config = load_config(config_location)

    # Measurement logging goes to InfluxDB, buffered for the configured
    # latency
    buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    connection_config = InfluxDBConnectionConfig(
        config.influxdb.host, config.influxdb.port, config.influxdb.username,
        config.influxdb.password, config.influxdb.database)
    measurement_logger = InfluxDBLogger(connection_config,
                                        buffer_latency=buffer_latency)

    # Size the HTTP connection pool used for CouchDB. This comes from an
    # environment variable rather than the config file, as it's not something
    # that would probably need tweaking that much:
    patch_http_connection_pool(
        maxsize=int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16)))

    # Cookie jar, with rate limiting and measurement logging attached
    cookie_jar = RateLimitedBiscuitTin(
        config.cookie_jar.max_requests_per_second,
        config.cookie_jar.url,
        config.cookie_jar.database)
    add_cookie_jar_logging(cookie_jar, measurement_logger)

    # Data retrieval manager, polling iRODS (via baton) periodically
    mapper = BatonUpdateMapper(config.baton.binaries_location,
                               zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period,
                                                 mapper, measurement_logger)

    # Slack and message-queue clients are currently disabled:
    # # Setup basic Slack client
    # slack = BasicSlackClient(config.slack.token, config.slack.default_channel,
    #                          config.slack.default_username)
    # # Setup basic message queue (e.g. RabbitMQ) client
    # message_queue = BasicMessageQueue(config.message_queue.host, config.message_queue.port,
    #                                   config.message_queue.username, config.message_queue.password)
    slack = None
    message_queue = None

    # Context handed to every rule and enrichment loader
    context = HgiContext(cookie_jar, config, slack, message_queue)

    # Dynamically-loaded rules
    rules = RuleSource(config.processing.rules_location, context)
    rules.start()

    # Dynamically-loaded enrichment loaders
    enrichment_loaders = EnrichmentLoaderSource(
        config.processing.enrichment_loaders_location, context)
    enrichment_loaders.start()

    # Data processor manager, driving rules/enrichment over a thread pool
    processor_manager = BasicProcessorManager(
        cookie_jar, rules, enrichment_loaders,
        config.processing.max_threads, measurement_logger)

    # Wire the retrieval and processing components up to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retrieval_manager, cookie_jar,
        config.cookie_jar.max_requests_per_second, measurement_logger)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # HTTP API, with the cookie jar injected as a dependency
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.listen(config.api.port)

    # Begin retrieving updates from the configured starting point, then kick
    # off processing of anything already outstanding
    retrieval_manager.start(config.retrieval.since)
    processor_manager.process_any_cookies()

    # Resource monitors, logged at the same buffer latency
    ThreadsMonitor(measurement_logger, buffer_latency).start()
    CookieJarMonitor(measurement_logger, buffer_latency, cookie_jar).start()