Пример #1
0
    def setUp(self):
        """
        Prepares the fixture: runs the parent set-up and then builds the
        `PeriodicRetrievalManager` instance that the tests exercise.
        """
        super().setUp()

        # The manager under test, built from the module-level retrieval period
        # and the `update_mapper` and `logger` attributes read from `self`
        manager = PeriodicRetrievalManager(RETRIEVAL_PERIOD, self.update_mapper, self.logger)
        self.retrieval_manager = manager
Пример #2
0
def run(config_location):
    """
    Wires together and starts every component of the system: measurement
    logging, the cookie jar, the retrieval manager, the rule and enrichment
    loader sources, the processor manager, the HTTP API and the monitors.

    :param config_location: directory that contains the `setup.conf`
    configuration file and, optionally, a `since` file holding the integer
    timestamp from which retrieval should resume
    """
    # Load config
    config = load_config(os.path.join(config_location, "setup.conf"))

    # Setup measurement logging
    logging_buffer_latency = timedelta(seconds=config.influxdb.buffer_latency)
    influxdb_config = InfluxDBConnectionConfig(config.influxdb.host, config.influxdb.port, config.influxdb.username,
                                               config.influxdb.password, config.influxdb.database)
    logger = InfluxDBLogger(influxdb_config, buffer_latency=logging_buffer_latency)

    # Set HTTP(S) connection pool size (for CouchDB)
    # NOTE This is taken from an environment variable, as it's not
    # something that would probably need tweaking that much:
    pool_size = int(os.environ.get('COOKIEMONSTER_POOL_SIZE', 16))
    patch_connection_pools(maxsize=pool_size)

    # Setup cookie jar
    cookie_jar = RateLimitedBiscuitTin(config.cookie_jar.max_requests_per_second,
                                       config.cookie_jar.url,
                                       config.cookie_jar.database,
                                       config.cookie_jar.buffer_capacity,
                                       config.cookie_jar.buffer_latency,
                                       verify=config.cookie_jar.cacert)
    add_cookie_jar_logging(cookie_jar, logger)
    add_couchdb_logging(cookie_jar, logger)

    # Setup data retrieval manager
    update_mapper = BatonUpdateMapper(config.baton.binaries_location, zone=config.baton.zone)
    retrieval_manager = PeriodicRetrievalManager(config.retrieval.period, update_mapper, logger)

    # The Slack client is currently disabled; the context is given `None`
    slack = None

    # Setup rule output log file writer
    rule_log_writer = RuleOutputWriter(config.output.log_file)

    # The message queue (e.g. RabbitMQ) client is currently disabled; the
    # context is given `None`
    message_queue = None

    # Define the context that rules and enrichment loaders have access to
    context = HgiContext(cookie_jar, config, rule_log_writer, slack, message_queue)

    # Setup rules source
    rules_source = RuleSource(config.processing.rules_location, context)
    rules_source.start()

    # Setup enrichment loader source
    enrichment_loader_source = EnrichmentLoaderSource(config.processing.enrichment_loaders_location, context)
    enrichment_loader_source.start()

    # Setup the data processor manager
    processor_manager = BasicProcessorManager(cookie_jar, rules_source, enrichment_loader_source,
                                              config.processing.max_threads, logger)

    # Connect components to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(retrieval_manager, cookie_jar, config.cookie_jar.max_requests_per_second,
                                             logger)
    _connect_retrieval_manager_to_since_file(retrieval_manager, config_location)
    _connect_processor_manager_to_cookie_jar(processor_manager, cookie_jar)

    # Setup the HTTP API
    api = HTTP_API()
    api.inject(APIDependency.CookieJar, cookie_jar)
    api.inject(APIDependency.System, None)
    api.listen(config.api.port)

    # Start the retrieval manager from the last known successful
    # retrieval time (or invocation time, otherwise)
    since_file_location = os.path.join(config_location, "since")
    try:
        with open(since_file_location, "r") as f:
            since_time = datetime.fromtimestamp(int(f.read()))
    except (OSError, ValueError, OverflowError):
        # Only a missing/unreadable file (OSError), non-integer content
        # (ValueError) or an out-of-range timestamp (ValueError/OverflowError)
        # falls back to "now"; anything else (e.g. KeyboardInterrupt) must
        # propagate rather than be silently swallowed
        since_time = datetime.now()

    retrieval_manager.start(since_time)

    # Start processing of any unprocessed cookies
    processor_manager.process_any_cookies()

    # Setup monitors
    ThreadsMonitor(logger, logging_buffer_latency).start()
    CookieJarMonitor(logger, logging_buffer_latency, cookie_jar).start()
Пример #3
0
class TestPeriodicRetrievalManager(_BaseRetrievalManagerTest):
    """
    Test cases for `PeriodicRetrievalManager`.

    Each test gets a fresh manager (see `setUp`), which is stopped again in
    `tearDown`.
    """
    def setUp(self):
        """
        Runs the base class set-up and then creates the manager under test.
        """
        super().setUp()

        # Create retrieval manager
        # (`update_mapper` and `logger` are read from `self`; presumably they are
        # created by the base class `setUp` - confirm)
        self.retrieval_manager = PeriodicRetrievalManager(RETRIEVAL_PERIOD, self.update_mapper, self.logger)

    @unittest.skip("Flaky test")
    def test_run(self):
        """
        Runs the manager for a fixed number of cycles and checks that one
        measurement was recorded per cycle and that the listener was called
        with the updates for each cycle. Currently skipped as flaky.
        """
        cycles = 10
        listener = MagicMock()

        # NOTE(review): `logger.add` is replaced here, yet the assertion below
        # inspects `logger.record` - confirm which logger method is intended
        self.logger.add = MagicMock()
        self.retrieval_manager.add_listener(listener)

        # Blocks until the requested number of cycles has been counted
        self._setup_to_do_n_cycles(cycles, self.updates)

        self.assertEqual(self.logger.record.call_count, cycles)
        listener.assert_has_calls([call(self.updates) for _ in range(cycles)])

    def test_run_if_running(self):
        """
        Checks that calling `run` whilst another thread has been started on
        `run` raises a `RuntimeError`.
        """
        # NOTE(review): nothing here waits for the background thread to actually
        # enter `run` before the second call is made - confirm this cannot race
        Thread(target=self.retrieval_manager.run).start()
        self.assertRaises(RuntimeError, self.retrieval_manager.run)

    def test_stop_and_then_restart(self):
        """
        Checks that the manager can be started, stopped and started again;
        the test has no assertions, so it passes if no exception is raised.
        """
        self.retrieval_manager.start()
        self.retrieval_manager.stop()
        self.retrieval_manager.start()

    def _setup_to_do_n_cycles(self, number_of_cycles: int, updates_each_cycle: UpdateCollection=None):
        """
        Sets up the test so that the retriever will only do n cycles.

        Starts the retrieval manager and blocks the calling thread until
        `get_all_since` has been invoked `number_of_cycles` times, at which
        point the manager is stopped.
        :param number_of_cycles: the number of cycles to do
        :param updates_each_cycle: the updates that `get_all_since` returns on
        every cycle; an empty `UpdateCollection` is used if `None`
        """
        if updates_each_cycle is None:
            updates_each_cycle = UpdateCollection([])

        # Handshake between the thread that calls `get_all_since` and this one:
        # the semaphore signals "a call has been made", whilst the lock (which
        # starts off held) keeps that call blocked until it has been counted
        semaphore = Semaphore(0)
        lock_until_counted = Lock()
        lock_until_counted.acquire()

        def increase_counter(*args) -> UpdateCollection:
            # Announce the call, then wait until the counting loop below has
            # registered it before returning the updates
            semaphore.release()
            lock_until_counted.acquire()
            return updates_each_cycle

        # `side_effect` is assigned, so `get_all_since` is presumably a mock
        # supplied by the base test class - confirm
        self.retrieval_manager.update_mapper.get_all_since.side_effect = increase_counter
        self.retrieval_manager.start()

        run_counter = 0
        while run_counter < number_of_cycles:
            # Wait for the next `get_all_since` call, count it and let it return
            semaphore.acquire()
            run_counter += 1
            lock_until_counted.release()
            if run_counter == number_of_cycles:
                # Final cycle has been counted: ask the manager to stop
                self.retrieval_manager.stop()

        # Remove the blocking side effect so that later calls are not held up
        self.retrieval_manager.update_mapper.get_all_since.side_effect = None

    def tearDown(self):
        """
        Stops the retrieval manager after each test.
        """
        self.retrieval_manager.stop()
Пример #4
0
def run(config_location):
    """
    Builds every component of the system from the loaded configuration,
    connects them to one another and starts them.

    :param config_location: the location handed to `load_config` in order to
    read the configuration
    """
    settings = load_config(config_location)

    # Measurement logging, backed by InfluxDB
    influx_settings = settings.influxdb
    monitor_period = timedelta(seconds=influx_settings.buffer_latency)
    influx_connection = InfluxDBConnectionConfig(
        influx_settings.host,
        influx_settings.port,
        influx_settings.username,
        influx_settings.password,
        influx_settings.database,
    )
    measurement_logger = InfluxDBLogger(influx_connection, buffer_latency=monitor_period)

    # HTTP connection pool size (for CouchDB): read from the environment rather
    # than from the configuration, as it should rarely need tweaking
    http_pool_size = int(environ.get('COOKIEMONSTER_HTTP_POOL_SIZE', 16))
    patch_http_connection_pool(maxsize=http_pool_size)

    # Cookie jar, together with its logging
    jar_settings = settings.cookie_jar
    jar = RateLimitedBiscuitTin(jar_settings.max_requests_per_second, jar_settings.url, jar_settings.database)
    add_cookie_jar_logging(jar, measurement_logger)

    # Data retrieval manager
    baton_mapper = BatonUpdateMapper(settings.baton.binaries_location, zone=settings.baton.zone)
    retriever = PeriodicRetrievalManager(settings.retrieval.period, baton_mapper, measurement_logger)

    # Neither the Slack client nor the message queue (e.g. RabbitMQ) client is
    # set up at present, so the context is given `None` for both
    slack_client = None
    queue_client = None

    # Context made available to the rules and the enrichment loaders
    hgi_context = HgiContext(jar, settings, slack_client, queue_client)

    # Rules source
    processing_settings = settings.processing
    rules = RuleSource(processing_settings.rules_location, hgi_context)
    rules.start()

    # Enrichment loader source
    enrichment_loaders = EnrichmentLoaderSource(processing_settings.enrichment_loaders_location, hgi_context)
    enrichment_loaders.start()

    # Data processor manager
    processors = BasicProcessorManager(
        jar, rules, enrichment_loaders, processing_settings.max_threads, measurement_logger
    )

    # Hook the retrieval and processor managers up to the cookie jar
    _connect_retrieval_manager_to_cookie_jar(
        retriever, jar, settings.cookie_jar.max_requests_per_second, measurement_logger
    )
    _connect_processor_manager_to_cookie_jar(processors, jar)

    # HTTP API
    http_api = HTTP_API()
    http_api.inject(APIDependency.CookieJar, jar)
    http_api.listen(settings.api.port)

    # Begin retrieving, starting from the configured point
    retriever.start(settings.retrieval.since)

    # Deal with any cookies that are still unprocessed
    processors.process_any_cookies()

    # Monitors
    threads_monitor = ThreadsMonitor(measurement_logger, monitor_period)
    threads_monitor.start()
    jar_monitor = CookieJarMonitor(measurement_logger, monitor_period, jar)
    jar_monitor.start()