def test_frontera_settings_have_precedence_over_crawler_settings():
    """Check that a Frontera settings module overrides crawler settings.

    The crawler dict sets ``MAX_REQUESTS`` to 10 and names a Frontera
    settings module through ``FRONTERA_SETTINGS``; the adapter is expected
    to report 5 (presumably the value defined in that module).
    """
    scrapy_conf = {
        'MAX_REQUESTS': 10,
        'FRONTERA_SETTINGS': 'tests.scrapy_spider.frontera.settings',
    }
    adapter = ScrapySettingsAdapter(scrapy_conf)
    max_requests = adapter.get('MAX_REQUESTS')
    assert max_requests == 5
def __init__(self, crawler):
    """Set up the stats producer and encoder from the crawler's settings.

    Args:
        crawler: object exposing a ``settings`` attribute, which is wrapped
            in a ``ScrapySettingsAdapter``.

    Raises:
        NotConfigured: when the message bus returns a falsy stats log.
    """
    adapter = ScrapySettingsAdapter(crawler.settings)
    self.partition_id = adapter.get('SPIDER_PARTITION_ID')

    # XXX this can be improved later by reusing spider's producer
    # (crawler->engine->slot->scheduler->frontier->manager->backend->_producer)
    # but the topic is hard-coded in the current scheme, so it requires some
    # preliminary changes in Frontera itself.
    bus_factory = load_object(adapter.get('MESSAGE_BUS'))
    bus = bus_factory(adapter)
    log_stream = bus.stats_log()
    if not log_stream:
        raise NotConfigured
    self.stats_producer = log_stream.producer()

    # Falls back to 60 when STATS_LOG_INTERVAL is not set.
    self._stats_interval = adapter.get('STATS_LOG_INTERVAL', 60)

    codec_module = adapter.get('MESSAGE_BUS_CODEC')
    encoder_factory = load_object(codec_module + ".Encoder")
    # no need to encode requests, hence no request model
    self._stats_encoder = encoder_factory(request_model=None)

    self._export_stats_task = None
def __init__(self, crawler):
    """Build the stats-export state from ``crawler.settings``.

    Reads the partition id, instantiates the configured message bus to
    obtain a stats-log producer, and loads the codec's ``Encoder`` class.

    Args:
        crawler: object whose ``settings`` attribute is passed to
            ``ScrapySettingsAdapter``.

    Raises:
        NotConfigured: if ``stats_log()`` on the message bus is falsy.
    """
    conf = ScrapySettingsAdapter(crawler.settings)
    self.partition_id = conf.get('SPIDER_PARTITION_ID')

    # XXX this can be improved later by reusing spider's producer
    # (crawler->engine->slot->scheduler->frontier->manager->backend->_producer)
    # but the topic is hard-coded in the current scheme, so it requires some
    # preliminary changes in Frontera itself.
    bus_cls = load_object(conf.get('MESSAGE_BUS'))
    stats_channel = bus_cls(conf).stats_log()
    if not stats_channel:
        raise NotConfigured

    self.stats_producer = stats_channel.producer()
    self._stats_interval = conf.get('STATS_LOG_INTERVAL', 60)  # 60 is the default

    encoder_path = conf.get('MESSAGE_BUS_CODEC') + ".Encoder"
    codec_encoder = load_object(encoder_path)
    # no need to encode requests
    self._stats_encoder = codec_encoder(request_model=None)
    self._export_stats_task = None
def test_fallsback_to_crawler_settings():
    """Check that a key given only in the crawler settings is returned as-is."""
    scrapy_conf = {'DELAY_ON_EMPTY': 10}
    adapter = ScrapySettingsAdapter(scrapy_conf)
    delay = adapter.get('DELAY_ON_EMPTY')
    assert delay == 10
def test_frontera_settings_have_precedence_over_crawler_settings():
    """Verify that the Frontera settings module wins over the crawler dict.

    ``MAX_REQUESTS`` is 10 in the crawler settings, yet the adapter must
    return 5 once ``FRONTERA_SETTINGS`` points at the test settings module
    (presumably where 5 is defined).
    """
    overrides = dict()
    overrides['MAX_REQUESTS'] = 10
    overrides['FRONTERA_SETTINGS'] = 'tests.scrapy_spider.frontera.settings'

    wrapped = ScrapySettingsAdapter(overrides)
    assert wrapped.get('MAX_REQUESTS') == 5