Exemplo n.º 1
0
def test_frontera_settings_have_precedence_over_crawler_settings():
    """Check that the adapter reports ``MAX_REQUESTS`` as 5 rather than 10.

    The crawler-level mapping sets ``MAX_REQUESTS`` to 10 and names a
    Frontera settings module through ``FRONTERA_SETTINGS``.
    NOTE(review): presumably that module defines ``MAX_REQUESTS = 5``; it is
    not visible from here -- confirm.
    """
    frontera_module = 'tests.scrapy_spider.frontera.settings'
    adapter = ScrapySettingsAdapter(
        {'MAX_REQUESTS': 10, 'FRONTERA_SETTINGS': frontera_module}
    )
    max_requests = adapter.get('MAX_REQUESTS')
    assert max_requests == 5
Exemplo n.º 2
0
 def __init__(self, crawler):
     """Prepare the stats producer and encoder from the crawler's settings.

     :param crawler: object whose ``settings`` attribute is wrapped in a
         ``ScrapySettingsAdapter``.
     :raises NotConfigured: when the message bus yields a falsy stats log.
     """
     adapter = ScrapySettingsAdapter(crawler.settings)
     self.partition_id = adapter.get('SPIDER_PARTITION_ID')

     # XXX A later improvement would be to reuse the spider's own producer
     # (crawler->engine->slot->scheduler->frontier->manager->backend->_producer).
     # The topic is hard-coded in the current scheme, though, so Frontera
     # itself needs some preliminary changes first.
     bus_factory = load_object(adapter.get('MESSAGE_BUS'))
     bus = bus_factory(adapter)
     log_stream = bus.stats_log()
     if not log_stream:
         raise NotConfigured

     self.stats_producer = log_stream.producer()
     self._stats_interval = adapter.get('STATS_LOG_INTERVAL', 60)

     # The encoder class is looked up as "<MESSAGE_BUS_CODEC>.Encoder".
     encoder_factory = load_object(adapter.get('MESSAGE_BUS_CODEC') + ".Encoder")
     # Requests never need encoding here, hence no request model.
     self._stats_encoder = encoder_factory(request_model=None)
     self._export_stats_task = None
Exemplo n.º 3
0
 def __init__(self, crawler):
     """Wire up stats export using values read from ``crawler.settings``.

     Sets ``partition_id``, ``stats_producer``, ``_stats_interval``,
     ``_stats_encoder`` and ``_export_stats_task`` on the instance.

     :param crawler: object exposing a ``settings`` attribute.
     :raises NotConfigured: if ``message_bus.stats_log()`` returns a falsy
         value.
     """
     cfg = ScrapySettingsAdapter(crawler.settings)
     self.partition_id = cfg.get('SPIDER_PARTITION_ID')

     # XXX Reusing the spider's producer would be better
     # (crawler->engine->slot->scheduler->frontier->manager->backend->_producer),
     # but the current scheme hard-codes the topic, so that needs some
     # preliminary changes in Frontera itself.
     bus_path = cfg.get('MESSAGE_BUS')
     stats_channel = load_object(bus_path)(cfg).stats_log()
     if not stats_channel:
         raise NotConfigured
     self.stats_producer = stats_channel.producer()

     self._stats_interval = cfg.get('STATS_LOG_INTERVAL', 60)

     encoder_path = cfg.get('MESSAGE_BUS_CODEC') + ".Encoder"
     make_encoder = load_object(encoder_path)
     # No request model is passed: there is no need to encode requests.
     self._stats_encoder = make_encoder(request_model=None)

     self._export_stats_task = None
Exemplo n.º 4
0
def test_fallsback_to_crawler_settings():
    """Check that a value given only in the crawler settings is returned."""
    crawler_settings = {'DELAY_ON_EMPTY': 10}
    adapter = ScrapySettingsAdapter(crawler_settings)
    delay = adapter.get('DELAY_ON_EMPTY')
    assert delay == 10
def test_fallsback_to_crawler_settings():
    """Check that ``DELAY_ON_EMPTY`` passed in the crawler mapping reads as 10."""
    expected_delay = 10
    wrapped = ScrapySettingsAdapter({'DELAY_ON_EMPTY': 10})
    assert wrapped.get('DELAY_ON_EMPTY') == expected_delay
def test_frontera_settings_have_precedence_over_crawler_settings():
    """Check that ``MAX_REQUESTS`` reads as 5 although the crawler sets 10.

    The crawler mapping also names a Frontera settings module under
    ``FRONTERA_SETTINGS``.
    NOTE(review): the value 5 presumably comes from that module, which is
    not visible here -- confirm.
    """
    expected_max_requests = 5
    overrides = {
        'MAX_REQUESTS': 10,
        'FRONTERA_SETTINGS': 'tests.scrapy_spider.frontera.settings',
    }
    adapter = ScrapySettingsAdapter(overrides)
    assert adapter.get('MAX_REQUESTS') == expected_max_requests