Example #1
    def __init__(self, crawler):
        # Commander is the client for the Scrapoxy REST API; endpoint and
        # password are read from the project settings.
        self._commander = Commander(
            crawler.settings.get('API_SCRAPOXY'),
            crawler.settings.get('API_SCRAPOXY_PASSWORD'))

        # Time, in seconds, to wait for new proxy instances (120 by default).
        self._WAIT_FOR_SCALE = crawler.settings.get('WAIT_FOR_SCALE') or 120

        # Hook the middleware's handlers into the spider lifecycle signals.
        crawler.signals.connect(self.spider_opened, signals.spider_opened)
        crawler.signals.connect(self.spider_closed, signals.spider_closed)
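Example #1 shows only the __init__ of a Scrapy downloader middleware. Below is a minimal sketch of the surrounding boilerplate it would sit in; the class name ScaleMiddleware, the empty signal handlers, and the Commander import path from the scrapoxy Python connector are assumptions, not part of the original example. The from_crawler hook is the standard way Scrapy hands the running crawler to a middleware.

from scrapoxy.commander import Commander
from scrapy import signals


class ScaleMiddleware(object):  # hypothetical class name

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy calls this hook with the active Crawler, which exposes
        # both the settings and the signal dispatcher used in __init__.
        return cls(crawler)

    def spider_opened(self, spider):
        # Placeholder for the handler connected in __init__ above.
        pass

    def spider_closed(self, spider):
        # Placeholder for the handler connected in __init__ above.
        pass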
Example #2
    def __init__(self, crawler):
        """Access the settings of the crawler to connect to Scrapoxy."""
        # HTTP status codes that mark the proxy as blacklisted (503 by default).
        self._http_status_codes = crawler.settings.get(
            'BLACKLIST_HTTP_STATUS_CODES', [503])
        # Lower and upper bounds, in seconds, for the pause applied after
        # a blacklisted response.
        self._sleep_min = crawler.settings.get('SCRAPOXY_SLEEP_MIN', 60)
        self._sleep_max = crawler.settings.get('SCRAPOXY_SLEEP_MAX', 180)

        # Client for the Scrapoxy REST API, configured as in Example #1.
        self._commander = Commander(
            crawler.settings.get('API_SCRAPOXY'),
            crawler.settings.get('API_SCRAPOXY_PASSWORD'))
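The settings keys read in these __init__ methods would normally live in the project's settings.py. The sketch below is a hedged illustration: the endpoint URL and password are placeholders, while the numeric values simply restate the defaults used in the code above.

# settings.py (placeholder values)
API_SCRAPOXY = 'http://localhost:8889/api'      # Scrapoxy REST API endpoint (placeholder)
API_SCRAPOXY_PASSWORD = 'CHANGE_THIS_PASSWORD'  # placeholder password
BLACKLIST_HTTP_STATUS_CODES = [503]             # same default as in the code above
SCRAPOXY_SLEEP_MIN = 60                         # seconds, default from the code above
SCRAPOXY_SLEEP_MAX = 180                        # seconds, default from the code above
WAIT_FOR_SCALE = 120                            # seconds, default from Example #1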
Example #3
File: run.py Project: arbal/recrawl
    def __init__(self):
        # Load the Scrapy project settings and build the Scrapoxy Commander
        # client from them.
        self.settings = get_project_settings()
        self.commander = Commander(self.settings.get('API_SCRAPOXY'),
                                   self.settings.get('API_SCRAPOXY_PASSWORD'))
        # Skip Scrapy's root log handler and install the project's own
        # logging configuration instead.
        configure_logging(settings=None, install_root_handler=False)
        logging.config.dictConfig(self.settings['LOGGING_SETTINGS'])
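A standalone run.py like Example #3 usually goes on to feed the same project settings into a CrawlerProcess and start a spider. The sketch below shows that common pattern under stated assumptions; the spider name 'recrawl' is purely a placeholder and is not taken from the original project.

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def main():
    # Reuse the project settings loaded the same way as in Example #3.
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    process.crawl('recrawl')  # placeholder spider name
    process.start()           # blocks until the crawl finishes


if __name__ == '__main__':
    main()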