Пример #1
0
    def process_request(self, request, spider):
        """Attach a proxy to `request` when the spider requires one.

        Only spiders exposing an `_operation` attribute set to one of
        `catalog`, `collection`, `latest` or `manga` are handled. A
        request that already carries a proxy in its meta is left alone.
        Always returns `None`.
        """
        supported = ('catalog', 'collection', 'latest', 'manga')
        # Spiders without an operation, or running any other operation,
        # are not handled here
        if not hasattr(spider, '_operation'):
            return
        if spider._operation not in supported:
            return

        already_proxied = 'proxy' in request.meta
        current = request.meta['proxy'] if already_proxied else 'no'
        logger.debug(
            'Process request - proxy: %s, url: %s' % (current, request.url))

        # A proxy assigned beforehand is kept as is
        if already_proxied:
            return

        if not needs_proxy(spider.name):
            return

        proxy = Proxy.objects.get_one(spider.name)
        if not proxy:
            logger.error('No proxy found for %s' % spider.name)
            return

        logger.info('Using proxy <%s> for request' % proxy)
        request.meta['proxy'] = 'http://%s' % proxy.proxy
        # Disable redirection when a proxy is in use
        request.meta['dont_redirect'] = True
Пример #2
0
    def process_request(self, request, spider):
        """Pick a random proxy for `request` when the spider needs one.

        The request is only considered when the spider has an
        `_operation` attribute equal to `catalog`, `collection`,
        `latest` or `manga`, and when no proxy is present in the
        request meta yet. Always returns `None`.
        """
        operations = ('catalog', 'collection', 'latest', 'manga')
        operation_ok = (hasattr(spider, '_operation')
                        and spider._operation in operations)
        if not operation_ok:
            return

        proxy_is_set = 'proxy' in request.meta
        logger.debug(
            'Process request - proxy: %s, url: %s' %
            (request.meta['proxy'] if proxy_is_set else 'no', request.url))

        # Keep the proxy that was assigned beforehand, if any
        if proxy_is_set:
            return

        if not needs_proxy(spider.name):
            return

        # Choose one proxy at random among those registered for the
        # source of this spider
        candidates = Proxy.objects.filter(source__spider=spider.name)
        proxy = candidates.order_by('?').first()
        if not proxy:
            logger.error('No proxy found for %s' % spider.name)
            return

        logger.info('Using proxy <%s> for request' % proxy)
        request.meta['proxy'] = 'http://%s' % proxy.proxy
        # Disable redirection when a proxy is in use
        request.meta['dont_redirect'] = True
Пример #3
0
def send(issues, user, accounts=None, loglevel=logging.WARNING):
    """Send a list of issues to a user.

    Calls `create_result_if_needed(user, Result.PROCESSING)` on every
    issue, then queues the issues for scraping and delivery. Issues
    whose spider needs a proxy go through `scrape_issues_slow`, the
    rest through `scrape_issues`. Each non-empty group gets its own
    `create_mobi_and_send` job that depends on the scraping job.

    :param issues: iterable of issues to send (any iterable works).
    :param user: user the issues are sent to.
    :param accounts: passed through to the scraping jobs.
    :param loglevel: logging level passed through to the scraping jobs.
    """
    # Materialize once: `issues` is traversed more than once, and a
    # one-shot iterable would be exhausted after the first loop,
    # silently queueing nothing
    issues = list(issues)

    for issue in issues:
        issue.create_result_if_needed(user, Result.PROCESSING)

    # Split the issues in `fast` (direct access) and `slow` (needs proxy)
    # in a single pass, so `needs_proxy` is evaluated once per issue and
    # every issue lands in exactly one group
    fast_issues, slow_issues = [], []
    for issue in issues:
        if needs_proxy(issue.manga.source.spider):
            slow_issues.append(issue)
        else:
            fast_issues.append(issue)

    if fast_issues:
        scrape_job = scrape_issues.delay(fast_issues, accounts, loglevel)
        # This job also updates the Result status
        create_mobi_and_send.delay(fast_issues, user, depends_on=scrape_job)

    if slow_issues:
        scrape_job = scrape_issues_slow.delay(slow_issues, accounts, loglevel)
        # This job also updates the Result status
        create_mobi_and_send.delay(slow_issues, user, depends_on=scrape_job)
Пример #4
0
    def _list_spiders(self, scrapy):
        """Write the spiders reported by `scrapy` to stdout.

        A title underlined with `=` and a blank line come first, then
        one line per spider. Spiders for which `needs_proxy` is true
        are tagged with `(PROXY)`.
        """
        title = 'List of current spiders:'
        # Fetch the names before writing anything, as the original
        # ordering does
        names = scrapy.spider_list()

        for line in (title, '=' * len(title), ''):
            self.stdout.write(line)

        for name in names:
            template = '- %s (PROXY)' if needs_proxy(name) else '- %s'
            self.stdout.write(template % name)