def _schedule(self, request, spider):
    # Fall back to a spider resolved from the request, or a throwaway
    # BaseSpider('default') if nothing matches.
    if spider is None:
        spider = create_spider_for_request(self.crawler.spiders, request,
                                           BaseSpider('default'), log_multiple=True)
    spider.set_crawler(self.crawler)
    self.crawler.engine.open_spider(spider)
    d = self.crawler.engine.schedule(request, spider)
    d.addCallback(lambda x: (x, spider))
    return d
def _open_spider(self, request, spider):
    # Reuse the spider already opened for this session, if any.
    if self.spider:
        return self.spider
    if spider is None:
        spider = create_spider_for_request(self.crawler.spiders, request,
                                           BaseSpider('default'), log_multiple=True)
    spider.set_crawler(self.crawler)
    # Keep the spider open even when the engine goes idle.
    self.crawler.engine.open_spider(spider, close_if_idle=False)
    self.spider = spider
    return spider
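# Hedged usage sketch (not from the source): consuming the Deferred returned
# by _schedule above. Assumes `shell` is an instance of the surrounding class
# and the Twisted reactor is already running; the callback receives the
# (result, spider) tuple built by the addCallback lambda.
def _on_scheduled(result):
    response, spider = result
    print('got %r using spider %r' % (response, spider.name))

d = shell._schedule(Request('http://example.com'), None)
d.addCallback(_on_scheduled)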
def set_spider(self, url, opts):
    if opts.spider:
        try:
            self.spider = self.crawler.spiders.create(opts.spider)
        except KeyError:
            log.msg('Unable to find spider: %s' % opts.spider, log.ERROR)
    else:
        # create_spider_for_request expects a Request, not a bare URL string.
        self.spider = create_spider_for_request(self.crawler.spiders, Request(url))
        if not self.spider:
            log.msg('Unable to find spider for: %s' % url, log.ERROR)
def _schedule(self, request, spider):
    if spider is None:
        spider = create_spider_for_request(self.crawler.spiders, request,
                                           BaseSpider('default'), log_multiple=True)
    spider.set_crawler(self.crawler)
    self.crawler.engine.open_spider(spider, close_if_idle=False)
    # Attach a Deferred to the request so the caller gets (response, spider)
    # back once the engine has processed it.
    d = request_deferred(request)
    d.addCallback(lambda x: (x, spider))
    self.crawler.engine.crawl(request, spider)
    return d
def get_spider(self, request, opts):
    if opts.spider:
        try:
            return self.crawler.spiders.create(opts.spider)
        except KeyError:
            log.msg('Unable to find spider: %s' % opts.spider, log.ERROR)
    else:
        spider = create_spider_for_request(self.crawler.spiders, request)
        if spider:
            return spider
        log.msg('Unable to find spider for: %s' % request, log.ERROR)
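# Hedged usage sketch for get_spider (illustrative only): `opts` is whatever
# options object the command receives; only its `spider` attribute is read.
# `cmd` stands for an instance of the class defining get_spider.
class _FakeOpts(object):
    spider = None  # force the URL-based lookup branch

spider = cmd.get_spider(Request('http://example.com/page'), _FakeOpts())
if spider is None:
    pass  # nothing matched and no spider was named; the caller must handle this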
def append_url(self, url=None, spider=None, **kwargs):
    """Append a URL to crawl with the given spider.

    If the spider is not given, a spider will be looked up based on the URL.
    """
    if url is None:
        raise ValueError("A url is required")
    if spider is None:
        spider = create_spider_for_request(self._spiders, Request(url), **kwargs)
    if spider:
        requests = arg_to_iter(spider.make_requests_from_url(url))
        self.spider_requests.append((spider, requests))
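# Hedged usage sketch for append_url: `queue` is a hypothetical instance of
# the class defining append_url. With spider=None the spider is resolved from
# the URL via create_spider_for_request; with an explicit spider the lookup
# is skipped.
queue.append_url('http://example.com/start')                    # resolve by URL
queue.append_url('http://example.com/start', spider=my_spider)  # explicit spider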
def set_spider(self, url, opts):
    if opts.spider:
        try:
            self.spider = self.crawler.spiders.create(opts.spider)
        except KeyError:
            log.msg(format='Unable to find spider: %(spider)s',
                    level=log.ERROR, spider=opts.spider)
    else:
        self.spider = create_spider_for_request(self.crawler.spiders, Request(url))
        if not self.spider:
            log.msg(format='Unable to find spider for: %(url)s',
                    level=log.ERROR, url=url)
def run(self, args, opts):
    if len(args) != 1 or not is_url(args[0]):
        raise UsageError()
    cb = lambda x: self._print_response(x, opts)
    request = Request(args[0], callback=cb, dont_filter=True)
    # Let the callback see every response, whatever its HTTP status.
    request.meta['handle_httpstatus_all'] = True
    spider = None
    if opts.spider:
        spider = self.crawler.spiders.create(opts.spider)
    else:
        spider = create_spider_for_request(self.crawler.spiders, request,
                                           default_spider=BaseSpider('default'))
    self.crawler.crawl(spider, [request])
    self.crawler.start()
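# Hedged usage note: this run() signature matches a Scrapy command-line
# command (plausibly `scrapy fetch`), invoked roughly as:
#   scrapy fetch http://example.com/page --spider=myspider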
def append_request(self, request, spider=None, **kwargs):
    if spider is None:
        spider = create_spider_for_request(self._spiders, request, **kwargs)
    if spider:
        self.spider_requests.append((spider, [request]))
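# Hedged usage sketch, mirroring append_url above but with a prebuilt
# Request; `queue` is again a hypothetical instance.
queue.append_request(Request('http://example.com/item'))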