def crawl_url(self, async_mode, url, input_message, user_agent, referer,
              page_last_modified):
    request_header = copy.deepcopy(settings.default_headers)

    # Set the user agent.
    if user_agent is not None:
        request_header['User-Agent'] = user_agent

    # Set the referer.
    if referer is not None:
        request_header['Referer'] = referer

    # Set If-Modified-Since so the server can reply 304 Not Modified
    # for pages that have not changed since the last crawl.
    if page_last_modified is not None:
        request_header['If-Modified-Since'] = page_last_modified

    meta = input_message
    meta['dns_cache_enabled'] = settings.dns_cache_enabled
    meta['chunked_transfer_decoding'] = settings.chunked_transfer_decoding

    # Start crawling.
    result = self._downloader.crawl(
        async_mode, url, settings.timeout, request_header,
        settings.robotstxt_enabled, meta)
    return misc.postprocess(async_mode, result, self.process_crawler_response)
def crawl(self, async_mode, url, timeout, request_header, robotstxt_enabled,
          meta):
    success, result = Downloader.preprocess(url, robotstxt_enabled)
    if not success:
        # Pre-processing rejected the URL (e.g. disallowed by robots.txt).
        # In async mode, wrap the failure so the caller still receives a
        # deferred result instead of a bare error value.
        if async_mode:
            result = defer_fail(result)
    else:
        if async_mode:
            result = self._crawl_async(url, timeout, request_header, meta)
        else:
            result = self._crawl_sync(url, timeout, request_header, meta)
    return misc.postprocess(async_mode, result, Downloader.postprocess)
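
# Minimal usage sketch (not part of the original source). It assumes the two
# methods above live on Crawler and Downloader classes and that a Crawler can
# be constructed with no arguments; the class name, constructor, and argument
# values below are assumptions, not the original API.
crawler = Crawler()

# Synchronous mode: the post-processed response is returned directly.
response = crawler.crawl_url(
    async_mode=False,
    url='http://example.com/',
    input_message={},
    user_agent='example-crawler/1.0',
    referer=None,
    page_last_modified=None,
)

# Asynchronous mode: the use of defer_fail above suggests a Twisted-style
# pipeline, so the return value is presumably a Deferred that fires with the
# post-processed response once the download completes.
deferred = crawler.crawl_url(
    async_mode=True,
    url='http://example.com/',
    input_message={},
    user_agent=None,
    referer='http://example.com/index.html',
    page_last_modified='Sat, 29 Oct 1994 19:43:31 GMT',
)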