# `scrapyengine` and `scrapymanager` are assumed to be imported at module level
# (they are singletons in old Scrapy versions, e.g. scrapy.core.engine.scrapyengine
# and scrapy.core.manager.scrapymanager).
def webconsole_control(self, wc_request):
    args = wc_request.args
    s = "<hr />\n"

    # Stop the selected running spiders.
    if "stop_running_domains" in args:
        s += "<p>"
        stopped_domains = []
        for domain in args["stop_running_domains"]:
            if domain in self.running:
                scrapyengine.close_spider(self.running[domain])
                stopped_domains.append(domain)
        s += "Stopped spiders: <ul><li>%s</li></ul>" % "</li><li>".join(stopped_domains)
        s += "</p>"

    # Remove the selected domains from the pending-spiders queue.
    if "remove_pending_domains" in args:
        removed = []
        for domain in args["remove_pending_domains"]:
            if scrapyengine.spider_scheduler.remove_pending_domain(domain):
                removed.append(domain)
        if removed:
            s += "<p>"
            # Report only the domains that were actually removed.
            s += "Removed scheduled spiders: <ul><li>%s</li></ul>" % "</li><li>".join(removed)
            s += "</p>"

    # Schedule the selected domains for crawling, unless they are already pending.
    if "add_pending_domains" in args:
        for domain in args["add_pending_domains"]:
            if domain not in scrapyengine.scheduler.pending_requests:
                scrapymanager.crawl(domain)
        s += "<p>"
        s += "Scheduled spiders: <ul><li>%s</li></ul>" % "</li><li>".join(args["add_pending_domains"])
        s += "</p>"

    # Re-schedule finished domains and drop them from the finished set.
    if "rerun_finished_domains" in args:
        for domain in args["rerun_finished_domains"]:
            if domain not in scrapyengine.scheduler.pending_requests:
                scrapymanager.crawl(domain)
                self.finished.remove(domain)
        s += "<p>"
        s += "Re-scheduled finished spiders: <ul><li>%s</li></ul>" % "</li><li>".join(args["rerun_finished_domains"])
        s += "</p>"

    return s
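# A minimal sketch of how this handler might be exercised, assuming the usual
# Twisted web convention that `wc_request.args` maps each submitted form field
# to a list of values. `FakeRequest`, `control_panel`, and the domain names
# below are hypothetical placeholders, not part of the original code:
#
#   class FakeRequest(object):
#       args = {"add_pending_domains": ["example.com", "example.org"]}
#
#   html = control_panel.webconsole_control(FakeRequest())
#
# where `control_panel` stands in for an instance of this webconsole extension.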
def crawl(self, spider, task): ''' Crawl task on specific spider ''' spider.load(task) scrapymanager.crawl(*spider.start_urls)