Example #1
def process_item(self, spider, item):
    # items_per_spider and close_spider come from the enclosing module;
    # stats, log and scrapyengine are old Scrapy module-level singletons.
    sampled = stats.get_value("items_sampled", 0, spider=spider)
    if sampled < items_per_spider:
        self.items[item.guid] = item
        sampled += 1
        stats.set_value("items_sampled", sampled, spider=spider)
        log.msg("Sampled %s" % item, spider=spider, level=log.INFO)
        # Close the spider once the requested number of items has been sampled
        if close_spider and sampled == items_per_spider:
            scrapyengine.close_spider(spider)
    return item
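
A rough sketch of the same sampling pipeline against the current Scrapy API, where stats and the engine are reached through the crawler object rather than module-level singletons. The class name, the ITEMS_PER_SPIDER setting and the id()-based key are illustrative assumptions, not part of the example above.

class ItemSamplerPipeline:
    def __init__(self, crawler):
        self.crawler = crawler
        self.limit = crawler.settings.getint("ITEMS_PER_SPIDER", 100)
        self.items = {}

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def process_item(self, item, spider):  # current pipelines take (item, spider)
        stats = self.crawler.stats
        sampled = stats.get_value("items_sampled", 0, spider=spider)
        if sampled < self.limit:
            self.items[id(item)] = item  # the original keyed items by item.guid
            sampled += 1
            stats.set_value("items_sampled", sampled, spider=spider)
            spider.logger.info("Sampled %r", item)
            if sampled == self.limit:
                # same effect as scrapyengine.close_spider(spider) above
                self.crawler.engine.close_spider(spider, reason="items_sampled")
        return item
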
Example #2
    def webconsole_control(self, wc_request):
        # Handle web console form submissions: stop running spiders,
        # remove or add pending domains, and rerun finished ones.
        args = wc_request.args
        s = "<hr />\n"

        if "stop_running_domains" in args:
            s += "<p>"
            stopped_domains = []
            for domain in args["stop_running_domains"]:
                if domain in self.running:
                    scrapyengine.close_spider(self.running[domain])
                    stopped_domains.append(domain)
            s += "Stopped spiders: <ul><li>%s</li></ul>" % "</li><li>".join(stopped_domains)
            s += "</p>"
        if "remove_pending_domains" in args:
            removed = []
            for domain in args["remove_pending_domains"]:
                if scrapyengine.spider_scheduler.remove_pending_domain(domain):
                    removed.append(domain)
            if removed:
                s += "<p>"
                s += "Removed scheduled spiders: <ul><li>%s</li></ul>" % "</li><li>".join(removed)
                s += "</p>"
        if "add_pending_domains" in args:
            for domain in args["add_pending_domains"]:
                if domain not in scrapyengine.scheduler.pending_requests:
                    scrapymanager.crawl(domain)
            s += "<p>"
            s += "Scheduled spiders: <ul><li>%s</li></ul>" % "</li><li>".join(args["add_pending_domains"])
            s += "</p>"
        if "rerun_finished_domains" in args:
            for domain in args["rerun_finished_domains"]:
                if domain not in scrapyengine.scheduler.pending_requests:
                    scrapymanager.crawl(domain)
                self.finished.remove(domain)
            s += "<p>"
            s += "Re-scheduled finished spiders: <ul><li>%s</li></ul>" % "</li><li>".join(args["rerun_finished_domains"])
            s += "</p>"

        return s
Example #3
def item_passed(self, item, spider):
    # Count items passed per spider and close the spider once the
    # configured limit (self.itempassed) is reached.
    self.counts[spider] += 1
    if self.counts[spider] == self.itempassed:
        scrapyengine.close_spider(spider, 'closespider_itempassed')
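
Example #3 mirrors what current Scrapy provides through the built-in CloseSpider extension and its CLOSESPIDER_ITEMCOUNT setting. A rough sketch of the same idea against the current API follows: the old item_passed signal corresponds to today's item_scraped, and the engine is reached through the crawler object. The class name and the CLOSE_AFTER_ITEMS setting are illustrative assumptions; the extension would be enabled via the EXTENSIONS setting.

from scrapy import signals


class CloseAfterNItems:
    def __init__(self, crawler, limit):
        self.crawler = crawler
        self.limit = limit
        self.counts = {}
        crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler, crawler.settings.getint("CLOSE_AFTER_ITEMS", 10))

    def item_scraped(self, item, spider):
        self.counts[spider] = self.counts.get(spider, 0) + 1
        if self.counts[spider] == self.limit:
            # same call as the examples above, reached through crawler.engine
            self.crawler.engine.close_spider(spider, reason="close_after_n_items")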