# encoding=utf-8
"""Run bbsSpider in-process using the (legacy) Scrapy Crawler + Twisted reactor API."""
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy.settings import Settings
from scrapy import log

from datacrawler.spiders.bbs_spider import bbsSpider

# Build the spider and wire a crawler with default settings.
bbs_spider = bbsSpider(domain="bbs.nju.edu.cn")
bbs_crawler = Crawler(Settings())
bbs_crawler.configure()
bbs_crawler.crawl(bbs_spider)
bbs_crawler.start()

# Turn on Scrapy logging, then hand control to the Twisted event loop;
# reactor.run() blocks until the reactor is stopped.
log.start()
reactor.run()
import multiprocessing
from multiprocessing import Queue  # multiprocessing.queues.Queue needs a ctx arg on Py3

# FIX: these names were used below but never imported (NameError at runtime).
from scrapy import project, signals
from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
from scrapy.xlib.pydispatch import dispatcher

from datacrawler.spiders.bbs_spider import bbsSpider


class CrawlerWorker(multiprocessing.Process):
    """Run a Scrapy spider in a child process and ship the scraped items
    back to the parent through a multiprocessing queue.

    Parameters:
        spider: the spider instance to crawl.
        result_queue: multiprocessing.Queue the collected items are put on
            once the crawl finishes.
    """

    def __init__(self, spider, result_queue):
        multiprocessing.Process.__init__(self)
        self.result_queue = result_queue
        # FIX: Settings must be instantiated; the original passed the class itself.
        self.crawler = CrawlerProcess(Settings())
        # Legacy Scrapy: install the crawler into scrapy.project once per process.
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()
        self.items = []
        self.spider = spider
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        # Signal handler for signals.item_passed: just collect the item.
        # FIX: the original put the whole crawl/start/stop logic here, which
        # would re-launch the crawler on every item and left run() undefined.
        self.items.append(item)

    def run(self):
        # Executed in the child process by Process.start(): run the crawl to
        # completion, then hand the collected items back to the parent.
        self.crawler.crawl(self.spider)
        self.crawler.start()
        self.crawler.stop()
        self.result_queue.put(self.items)


if __name__ == "__main__":
    # Guarded entry point — required so the child process does not re-execute
    # the script body under the 'spawn' start method.
    result_queue = Queue()
    crawler = CrawlerWorker(bbsSpider(domain="bbs.nju.edu.cn"), result_queue)
    crawler.start()
    for item in result_queue.get():
        print(item)  # FIX: Python-2 `print item` statement