Example #1
def open_spider(self, spider):
    # Open a database session when the spider starts.
    self.session = DBSession()
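
In context this hook usually belongs to a Scrapy item pipeline that writes scraped items to the database. Below is a minimal sketch of what the surrounding class typically looks like, assuming DBSession is a SQLAlchemy session factory and RawData is a hypothetical mapped model for the items; it is not the original author's full class.

class DatabasePipeline:
    def open_spider(self, spider):
        # One session per crawl, created when the spider starts.
        self.session = DBSession()

    def process_item(self, item, spider):
        # Persist each scraped item; RawData is a placeholder model name.
        self.session.add(RawData(**dict(item)))
        self.session.commit()
        return item

    def close_spider(self, spider):
        # Release the database connection when the crawl finishes.
        self.session.close()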
Example #2
# Imports and globals assumed so the excerpt runs as a standalone script
# (this example uses the legacy, pre-1.0 Scrapy API: Crawler plus a manually run reactor):
from scrapy.crawler import Crawler
from scrapy import signals
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor

RUNNING_CRAWLERS = []  # spiders still running; inspected by the spider_closing callback
settings = get_project_settings()

settings.set(
    "USER_AGENT",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36"
)
settings.set(
    "ITEM_PIPELINES",
    {
        # 'pipelines.DuplicatesPipeline': 200,
        # 'pipelines.CountDropPipline': 100,
        'pipelines.SeprojectPipeline': 300
    })
# settings.set("LOG_LEVEL","INFO")

# process = CrawlerProcess(settings)

db = DBSession()
rules = db.query(Rule).filter(Rule.enable == 1)

for rule in rules:
    crawler = Crawler(settings)
    spider = BeijingSpider(rule)  # one spider instance per enabled rule
    RUNNING_CRAWLERS.append(spider)

    # stop the reactor once the spider closes (spider_closing is a callback defined elsewhere in the original script)
    crawler.signals.connect(spider_closing, signal=signals.spider_closed)
    crawler.configure()
    crawler.crawl(spider)
    crawler.start()

# reactor.run() blocks the process, so keep it as the last statement
reactor.run()
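
The loop above relies on the legacy, pre-1.0 Scrapy API (Crawler(settings), crawler.configure() and a manually run Twisted reactor). On current Scrapy releases the same "one spider per enabled rule" idea is normally written with CrawlerProcess, which manages the reactor itself. The following is a minimal sketch under the assumption that BeijingSpider's constructor accepts the rule and that DBSession and Rule come from the same project modules as above:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

settings = get_project_settings()
settings.set("ITEM_PIPELINES", {'pipelines.SeprojectPipeline': 300})

process = CrawlerProcess(settings)
db = DBSession()
for rule in db.query(Rule).filter(Rule.enable == 1):
    # CrawlerProcess builds the Crawler and instantiates the spider itself,
    # forwarding extra arguments to the spider's constructor.
    process.crawl(BeijingSpider, rule=rule)

# Starts the reactor and blocks until every scheduled crawl has finished.
process.start()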
Example #3
def start_requests(self):
    # Schedule one request per section stored in the database, passing the
    # section id to the callback through the request meta.
    db = DBSession()
    sections = db.query(ChangzhiServerSection).all()
    for section in sections:
        yield Request(section.url, self.parse, meta={'section_id': section.id})
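
The section_id stored in meta travels with the request and is available again on the response inside the callback. A minimal sketch of such a callback follows; the CSS selector and the parse_detail callback are purely illustrative, not taken from the original spider.

def parse(self, response):
    # Recover the id that start_requests attached to this request.
    section_id = response.meta['section_id']
    for href in response.css('a.topic::attr(href)').getall():
        # Keep the section id attached while following links inside the section.
        yield response.follow(href, self.parse_detail, meta={'section_id': section_id})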
Example #4
def open_spider(self, spider):
    self.session = DBSession()
    # Clear rows left over from this spider's previous run; a bound parameter avoids
    # SQL injection (requires: from sqlalchemy import text).
    engine.execute(text("DELETE FROM `RawData` WHERE `spider` = :name"), {"name": spider.name})
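
The same cleanup can also be expressed through the ORM session that was just opened, avoiding hand-written SQL entirely. A minimal sketch, assuming RawData is mapped as a SQLAlchemy declarative model with a spider column:

def open_spider(self, spider):
    self.session = DBSession()
    # Bulk-delete this spider's old rows via the ORM instead of a raw statement.
    self.session.query(RawData).filter(RawData.spider == spider.name).delete(synchronize_session=False)
    self.session.commit()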