Example #1
from scrapy.crawler import CrawlerProcess

# CONFIG, ALGOLIA_HELPER, STRATEGY and DocumentationSpider are assumed to be
# defined earlier in the module.
PROCESS = CrawlerProcess({
    'LOG_ENABLED': '1',
    'LOG_LEVEL': 'ERROR',
    # 'LOG_LEVEL': 'DEBUG',
    'USER_AGENT': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36',
    # Needs to be > 600 to run after the built-in RedirectMiddleware
    # (a minimal sketch of this middleware follows the listing)
    'DOWNLOADER_MIDDLEWARES': {'__main__.CustomMiddleware': 900},
    'DOWNLOADER_CLIENTCONTEXTFACTORY': 'scrapy_patch.CustomContextFactory'
})

# Register the spider together with its runtime dependencies; Scrapy forwards
# the keyword arguments to the spider's constructor (see the sketch below).
PROCESS.crawl(
    DocumentationSpider,
    config=CONFIG,
    algolia_helper=ALGOLIA_HELPER,
    strategy=STRATEGY
)

PROCESS.start()  # Blocks until the crawl has finished
PROCESS.stop()

# Post-crawl cleanup, then commit the records gathered in the temporary index.
CONFIG.destroy()

ALGOLIA_HELPER.commit_tmp_index()


print("")
print('Nb hits: ' + str(DocumentationSpider.NB_INDEXED))

CONFIG.update_nb_hits(DocumentationSpider.NB_INDEXED)

print("")