"""Run the pycan 'docs_downloader' spider, then move the downloaded files to NAS."""
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

# Spiders resolve relative paths/settings against the project directory.
os.chdir('/root/spiderItem/pycan/pycan')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    """Run the spider chain sequentially, then stop the reactor.

    The try/finally guarantees ``reactor.stop()`` fires even when a crawl
    errbacks; without it the reactor would run forever and the ``move_nas``
    step below would never execute.
    """
    try:
        # yield runner.crawl('update_company_list')
        # yield runner.crawl('update_latest_ipo_date')
        # yield runner.crawl('update_company_docs')
        yield runner.crawl('docs_downloader')
    finally:
        reactor.stop()


crawl()
reactor.run()  # blocks until crawl() stops the reactor
move_nas(FILES_STORE, '/homes/CAN')
"""Run the pyusa company-list and company-docs spiders, then move results to NAS."""
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas

# Spiders resolve relative paths/settings against the project directory.
os.chdir('/root/spiderItem/pyusa/pyusa')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    """Run the spiders sequentially, then stop the reactor.

    The try/finally guarantees ``reactor.stop()`` fires even when a crawl
    errbacks; without it the reactor would run forever and neither
    ``move_nas`` step below would ever execute.
    """
    try:
        yield runner.crawl('update_company_list')
        yield runner.crawl('update_company_docs')
    finally:
        reactor.stop()


crawl()
reactor.run()  # blocks until crawl() stops the reactor
move_nas('/data/lq/usa/reports', '/data/usa')
move_nas('/data/lq/usa/announcements', '/data/usa_announcements')
"""Run the pyrus company-list (EN/RU) and company-docs spiders, then move results to NAS."""
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

# Spiders resolve relative paths/settings against the project directory.
os.chdir('/root/spiderItem/pyrus/pyrus')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    """Run the spiders sequentially, then stop the reactor.

    The try/finally guarantees ``reactor.stop()`` fires even when a crawl
    errbacks; without it the reactor would run forever and the ``move_nas``
    step below would never execute.
    """
    try:
        yield runner.crawl('update_company_list_en')
        yield runner.crawl('update_company_list_ru')
        yield runner.crawl('update_company_docs')
    finally:
        reactor.stop()


crawl()
reactor.run()  # blocks until crawl() stops the reactor
move_nas(FILES_STORE, '/homes/RUS')
"""Run the pygbr spider chain (list, IPO dates, docs, doc metadata), then move results to NAS."""
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

# Spiders resolve relative paths/settings against the project directory.
os.chdir('/root/spiderItem/pygbr/pygbr')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    """Run the spiders sequentially, then stop the reactor.

    The try/finally guarantees ``reactor.stop()`` fires even when a crawl
    errbacks; without it the reactor would run forever and the ``move_nas``
    step below would never execute.
    """
    try:
        yield runner.crawl('update_company_list')
        yield runner.crawl('update_company_ipo_date')
        yield runner.crawl('update_company_docs')
        yield runner.crawl('update_company_docs_mate')
    finally:
        reactor.stop()


crawl()
reactor.run()  # blocks until crawl() stops the reactor
move_nas(FILES_STORE, '/data/GBR')
"""Run the pyfra company-list and company-docs spiders, then move results to NAS."""
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

# Spiders resolve relative paths/settings against the project directory.
os.chdir('/root/spiderItem/pyfra/pyfra')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    """Run the spiders sequentially, then stop the reactor.

    The try/finally guarantees ``reactor.stop()`` fires even when a crawl
    errbacks; without it the reactor would run forever and the ``move_nas``
    step below would never execute.
    """
    try:
        yield runner.crawl('update_company_list')
        yield runner.crawl('update_company_docs')
    finally:
        reactor.stop()


crawl()
reactor.run()  # blocks until crawl() stops the reactor
move_nas(FILES_STORE, '/homes/FRA')
"""Run the pybra spider chain (list, reports, report metadata), then move results to NAS."""
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

# Spiders resolve relative paths/settings against the project directory.
os.chdir('/root/spiderItem/pybra/pybra')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    """Run the spiders sequentially, then stop the reactor.

    The try/finally guarantees ``reactor.stop()`` fires even when a crawl
    errbacks; without it the reactor would run forever and the ``move_nas``
    step below would never execute.
    """
    try:
        yield runner.crawl('update_company_list')
        yield runner.crawl('update_company_report')
        yield runner.crawl('update_company_report_mate')
    finally:
        reactor.stop()


crawl()
reactor.run()  # blocks until crawl() stops the reactor
move_nas(FILES_STORE, '/data/bra')