Пример #1
0
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

os.chdir('/root/spiderItem/pycan/pycan')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    # yield runner.crawl('update_company_list')
    # yield runner.crawl('update_latest_ipo_date')
    # yield runner.crawl('update_company_docs')
    yield runner.crawl('docs_downloader')
    reactor.stop()


crawl()
reactor.run()

move_nas(FILES_STORE, '/homes/CAN')
Пример #2
0
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas

os.chdir('/root/spiderItem/pyusa/pyusa')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    yield runner.crawl('update_company_list')
    yield runner.crawl('update_company_docs')
    reactor.stop()


crawl()
reactor.run()

move_nas('/data/lq/usa/reports', '/data/usa')
move_nas('/data/lq/usa/announcements', '/data/usa_announcements')
Пример #3
0
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

os.chdir('/root/spiderItem/pyrus/pyrus')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    yield runner.crawl('update_company_list_en')
    yield runner.crawl('update_company_list_ru')
    yield runner.crawl('update_company_docs')
    reactor.stop()


crawl()
reactor.run()

move_nas(FILES_STORE, '/homes/RUS')
Пример #4
0
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

os.chdir('/root/spiderItem/pygbr/pygbr')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    yield runner.crawl('update_company_list')
    yield runner.crawl('update_company_ipo_date')
    yield runner.crawl('update_company_docs')
    yield runner.crawl('update_company_docs_mate')
    reactor.stop()


crawl()
reactor.run()

move_nas(FILES_STORE, '/data/GBR')
Пример #5
0
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

os.chdir('/root/spiderItem/pyfra/pyfra')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    yield runner.crawl('update_company_list')
    yield runner.crawl('update_company_docs')
    reactor.stop()


crawl()
reactor.run()

move_nas(FILES_STORE, '/homes/FRA')
Пример #6
0
import os

from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging

from utils.mover import move_nas
from settings import FILES_STORE

os.chdir('/root/spiderItem/pybra/pybra')

configure_logging()
runner = CrawlerRunner(get_project_settings())


@defer.inlineCallbacks
def crawl():
    yield runner.crawl('update_company_list')
    yield runner.crawl('update_company_report')
    yield runner.crawl('update_company_report_mate')
    reactor.stop()


crawl()
reactor.run()

move_nas(FILES_STORE, '/data/bra')