Exemplo n.º 1
0
def main():
    """Run the Playsport crawler immediately, then every 3 hours.

    Fix: ``process.start(False)`` blocks on the Twisted reactor and — with
    stop-after-crawl disabled — never returns, so the original placed the
    "Scheduler Start" notification in unreachable code.  The notification
    is now sent *before* the reactor starts.
    """
    process = CrawlerProcess(get_project_settings())
    # Kick off one crawl right away; the interval job handles the repeats.
    process.crawl(PlaysportCrawler)

    scheduler = TwistedScheduler()
    scheduler.add_job(process.crawl,
                      'interval',
                      hours=3,
                      args=[PlaysportCrawler])
    scheduler.add_listener(my_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
    scheduler.start()

    # Must fire before the reactor: process.start(False) does not return.
    _ = _notifier(msg='\n'.join([
        "Scheduler Start",
    ]))
    process.start(False)  # False: keep the reactor alive between crawls
Exemplo n.º 2
0
class Scheduler:
    """Runs every registered scraper on a staggered, repeating interval.

    Jobs are spaced evenly across one interval so the scrapers do not all
    fire at the same moment.
    """

    def __init__(self):
        # Spider classes that each get their own interval job.
        self.scrapers = [
            HistorySpider, WpbccSpider, LWVChicago, LibraryEvents,
            GreatLakesReader
        ]
        # config.schedule_interval appears to be minutes — TODO confirm.
        self.interval_seconds = 60 * config.schedule_interval

        self.scheduler = TwistedScheduler()
        self.scheduler.add_listener(self.schedule_missed, EVENT_JOB_MISSED)

    def add_schedule(self, scraper, seconds_delay):
        """Register *scraper* to run every interval, first firing after *seconds_delay*."""
        first_run = datetime.now() + relativedelta(seconds=seconds_delay)
        self.scheduler.add_job(
            self.run_scraper,
            id=scraper.__name__,
            trigger='interval',
            args=[scraper],
            start_date=first_run,
            seconds=self.interval_seconds)

    def schedule_missed(self, event):
        """Listener called when a job's scheduled run was skipped."""
        print(f'{event.job_id} missed. Interval time: {self.interval_seconds}')

    def run_scraper(self, scraper):
        """Crawl events from today through one month out using *scraper*."""
        today = datetime.now().strftime('%m-%d-%Y')
        month_out = (datetime.now() +
                     relativedelta(months=+1)).strftime('%m-%d-%Y')
        print(f'{datetime.now()} starting {scraper.__name__}')
        crawl_runner = CrawlerRunner(get_project_settings())
        crawl_runner.crawl(scraper, today, month_out)
        crawl_runner.join()

    def run_schedule(self):
        """Stagger all scrapers across one interval, then start the reactor."""
        configure_logging()
        spacing = self.interval_seconds / len(self.scrapers)
        now = datetime.now()
        self.last_scheduled = now
        for position, scraper in enumerate(self.scrapers):
            self.add_schedule(scraper, spacing * position)

        self.scheduler.start()
        reactor.run()
Exemplo n.º 3
0
            event.job_id
        ), 'jobs')

    else:
        toLog('Event {} happenend'.format(
            event_code_translator(event.code)),
            'jobs'
        )


def event_code_translator(code):
    """Translate an apscheduler event bitmask value into its constant name.

    Returns None when *code* is not one of the known single-event values.
    """
    # Event constants are successive powers of two, so the mapping can be
    # generated from the ordered list of names.
    names = (
        'EVENT_SCHEDULER_START',
        'EVENT_SCHEDULER_SHUTDOWN',
        'EVENT_EXECUTOR_ADDED',
        'EVENT_EXECUTOR_REMOVED',
        'EVENT_JOBSTORE_ADDED',
        'EVENT_JOBSTORE_REMOVED',
        'EVENT_ALL_JOBS_REMOVED',
        'EVENT_JOB_ADDED',
        'EVENT_JOB_REMOVED',
        'EVENT_JOB_MODIFIED',
        'EVENT_JOB_EXECUTED',
        'EVENT_JOB_ERROR',
        'EVENT_JOB_MISSED',
    )
    lookup = dict(zip((1 << i for i in range(len(names))), names))
    return lookup.get(code, None)

# Register job_logger for every scheduler event type (EVENT_ALL bitmask).
scheduler.add_listener(job_logger, events.EVENT_ALL)
Exemplo n.º 4
0
        toLog(
            'Event {} for job {} happenend'.format(
                event_code_translator(event.code), event.job_id), 'jobs')

    else:
        toLog('Event {} happenend'.format(event_code_translator(event.code)),
              'jobs')


def event_code_translator(code):
    """Return the apscheduler constant name for a single-event bitmask value.

    Unknown codes (including combined masks) yield None.
    """
    # The constants are the powers of two 1..4096, in declaration order.
    ordered_names = [
        'EVENT_SCHEDULER_START',
        'EVENT_SCHEDULER_SHUTDOWN',
        'EVENT_EXECUTOR_ADDED',
        'EVENT_EXECUTOR_REMOVED',
        'EVENT_JOBSTORE_ADDED',
        'EVENT_JOBSTORE_REMOVED',
        'EVENT_ALL_JOBS_REMOVED',
        'EVENT_JOB_ADDED',
        'EVENT_JOB_REMOVED',
        'EVENT_JOB_MODIFIED',
        'EVENT_JOB_EXECUTED',
        'EVENT_JOB_ERROR',
        'EVENT_JOB_MISSED',
    ]
    return {1 << i: name
            for i, name in enumerate(ordered_names)}.get(code, None)


# Register job_logger for every scheduler event type (EVENT_ALL bitmask).
scheduler.add_listener(job_logger, events.EVENT_ALL)
Exemplo n.º 5
0
            # else:
            #     # job not scheduled, add it and run now
            #     scheduler.add_job(FilterAndInsertData, 'cron', args=[PttMoviesSpider])


if __name__ == '__main__':
    # One shared CrawlerProcess drives both spiders; the Twisted-backed
    # scheduler fires crawls inside the same reactor.
    process = CrawlerProcess(get_project_settings())
    scheduler = TwistedScheduler()

    # NOTE(review): `trigger` is a bare name assumed to be defined earlier
    # in this file — confirm it is a valid apscheduler trigger.
    scheduler.add_job(
        process.crawl, trigger, args=[YahoomovieSpider], name='yahoo')
    # scheduler.get_job(job_id ="my_job_id").modify(next_run_time=datetime.datetime.now())

    # Nightly PTT movies crawl at 23:59.
    scheduler.add_job(
        process.crawl,
        'cron',
        args=[PttMoviesSpider],
        hour='23',
        minute='59',
        name='ptt')
    # scheduler.add_job(FilterAndInsertData, 'cron', day='last sun', name='insertData')

    scheduler.add_listener(execution_listener,
                           EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
    scheduler.start()
    process.start(False)  # Do not stop reactor after spider closes

    # try:
    #     while True:
    #         time.sleep(1)
    # except (KeyboardInterrupt, SystemExit):
    #     scheduler.shutdown()
Exemplo n.º 6
0
import logging

# Log to list.log, truncating it on every run.
logging.basicConfig(
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
    filename='list.log',
    filemode='w')

# One counter slot per scheduled job.
n = 6000
ll = [0] * n


def func(i):
    """Increment the execution counter for job *i*."""
    ll[i] += 1

def err_lis(ev):
    """Log a scheduler error/missed event through the root logger."""
    logging.getLogger("").error(str(ev))

#scheduler = BackgroundScheduler()
scheduler = TwistedScheduler()

# Stagger the n jobs across ten 1-second start offsets so they do not all
# fire simultaneously; each job then repeats every 10 seconds.
for i in range(n):
    start = datetime.datetime.now() + datetime.timedelta(seconds=i % 10)
    scheduler.add_job(func, 'interval', args=(i,), start_date=start, seconds=10)

scheduler.add_listener(
    err_lis,
    apscheduler.events.EVENT_JOB_ERROR | apscheduler.events.EVENT_JOB_MISSED)
scheduler.start()

# Let the scheduler run briefly, then count how many jobs actually fired.
time.sleep(5)
scheduler.shutdown()
s = sum(ll)
print(s)  # fixed: `print s` was Python 2 syntax — a SyntaxError under Python 3