Пример #1
0
 def test_stop(self):
     """
     Check that stop() leaves the selector with an empty ``gens`` list
     and with both ``started`` and ``curr`` set to None.
     """
     selector = Selector(gt10, is_even, self.gens)

     selector.stop()

     # After stopping, nothing should remain from the construction inputs.
     self.assertEqual([], selector.gens)
     self.assertIsNone(selector.started)
     self.assertIsNone(selector.curr)
Пример #2
0
 def test_stop(self):
     """
     Stopping a freshly built selector must reset its state: ``gens``
     becomes an empty list, ``started`` and ``curr`` become None.
     """
     stopped = Selector(gt10, is_even, self.gens)
     stopped.stop()

     # Verify each piece of state exposed by the selector after stop().
     self.assertEqual([], stopped.gens)
     self.assertIsNone(stopped.started)
     self.assertIsNone(stopped.curr)
Пример #3
0
class Scheduler(object):
    """Facade over the site scheduler manager, selectors and statistics.

    The constructor builds every collaborator from ``conf``; the remaining
    methods forward task/site operations to the ``SiteSchedulerManager`` and
    start or stop all collaborators together.
    """

    def __init__(self, conf):
        """Build all collaborators from ``conf``.

        ``conf`` must support ``.get`` and item access and provide the keys
        ``'site_task_collect_db'``, ``'seed_task_collect_db'``,
        ``'selector_conf'`` and ``'beanstalk_conf'``. ``'log'`` is read with
        ``.get`` and is used unconditionally by the other methods.
        """
        self.conf = conf
        # NOTE(review): a missing 'log' key yields None here and would make
        # every logging call below fail -- confirm callers always supply it.
        self.log = conf.get('log')
        self.site_statistic = SiteStatistics(conf['site_task_collect_db'])
        self.seed_statistic = SeedStatistics(conf['seed_task_collect_db'])
        self.scheduler = SiteSchedulerManager(self.conf, self.site_statistic,
                                              self.seed_statistic)
        self.selector = Selector(self.scheduler, self.conf)
        self.crawl_selector = CrawlSelector(self.log, conf['selector_conf'],
                                            conf['beanstalk_conf'],
                                            self.scheduler)

    def schedule_task(self, task):
        """Forward ``task`` to the scheduler manager; falsy tasks are ignored."""
        if task:
            self.scheduler.schedule_task(task)

    def select_seed(self):
        """Ask the scheduler manager to select a seed.

        Returns ``False`` if the call raised (the traceback is logged) and
        ``None`` otherwise.
        """
        try:
            self.scheduler.select_seed()
        except Exception:
            # Tagged with this method's own name so that failures are not
            # mistaken for start_one_site_tasks errors in the log.
            self.log.error('select_seed\terror:%s' % traceback.format_exc())
            return False

    def start_one_site_tasks(self, site):
        """Start the tasks of ``site``.

        Returns whatever the scheduler manager returns, or ``False`` if the
        call raised (the traceback is logged).
        """
        try:
            self.log.info('start_one_site_tasks\tsite:%s starting' % site)
            return self.scheduler.start_one_site_tasks(site)
        except Exception:
            self.log.error('start_one_site_tasks\tsite:%s\terror:%s' %
                           (site, traceback.format_exc()))
            return False

    def stop_one_site_tasks(self, site):
        """Stop the tasks of ``site`` and return the manager's result."""
        self.log.info('site:%s stopping...' % site)
        return self.scheduler.stop_one_site_tasks(site)

    def clear_one_site_cache(self, site):
        """Clear the cache of ``site`` and return the manager's result."""
        self.log.info('site:%s clear cache...' % site)
        return self.scheduler.clear_one_site_cache(site)

    def restart_seed(self, seed_id, site):
        """Restart seed ``seed_id`` of ``site`` and return the manager's result."""
        self.log.info('seed:%s site:%s restart...' % (seed_id, site))
        return self.scheduler.restart_seed(seed_id, site)

    def start(self):
        """Start the scheduler manager, both selectors and both statistics."""
        self.scheduler.start()
        self.crawl_selector.start()
        self.selector.start()
        self.site_statistic.start()
        self.seed_statistic.start()
        self.log.info('scheduler start successful')

    def _stop_workers(self):
        """Stop the selectors and the statistics collectors, in that order."""
        self.selector.stop()
        self.crawl_selector.stop()
        self.site_statistic.stop()
        self.seed_statistic.stop()

    def save_status(self):
        """Save the scheduler manager's status, then stop the other parts.

        The scheduler manager itself is not stopped by this method.
        """
        self.scheduler.save_status()
        self._stop_workers()

    def stop(self):
        """Stop the scheduler manager, then the selectors and statistics."""
        self.scheduler.stop()
        self._stop_workers()