Пример #1
0
class SpiderSchedulerTest(unittest.TestCase):
    """Tests for SpiderScheduler run against throwaway eggs/dbs directories."""

    def setUp(self):
        """Build a temp layout with two project dirs and create the scheduler.

        Two project directories (``mybot1`` and ``mybot2``) are created
        under ``eggs``; the same config is used both for the queues the
        tests inspect and for the scheduler under test.
        """
        d = self.mktemp()
        eggs_dir = self.eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.mkdir(d)
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.sched = SpiderScheduler(config)

    def test_interface(self):
        """The scheduler instance must provide ISpiderScheduler."""
        verifyObject(ISpiderScheduler, self.sched)

    def test_list_update_projects(self):
        """A project dir added later is listed only after update_projects()."""
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2']))
        os.makedirs(os.path.join(self.eggs_dir, 'mybot3'))
        self.sched.update_projects()
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2', 'mybot3']))

    def test_schedule(self):
        """Scheduled jobs land in their project's queue with their kwargs.

        The third positional argument to ``schedule`` differs per job
        (2, 1, 10); the assertions expect ``mybot2`` to pop the job
        scheduled with 10 before the one scheduled with 1, and the popped
        messages to contain only the spider name and the extra kwargs.
        """
        q1, q2 = self.queues['mybot1'], self.queues['mybot2']
        # assertFalse replaces the deprecated failIf alias (removed from the
        # stdlib unittest in Python 3.12).
        self.assertFalse(q1.count())
        self.sched.schedule('mybot1', 'myspider1', 2, a='b')
        self.sched.schedule('mybot2', 'myspider2', 1, c='d')
        self.sched.schedule('mybot2', 'myspider3', 10, e='f')
        self.assertEqual(q1.pop(), {'name': 'myspider1', 'a': 'b'})
        self.assertEqual(q2.pop(), {'name': 'myspider3', 'e': 'f'})
        self.assertEqual(q2.pop(), {'name': 'myspider2', 'c': 'd'})
Пример #2
0
class SpiderSchedulerTest(unittest.TestCase):
    """Tests for SpiderScheduler run against throwaway eggs/dbs directories."""

    def setUp(self):
        """Build a temp layout with two project dirs and create the scheduler.

        Two project directories (``mybot1`` and ``mybot2``) are created
        under ``eggs``; the same config is used both for the queues the
        tests inspect and for the scheduler under test.
        """
        d = self.mktemp()
        eggs_dir = self.eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.mkdir(d)
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.sched = SpiderScheduler(config)

    def test_interface(self):
        """The scheduler instance must provide ISpiderScheduler."""
        verifyObject(ISpiderScheduler, self.sched)

    def test_list_update_projects(self):
        """A project dir added later is listed only after update_projects()."""
        self.assertEqual(sorted(self.sched.list_projects()), sorted(['mybot1', 'mybot2']))
        os.makedirs(os.path.join(self.eggs_dir, 'mybot3'))
        self.sched.update_projects()
        self.assertEqual(sorted(self.sched.list_projects()), sorted(['mybot1', 'mybot2', 'mybot3']))

    def test_schedule(self):
        """Each scheduled job is popped from its own project's queue."""
        q1 = self.queues['mybot1']
        # assertFalse replaces the deprecated failIf alias (removed from the
        # stdlib unittest in Python 3.12).
        self.assertFalse(q1.count())
        self.sched.schedule('mybot1', 'myspider1', a='b')
        self.sched.schedule('mybot2', 'myspider2', c='d')
        self.assertEqual(q1.pop(), {'name': 'myspider1', 'a': 'b'})
        q2 = self.queues['mybot2']
        self.assertEqual(q2.pop(), {'name': 'myspider2', 'c': 'd'})
Пример #3
0
    def startService(self):
        """Restore persisted job state, then start waiting for work.

        For every project returned by ``get_spider_running``:

        * each stored running entry is handed back to a ``SpiderScheduler``
          via ``schedule`` (presumably jobs interrupted by a previous
          shutdown -- confirm against the code that writes this store);
        * each stored finished entry is decoded from JSON, rebuilt as a
          ``ScrapyProcessProtocol`` with its parsed start/end times, and
          appended to ``self.finished``.

        Afterwards ``_wait_for_project`` is called for each of the
        ``self.max_proc`` slots and a startup message is logged.
        """
        spider_scheduler = SpiderScheduler(self.config)
        running = get_spider_running(self.config)
        # Loop-invariant lookup: fetch the finished-job stores once instead
        # of once per project.
        finished_jobs = get_spider_finished(self.config)
        # NOTE(review): finished jobs are only restored for projects that
        # also appear in ``running`` -- confirm this is intended.
        for project in running.keys():
            for job_id, job in running[project].iteritems():
                spider_scheduler.schedule(
                    project,
                    str(job['_spider']),
                    _job=str(job_id),
                    domain=str(job['domain']),
                    settings=job['settings'])
            for _key, raw_record in finished_jobs[project].iteritems():
                # Values in the finished store are JSON-encoded strings.
                record = json.loads(raw_record)
                pp = ScrapyProcessProtocol(
                    record['slot'], record['project'], record['spider'],
                    record['job'], record['env'], domain=record['domain'])
                pp.end_time = parser.parse(record['end_time'])
                pp.start_time = parser.parse(record['start_time'])
                self.finished.append(pp)

        for slot in range(self.max_proc):
            self._wait_for_project(slot)
        log.msg(format='Scrapyd %(version)s started: max_proc=%(max_proc)r, runner=%(runner)r',
                version=__version__, max_proc=self.max_proc,
                runner=self.runner, system='Launcher')