class SpiderSchedulerTest(unittest.TestCase):
    """Exercise SpiderScheduler against the per-project spider queues."""

    def setUp(self):
        """Create eggs/dbs directories holding two projects and build the scheduler."""
        d = self.mktemp()
        eggs_dir = self.eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.mkdir(d)
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        # One sub-directory of eggs_dir per project the tests expect to see.
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.sched = SpiderScheduler(config)

    def test_interface(self):
        """The scheduler must provide ISpiderScheduler."""
        verifyObject(ISpiderScheduler, self.sched)

    def test_list_update_projects(self):
        """A project directory added later is listed after update_projects()."""
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2']))
        os.makedirs(os.path.join(self.eggs_dir, 'mybot3'))
        self.sched.update_projects()
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2', 'mybot3']))

    def test_schedule(self):
        """Scheduled spiders land in their project's queue, ordered by priority."""
        q1, q2 = self.queues['mybot1'], self.queues['mybot2']
        # assertFalse replaces the deprecated failIf alias.
        self.assertFalse(q1.count())
        self.sched.schedule('mybot1', 'myspider1', 2, a='b')
        self.sched.schedule('mybot2', 'myspider2', 1, c='d')
        self.sched.schedule('mybot2', 'myspider3', 10, e='f')
        self.assertEqual(q1.pop(), {'name': 'myspider1', 'a': 'b'})
        # myspider3 was scheduled with priority 10 and is expected before
        # myspider2, which was scheduled with priority 1.
        self.assertEqual(q2.pop(), {'name': 'myspider3', 'e': 'f'})
        self.assertEqual(q2.pop(), {'name': 'myspider2', 'c': 'd'})
class SpiderSchedulerTest(unittest.TestCase):
    """Exercise SpiderScheduler against the per-project spider queues."""

    def setUp(self):
        """Create eggs/dbs directories holding two projects and build the scheduler."""
        d = self.mktemp()
        eggs_dir = self.eggs_dir = os.path.join(d, 'eggs')
        dbs_dir = os.path.join(d, 'dbs')
        os.mkdir(d)
        os.makedirs(eggs_dir)
        os.makedirs(dbs_dir)
        # One sub-directory of eggs_dir per project the tests expect to see.
        os.makedirs(os.path.join(eggs_dir, 'mybot1'))
        os.makedirs(os.path.join(eggs_dir, 'mybot2'))
        config = Config(values={'eggs_dir': eggs_dir, 'dbs_dir': dbs_dir})
        self.queues = get_spider_queues(config)
        self.sched = SpiderScheduler(config)

    def test_interface(self):
        """The scheduler must provide ISpiderScheduler."""
        verifyObject(ISpiderScheduler, self.sched)

    def test_list_update_projects(self):
        """A project directory added later is listed after update_projects()."""
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2']))
        os.makedirs(os.path.join(self.eggs_dir, 'mybot3'))
        self.sched.update_projects()
        self.assertEqual(sorted(self.sched.list_projects()),
                         sorted(['mybot1', 'mybot2', 'mybot3']))

    def test_schedule(self):
        """Each scheduled spider is popped from the queue of its own project."""
        q = self.queues['mybot1']
        # assertFalse replaces the deprecated failIf alias.
        self.assertFalse(q.count())
        self.sched.schedule('mybot1', 'myspider1', a='b')
        self.sched.schedule('mybot2', 'myspider2', c='d')
        self.assertEqual(q.pop(), {'name': 'myspider1', 'a': 'b'})
        q = self.queues['mybot2']
        self.assertEqual(q.pop(), {'name': 'myspider2', 'c': 'd'})
def startService(self):
    """Re-queue recorded running jobs, restore finished jobs, then start waiting.

    For every project returned by get_spider_running(), each stored entry is
    handed back to a SpiderScheduler with its job id, domain and settings.
    Entries from get_spider_finished() are JSON-decoded, turned into
    ScrapyProcessProtocol objects with parsed start/end times, and appended
    to self.finished. Finally every slot in range(self.max_proc) is passed
    to self._wait_for_project() and a startup message is logged.
    """
    # NOTE(review): the source had lost its indentation; the finished-jobs
    # pass is kept inside the per-project loop because it indexes by
    # project -- confirm against the original file.
    scheduler = SpiderScheduler(self.config)
    running_by_project = get_spider_running(self.config)
    for project in running_by_project.keys():
        running_entries = running_by_project[project]
        for entry in running_entries.iteritems():
            # entry[0] is used as the job id, entry[1] as the stored job data.
            scheduler.schedule(
                project,
                str(entry[1]['_spider']),
                _job=str(entry[0]),
                domain=str(entry[1]['domain']),
                settings=entry[1]['settings']
            )

        finished_by_project = get_spider_finished(self.config)
        finished_entries = finished_by_project[project]
        for entry in finished_entries.iteritems():
            record = json.loads(entry[1])
            protocol = ScrapyProcessProtocol(
                record['slot'],
                record['project'],
                record['spider'],
                record['job'],
                record['env'],
                domain=record['domain']
            )
            protocol.end_time = parser.parse(record['end_time'])
            protocol.start_time = parser.parse(record['start_time'])
            self.finished.append(protocol)

    for slot in range(self.max_proc):
        self._wait_for_project(slot)

    log.msg(
        format='Scrapyd %(version)s started: max_proc=%(max_proc)r, runner=%(runner)r',
        version=__version__,
        max_proc=self.max_proc,
        runner=self.runner,
        system='Launcher'
    )