def upgrade():
    """Create a ``ProjectPackage`` row for every project that lacks one.

    For each ``Project`` whose ``package`` is ``None`` the stored egg is
    fetched, a runner is built for it, and the runner is asked for the
    settings module and the spider list. Those values are stored on a new
    ``ProjectPackage`` of type ``'scrapy'`` which is committed per project.

    ``ProcessFailed`` and ``InvalidProjectEgg`` raised while inspecting or
    saving a project are logged and the loop continues with the next one.
    """
    ioloop = IOLoop.current()
    config = Config()
    runner_factory = RunnerFactory(config)
    with session_scope() as session:
        for project in session.query(Project):
            if project.package is not None:
                continue

            storage = ProjectStorage(config.get('project_storage_dir'),
                                     project)
            # Only the egg file is needed here; the version is ignored.
            _version, eggf = storage.get_egg()
            runner = runner_factory.build(eggf)
            try:
                project_settings_module = ioloop.run_sync(
                    runner.settings_module)
                spider_list = ioloop.run_sync(runner.list)

                package = ProjectPackage()
                package.project = project
                package.type = 'scrapy'
                package.settings_module = project_settings_module
                package.spider_list = ','.join(spider_list)
                session.add(package)
                session.commit()
            except (ProcessFailed, InvalidProjectEgg) as ex:
                logger.error(ex)
            finally:
                # Release the runner on success and on failure alike, the
                # same way upload_project does after inspecting an egg.
                runner.clear()
def test_post(self):
    """POSTing to the delete URL removes the project, its eggs and spiders."""
    name = 'test_project'
    self._upload_test_project()
    with session_scope() as session:
        project = session.query(Project).filter_by(name=name).first()
        storage = ProjectStorage(
            self._app.settings.get('project_storage_dir'), project)

        # Precondition: the upload created the project's eggs directory.
        eggs_dir = storage.storage_provider.get_project_eggs_dir(project)
        self.assertTrue(path.exists(eggs_dir))

        response = self.fetch(
            '/projects/%s/delete' % name,
            method="POST",
            headers={'Cookie': "_xsrf=dummy"},
            body=urlencode({'_xsrf': 'dummy'}))
        self.assertEqual(200, response.code)

        # The eggs folder itself is not expected to be deleted, so its
        # absence is not asserted; only the stored versions are checked.
        self.assertEqual(len(storage.list_egg_versions()), 0)

        deleted_project = session.query(Project).filter_by(name=name).first()
        self.assertIsNone(deleted_project)

        leftover_spiders = session.query(Spider).filter_by(
            project_id=project.id).all()
        self.assertEqual(0, len(leftover_spiders))
def test_get_egg_versions(self):
    """After one egg is stored, its version is the only one listed."""
    storage = ProjectStorage(
        data_dir='data',
        project=Project(id=1, name='test_project', storage_version=1))

    # Reuse the put-egg test to store the egg before listing versions.
    self.test_put_egg()

    listed_versions = storage.list_egg_versions()
    self.assertEqual(listed_versions, ['1_0'])
def test_get_egg(self):
    """Fetching an egg with version ``None`` returns the stored egg.

    The egg is stored via ``test_put_egg``; the returned file must hold
    the same bytes as the fixture egg and the returned version must be
    ``'1_0'``.
    """
    data_dir = 'data'
    project = Project(id=1, name='test_project', storage_version=1)
    version = None
    target = ProjectStorage(data_dir=data_dir, project=project)
    self.test_put_egg()

    get_version, get_file = target.get_egg(version)

    # Read the fixture inside a context manager so the handle is closed
    # even when the assertion below fails.
    with open('tests/test_project-1.0-py2.7.egg', 'rb') as expected_egg:
        expected_bytes = expected_egg.read()

    self.assertEqual(expected_bytes, get_file.read())
    self.assertEqual('1_0', get_version)
def test_get_egg_with_none_exist_version(self):
    """Requesting a version that was never stored raises EggFileNotFound."""
    storage = ProjectStorage(
        data_dir='data',
        project=Project(id=1, name='test_project', storage_version=1))
    missing_version = '2.0'
    self.test_put_egg()

    try:
        _version, _egg_file = storage.get_egg(missing_version)
    except EggFileNotFound:
        # Expected outcome: nothing more to check.
        pass
    else:
        self.fail('Should not get non-existing version file')
def test_put_egg(self):
    """Storing an egg writes ``1_0.egg`` into the project's eggs directory.

    Other tests in this file call this method to store the fixture egg
    before exercising the read paths.
    """
    data_dir = 'data'
    project = Project(id=1, name='test_project', storage_version=1)
    test_project_egg = 'tests/test_project-1.0-py2.7.egg'
    version = '1.0'

    # Open the fixture in a context manager so the handle is always closed.
    with open(test_project_egg, 'rb') as fegg:
        target = ProjectStorage(data_dir=data_dir, project=project)
        target.put_egg(fegg, version)

    target_egg_filepath = os.path.join(
        target.storage_provider.get_project_eggs_dir(project), '1_0.egg')
    self.assertTrue(os.path.exists(target_egg_filepath))
    # NOTE(review): presumably ``cmp`` is ``filecmp.cmp`` (file content
    # comparison); if it is the Python 2 builtin this only compares the
    # two path strings -- confirm against the file's imports.
    self.assertTrue(cmp(test_project_egg, target_egg_filepath))
def upload_project(self, user_id, project_name, version, eggf):
    """Inspect an uploaded egg and create or update the matching project.

    The egg is handed to a runner to obtain its spider names and settings
    module. The project (looked up by ``project_name``), its package row
    and one ``Spider`` row per spider name are then created or updated,
    and the egg is stored through ``ProjectStorage``.

    This is a generator: it yields the runner's results and delivers the
    project via ``raise Return(project)``.
    NOTE(review): presumably wrapped by a Tornado coroutine decorator that
    is not visible in this view -- confirm.

    ``user_id`` is accepted but not used in this body.
    """
    runner = self.runner_factory.build(eggf)
    try:
        spiders = yield runner.list()
        logger.debug('spiders: %s' % spiders)
        project_settings_module = yield runner.settings_module()
    finally:
        # The runner is cleared whether or not inspection succeeded.
        runner.clear()

    with session_scope() as session:
        project = session.query(Project).filter_by(
            name=project_name).first()

        # First upload under this name: create the project row.
        if project is None:
            project = Project()
            project.name = project_name
            project.storage_version = self.default_project_storage_version
        project.version = version
        session.add(project)

        # Reuse the existing package row if there is one.
        package = project.package
        if not package:
            package = ProjectPackage()
            package.project = project
        package.type = 'scrapy'
        package.settings_module = project_settings_module
        package.spider_list = ','.join(spiders)
        session.add(package)
        # Flush before the egg is stored and before project.id is read
        # below.
        # NOTE(review): presumably this is what assigns the id of a newly
        # created project -- confirm.
        session.flush()

        project_storage = ProjectStorage(self.project_storage_dir, project)
        project_storage.put_egg(eggf, version)
        session.refresh(project)

        # Add a Spider row for every spider name not yet known for this
        # project; existing rows are left untouched.
        for spider_name in spiders:
            spider = session.query(Spider).filter_by(
                project_id=project.id, name=spider_name).first()
            if spider is None:
                spider = Spider()
                spider.name = spider_name
                spider.project_id = project.id
                session.add(spider)
                session.commit()
                session.refresh(spider)

        session.commit()
    raise Return(project)
def delete_project(self, user_id, project_id):
    """Delete a project together with its spiders, jobs, eggs and package.

    For every spider of the project the queued executions and parameters
    are deleted, its triggers are unscheduled, and its historical jobs
    (including their stored data) are removed before the spider itself is
    deleted. Afterwards the stored egg, the package row (if any) and the
    project row are deleted.

    ``user_id`` is accepted but not used in this body.
    NOTE(review): an unknown ``project_id`` is not handled here; callers
    are presumably expected to pass an existing id -- confirm.
    """
    with session_scope() as session:
        project = session.query(Project).get(project_id)
        project_storage = ProjectStorage(
            self.project_storage_dir, project,
            self.default_project_storage_version)

        for spider in project.spiders:
            triggers = session.query(Trigger).filter_by(
                spider_id=spider.id)
            session.query(SpiderExecutionQueue).filter_by(
                spider_id=spider.id).delete()
            session.query(SpiderParameter).filter_by(
                spider_id=spider.id).delete()
            session.commit()

            for trigger in triggers:
                self.scheduler_manager.remove_schedule(
                    project.name, spider.name, trigger_id=trigger.id)

            # Clear the queue a second time, now that the schedules are
            # removed (kept from the original flow).
            session.query(SpiderExecutionQueue).filter_by(
                spider_id=spider.id).delete()

            for historical_job in spider.historical_jobs:
                project_storage.delete_job_data(historical_job)
                session.delete(historical_job)
            session.delete(spider)

        project_storage.delete_egg()

        # Projects may have no package row (the upgrade routine treats
        # ``project.package is None`` as a valid state); handing None to
        # session.delete() would raise instead of deleting the project.
        package = project.package
        if package is not None:
            session.delete(package)
        session.delete(project)
def put_job_data(self):
    """Store a job's log and items, then read both back and compare."""
    project = Project(id=1, name='test_project', storage_version=1)
    spider = Spider(project=project, name='test_spider')
    job = HistoricalJob(id=uuid4().hex, spider=spider)

    log_payload = BytesIO(b'test log here')
    items_payload = BytesIO(b'{"foo": "bar}')

    storage = ProjectStorage(data_dir='data', project=project)
    storage.put_job_data(job, log_payload, items_payload)

    # Each written stream is rewound and compared with what storage
    # returns for the same job: log first, then items.
    round_trips = (
        (log_payload, storage.get_job_log),
        (items_payload, storage.get_job_items),
    )
    for written, read_back in round_trips:
        stored = read_back(job)
        written.seek(0)
        self.assertEqual(written.read(), stored.read())