def upgrade():
    """Backfill a ``ProjectPackage`` row for every project that lacks one.

    For each project whose ``package`` is ``None``, the project's egg is
    fetched from storage, a runner is built from it, and the runner is asked
    for the settings module and the spider list. The resulting package is
    committed immediately, one project at a time.

    Projects whose egg fails with ``ProcessFailed`` or ``InvalidProjectEgg``
    are logged and skipped so one bad egg does not stop the whole upgrade.
    """
    ioloop = IOLoop.current()
    config = Config()
    runner_factory = RunnerFactory(config)
    with session_scope() as session:
        for project in session.query(Project):
            if project.package is not None:
                # Already migrated; nothing to do.
                continue
            storage = ProjectStorage(config.get('project_storage_dir'),
                                     project)
            # Only the egg file is needed here; the version label is unused.
            _, eggf = storage.get_egg()
            runner = runner_factory.build(eggf)
            try:
                # The runner calls are driven to completion synchronously
                # on the current IOLoop.
                project_settings_module = ioloop.run_sync(
                    runner.settings_module)
                spider_list = ioloop.run_sync(runner.list)
                package = ProjectPackage()
                package.project = project
                package.type = 'scrapy'
                package.settings_module = project_settings_module
                package.spider_list = ','.join(spider_list)
                session.add(package)
                session.commit()
            except (ProcessFailed, InvalidProjectEgg) as ex:
                logger.error(ex)
            finally:
                # Release whatever the runner holds for this egg, on success
                # and on failure alike, so nothing accumulates across
                # projects.
                runner.clear()
Пример #2
0
    def test_post(self):
        """POSTing to the delete URL removes the project, eggs and spiders.

        Uploads the test project, checks that its eggs directory exists,
        issues the delete request, and then verifies that no egg versions,
        no project row and no spider rows remain.
        """
        project_name = 'test_project'
        self._upload_test_project()
        with session_scope() as session:
            project_query = session.query(Project).filter_by(
                name=project_name)
            project = project_query.first()

            storage_dir = self._app.settings.get('project_storage_dir')
            project_storage = ProjectStorage(storage_dir, project)

            # Precondition: the upload created the eggs directory.
            eggs_dir = project_storage.storage_provider.get_project_eggs_dir(
                project)
            self.assertTrue(path.exists(eggs_dir))

            # The same dummy XSRF token is sent in the cookie and the body.
            delete_url = '/projects/%s/delete' % project_name
            request_body = urlencode({'_xsrf': 'dummy'})
            res = self.fetch(delete_url,
                             method="POST",
                             headers={'Cookie': "_xsrf=dummy"},
                             body=request_body)
            self.assertEqual(200, res.code)

            # The eggs folder is not expected to be deleted, only emptied
            # of egg versions.
            remaining_versions = project_storage.list_egg_versions()
            self.assertEqual(len(remaining_versions), 0)

            remaining_project = session.query(Project).filter_by(
                name=project_name).first()
            self.assertIsNone(remaining_project)

            remaining_spiders = session.query(Spider).filter_by(
                project_id=project.id).all()
            self.assertEqual(0, len(remaining_spiders))
Пример #3
0
    def test_get_egg_versions(self):
        """Listing egg versions after a put yields the single label '1_0'."""
        storage = ProjectStorage(
            data_dir='data',
            project=Project(id=1, name='test_project', storage_version=1))
        # test_put_egg is reused as the fixture that stores the egg.
        self.test_put_egg()

        listed_versions = storage.list_egg_versions()
        self.assertEqual(listed_versions, ['1_0'])
Пример #4
0
    def test_get_egg(self):
        """Fetching an egg with ``version=None`` returns the stored egg.

        After ``test_put_egg`` has stored the test egg, ``get_egg(None)``
        must return the version label '1_0' and a file object whose content
        equals the original egg file.
        """
        data_dir = 'data'
        project = Project(id=1, name='test_project', storage_version=1)
        version = None
        target = ProjectStorage(data_dir=data_dir, project=project)
        # test_put_egg is reused as the fixture that stores the egg.
        self.test_put_egg()

        get_version, get_file = target.get_egg(version)
        # Open the reference egg in a context manager so the handle is
        # closed even when the assertion fails.
        with open('tests/test_project-1.0-py2.7.egg', 'rb') as expected_egg:
            self.assertEqual(expected_egg.read(), get_file.read())
        self.assertEqual('1_0', get_version)
Пример #5
0
    def test_get_egg_with_none_exist_version(self):
        """Requesting a version that was never stored raises EggFileNotFound.

        Only the egg put by ``test_put_egg`` exists, so asking for '2.0'
        must fail.
        """
        storage = ProjectStorage(
            data_dir='data',
            project=Project(id=1, name='test_project', storage_version=1))
        missing_version = '2.0'
        # test_put_egg is reused as the fixture that stores the egg.
        self.test_put_egg()

        try:
            _version, _egg_file = storage.get_egg(missing_version)
        except EggFileNotFound:
            # Expected outcome: the requested version does not exist.
            pass
        else:
            self.fail('Should not get non-existing version file')
Пример #6
0
    def test_put_egg(self):
        """Storing an egg as version '1.0' writes an identical '1_0.egg'.

        Other tests in this file call this method to populate storage
        before making their own assertions.
        """
        # Imported locally so the comparison below is unambiguously the
        # file-content comparison, whatever ``cmp`` names at module level.
        import filecmp

        data_dir = 'data'
        project = Project(id=1, name='test_project', storage_version=1)
        test_project_egg = 'tests/test_project-1.0-py2.7.egg'
        version = '1.0'
        target = ProjectStorage(data_dir=data_dir, project=project)

        # Close the source egg as soon as it has been handed to storage.
        with open(test_project_egg, 'rb') as fegg:
            target.put_egg(fegg, version)

        target_egg_filepath = os.path.join(
            target.storage_provider.get_project_eggs_dir(project), '1_0.egg')

        self.assertTrue(os.path.exists(target_egg_filepath))
        # shallow=False forces a byte-for-byte comparison instead of
        # trusting matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(test_project_egg, target_egg_filepath, shallow=False))
Пример #7
0
    def upload_project(self, user_id, project_name, version, eggf):
        """Register an uploaded egg as a project version and sync its spiders.

        Generator-style coroutine: it yields on the runner's ``list()`` and
        ``settings_module()`` calls and delivers its result with
        ``raise Return(project)``. NOTE(review): the coroutine decorator is
        not visible in this chunk; confirm it is applied.

        Steps:
          1. Build a runner from the egg, read the spider names and the
             settings module, and always clear the runner afterwards.
          2. Create the project (looked up by name) if it does not exist
             and record the new version.
          3. Create or update the project's package with the settings
             module and the comma-joined spider list.
          4. Flush, store the egg in project storage, refresh the project.
          5. Add a ``Spider`` row for every spider name not yet present.

        ``user_id`` is accepted but not used by this method.
        """
        egg_runner = self.runner_factory.build(eggf)
        try:
            spiders = yield egg_runner.list()
            logger.debug('spiders: %s' % spiders)
            settings_module = yield egg_runner.settings_module()
        finally:
            egg_runner.clear()

        with session_scope() as session:
            project = session.query(Project).filter_by(
                name=project_name).first()

            if project is None:
                # First upload under this name: create the project record.
                project = Project()
                project.name = project_name
                project.storage_version = self.default_project_storage_version
            project.version = version
            session.add(project)

            package = project.package
            if not package:
                package = ProjectPackage()
                package.project = project
            package.type = 'scrapy'
            package.settings_module = settings_module
            package.spider_list = ','.join(spiders)
            session.add(package)

            # Flush before the storage object is built and the egg is
            # written; the original performs the steps in this order.
            session.flush()
            storage = ProjectStorage(self.project_storage_dir, project)
            storage.put_egg(eggf, version)
            session.refresh(project)

            for name in spiders:
                existing = session.query(Spider).filter_by(
                    project_id=project.id, name=name).first()
                if existing is not None:
                    continue
                new_spider = Spider()
                new_spider.name = name
                new_spider.project_id = project.id
                session.add(new_spider)
                session.commit()
                session.refresh(new_spider)

            session.commit()
        raise Return(project)
Пример #8
0
 def delete_project(self, user_id, project_id):
     """Delete a project with its spiders, job data, eggs and package.

     For every spider of the project this removes its execution-queue
     entries and parameters, unschedules each of its triggers, deletes the
     stored data and row of every historical job, and finally deletes the
     spider itself. Afterwards the project's eggs, its package (if any)
     and the project row are deleted.

     ``user_id`` is accepted but not used by this method.
     """
     with session_scope() as session:
         project = session.query(Project).get(project_id)
         project_storage = ProjectStorage(
             self.project_storage_dir, project,
             self.default_project_storage_version)
         for spider in project.spiders:
             # This is a query object; it is only iterated further down,
             # after the commit.
             triggers = session.query(Trigger).filter_by(
                 spider_id=spider.id)
             session.query(SpiderExecutionQueue).filter_by(
                 spider_id=spider.id).delete()
             session.query(SpiderParameter).filter_by(
                 spider_id=spider.id).delete()
             session.commit()
             for trigger in triggers:
                 self.scheduler_manager.remove_schedule(
                     project.name, spider.name, trigger_id=trigger.id)
             # The queue is cleared a second time after unscheduling.
             # NOTE(review): presumably to drop entries enqueued in the
             # meantime -- confirm before removing.
             session.query(SpiderExecutionQueue).filter_by(
                 spider_id=spider.id).delete()
             for historical_job in spider.historical_jobs:
                 project_storage.delete_job_data(historical_job)
                 session.delete(historical_job)
             session.delete(spider)
         project_storage.delete_egg()
         # A project may have no package row (upgrade() backfills such
         # projects); session.delete(None) would raise, so skip it.
         if project.package is not None:
             session.delete(project.package)
         session.delete(project)
Пример #9
0
    def put_job_data(self):
        """Store a job's log and items, then read both back unchanged.

        Builds an in-memory project / spider / historical job, writes a log
        stream and an items stream through ``ProjectStorage.put_job_data``,
        and asserts that ``get_job_log`` and ``get_job_items`` return the
        same bytes.
        """
        project = Project(id=1, name='test_project', storage_version=1)
        spider = Spider(project=project, name='test_spider')
        job = HistoricalJob(id=uuid4().hex, spider=spider)

        log_stream = BytesIO(b'test log here')
        items_stream = BytesIO(b'{"foo": "bar}')
        storage = ProjectStorage(data_dir='data', project=project)
        storage.put_job_data(job, log_stream, items_stream)

        # Rewind each source stream before comparing it with the stored
        # copy.
        stored_log = storage.get_job_log(job)
        log_stream.seek(0)
        expected_log = log_stream.read()
        self.assertEqual(expected_log, stored_log.read())

        stored_items = storage.get_job_items(job)
        items_stream.seek(0)
        expected_items = items_stream.read()
        self.assertEqual(expected_items, stored_items.read())