def test_commit(self):
    """A project added inside one ``session_scope`` is visible in the next."""
    expected_name = 'test project'

    with session_scope() as write_session:
        new_project = Project()
        new_project.name = expected_name
        write_session.add(new_project)

    # No explicit commit above: the row must already be committed once the
    # first scope has closed.
    with session_scope() as read_session:
        stored = read_session.query(Project).first()
        self.assertEqual(expected_name, stored.name)
def test_register(self):
    """POST /nodes with no extra headers stores a node for 127.0.0.1."""
    # Empty the node table so ``first()`` below can only return the node
    # created by this request.
    with session_scope() as cleanup_session:
        cleanup_session.query(Node).delete()

    response = self.fetch('/nodes', method="POST", body="")

    with session_scope() as session:
        registered = session.query(Node).first()
        self.assertEqual(200, response.code)
        self.assertEqual('127.0.0.1', registered.client_ip)
        self.assertEqual(datetime.date.today(),
                         registered.create_time.date())
        self.assertEqual(datetime.date.today(),
                         registered.last_heartbeat.date())
        self.assertEqual(True, registered.isalive)
        self.assertEqual(None, registered.tags)
def test_post_create(self):
    """Uploading an egg via /addversion.json creates the project row."""
    project_name = 'test_project'

    # Ask the server to delete the project first; both 200 and 404 are
    # accepted as an outcome of this request.
    postdata = {'project': project_name}
    response = self.fetch('/delproject.json', method='POST',
                          body=urlencode(postdata))
    self.assertIn(response.code, [404, 200])

    # The egg is opened in a ``with`` block so the file handle is closed
    # once the multipart body has been fully materialised, even on error.
    with open(TEST_EGG_FILE, 'rb') as egg_file:
        post_data = {}
        post_data['egg'] = egg_file
        post_data['project'] = project_name
        post_data['version'] = '1.0'
        post_data['_xsrf'] = 'dummy'
        datagen, headers = multipart_encode(post_data)
        # Consume the generator while the file is still open.
        databuffer = b''.join([ensure_binary(x) for x in datagen])

    headers['Cookie'] = "_xsrf=dummy"
    response = self.fetch('/addversion.json', method='POST',
                          headers=headers, body=databuffer)
    self.assertEqual(200, response.code)

    with session_scope() as session:
        project = session.query(Project)\
            .filter_by(name=project_name).first()
        self.assertIsNotNone(project)
        self.assertEqual(project.name, project_name)
def test_post(self):
    """POST /projects/<name>/delete removes the eggs, project and spiders."""
    project_name = 'test_project'
    self._upload_test_project()

    with session_scope() as session:
        project = session.query(Project).filter_by(
            name=project_name).first()
        storage_dir = self._app.settings.get('project_storage_dir')
        project_storage = ProjectStorage(storage_dir, project)

        # Precondition: the uploaded project has an eggs directory on disk.
        eggs_dir = project_storage.storage_provider.get_project_eggs_dir(
            project)
        self.assertTrue(path.exists(eggs_dir))

        res = self.fetch('/projects/%s/delete' % project_name,
                         method="POST",
                         headers={'Cookie': "_xsrf=dummy"},
                         body=urlencode({'_xsrf': 'dummy'}))
        self.assertEqual(200, res.code)

        # do not delete folder: only the list of egg versions is checked,
        # not whether the eggs directory still exists.
        self.assertEqual(len(project_storage.list_egg_versions()), 0)

        remaining_project = session.query(Project).filter_by(
            name=project_name).first()
        self.assertIsNone(remaining_project)

        remaining_spiders = session.query(Spider).filter_by(
            project_id=project.id).all()
        self.assertEqual(0, len(remaining_spiders))
def get_app(self):
    """Build the app under test with authentication enabled.

    Before the app is created, two user passwords are (re)set in the
    database, encrypted with the same secret key the app is given.
    """
    secret_key = '123'

    config = Config()
    scheduler_manager = SchedulerManager(config=config)
    scheduler_manager.init()
    node_manager = NodeManager(scheduler_manager)
    node_manager.init()

    with session_scope() as session:
        # This user is expected to exist already; only its password is reset.
        admin_user = session.query(User).filter_by(
            username='******').first()
        admin_user.password = encrypt_password('password', secret_key)
        session.add(admin_user)
        session.commit()

        # The second user is created on demand.
        normal_user = session.query(User).filter_by(
            username='******').first()
        if not normal_user:
            normal_user = User()
            normal_user.username = '******'
        normal_user.is_admin = False
        normal_user.password = encrypt_password('passw0rd', secret_key)
        session.add(normal_user)
        session.commit()

    return make_app(scheduler_manager, node_manager, None,
                    secret_key=secret_key,
                    enable_authentication=True)
def test_post(self):
    """Uploading an egg via /uploadproject creates the project row."""
    project_name = 'test_project'
    egg_path = path.join(path.dirname(__file__), '..',
                         'test_project-1.0-py2.7.egg')

    # Open the egg in a ``with`` block so the handle is closed after the
    # multipart body has been built, instead of leaking for the whole run.
    with open(egg_path, 'rb') as egg_file:
        post_data = {}
        post_data['egg'] = egg_file
        post_data['project'] = project_name
        post_data['version'] = '1.0'
        post_data['_xsrf'] = 'dummy'
        datagen, headers = multipart_encode(post_data)
        # Consume the generator while the file is still open.
        databuffer = b''.join(datagen)

    headers['Cookie'] = "_xsrf=dummy"
    response = self.fetch('/uploadproject', method='POST',
                          headers=headers, body=databuffer)
    self.assertEqual(200, response.code)

    with session_scope() as session:
        project = session.query(Project).filter_by(
            name=project_name).first()
        self.assertIsNotNone(project)
        self.assertEqual(project.name, project_name)
def test_post_with_triggers(self):
    """A project whose spider has a trigger can still be deleted."""
    project_name = 'test_project'
    spider_name = 'error_spider'
    self._upload_test_project()
    headers = {'Cookie': "_xsrf=dummy"}

    # The looked-up project used to be discarded; assert it exists so a
    # failed upload is reported here rather than as an HTTP error below.
    with session_scope() as session:
        project = session.query(Project)\
            .filter_by(name=project_name)\
            .first()
        self.assertIsNotNone(project)

    # Attach a cron trigger to the spider.
    post_data = {'_xsrf': 'dummy', 'cron': '0 0 0 0 0'}
    res = self.fetch('/projects/%s/spiders/%s/triggers'
                     % (project_name, spider_name),
                     method='POST', headers=headers,
                     body=urlencode(post_data))
    self.assertEqual(200, res.code)

    # Deleting the project must succeed with the trigger in place.
    post_data = {'_xsrf': 'dummy'}
    res = self.fetch('/projects/%s/delete' % project_name,
                     method="POST", headers=headers,
                     body=urlencode(post_data))
    self.assertEqual(200, res.code)
def upgrade():
    """Create a ``ProjectPackage`` row for every project that has none.

    For each such project the stored egg is handed to a runner, which is
    asked for the settings module and the spider list. Projects whose
    runner raises ``ProcessFailed`` or ``InvalidProjectEgg`` are logged
    and skipped.
    """
    ioloop = IOLoop.current()
    config = Config()
    runner_factory = RunnerFactory(config)

    with session_scope() as session:
        for project in session.query(Project):
            # Projects that already have a package need no migration.
            if project.package is not None:
                continue

            project_storage = ProjectStorage(
                config.get('project_storage_dir'), project)
            _version, egg_file = project_storage.get_egg()
            runner = runner_factory.build(egg_file)
            try:
                # The runner methods are run to completion on the IOLoop.
                settings_module = ioloop.run_sync(runner.settings_module)
                spider_names = ioloop.run_sync(runner.list)

                new_package = ProjectPackage()
                new_package.project = project
                new_package.type = 'scrapy'
                new_package.settings_module = settings_module
                new_package.spider_list = ','.join(spider_names)
                session.add(new_package)
                session.commit()
            except (ProcessFailed, InvalidProjectEgg) as err:
                logger.error(err)
def init_project(self, project_name):
    """Delete the project named ``project_name`` if present, then re-init.

    After the optional delete, ``AppTest.init_project()`` is called with
    no arguments.
    """
    with session_scope() as session:
        existing = session.query(Project).filter_by(
            name=project_name).first()
        if existing:
            self.project_manager.delete_project('', existing.id)

    AppTest.init_project()
def delete_project(self, user_id, project_id):
    """Delete a project with its spiders, jobs, schedules, egg and package.

    Args:
        user_id: Not used by this method.
        project_id: Primary key of the project to delete.
    """
    with session_scope() as session:
        project = session.query(Project).get(project_id)
        project_storage = ProjectStorage(
            self.project_storage_dir, project,
            self.default_project_storage_version)

        for spider in project.spiders:
            triggers = session.query(Trigger).filter_by(
                spider_id=spider.id)
            session.query(SpiderExecutionQueue).filter_by(
                spider_id=spider.id).delete()
            session.query(SpiderParameter).filter_by(
                spider_id=spider.id).delete()
            session.commit()

            # Unregister every schedule belonging to this spider.
            for trigger in triggers:
                self.scheduler_manager.remove_schedule(
                    project.name, spider.name, trigger_id=trigger.id)

            # Clear the queue again after the schedules have been removed.
            session.query(SpiderExecutionQueue).filter_by(
                spider_id=spider.id).delete()

            for historical_job in spider.historical_jobs:
                project_storage.delete_job_data(historical_job)
                session.delete(historical_job)
            session.delete(spider)

        project_storage.delete_egg()

        # ``Session.delete`` rejects ``None``, so only delete the package
        # when the project actually has one.
        package = project.package
        if package is not None:
            session.delete(package)
        session.delete(project)
def register_node(self):
    """Clear the node table, register a node and return its id.

    Returns:
        The ``id`` field of the JSON body returned by ``POST /nodes``.
    """
    with session_scope() as session:
        session.query(Node).delete()

    response = self.fetch('/nodes', method="POST", body="")
    self.assertEqual(200, response.code)

    payload = json.loads(response.body)
    return payload['id']
def test_get(self):
    """GET /spiders/<id>/egg answers 200 for an uploaded spider."""
    self._upload_test_project()

    with session_scope() as session:
        spider = session.query(Spider).first()
        self.assertIsNotNone(spider)
        # Build the URL while the spider is still attached to the session.
        egg_url = '/spiders/%d/egg' % (spider.id, )

    response = self.fetch(egg_url)
    self.assertEqual(200, response.code)
def test_register_realip(self):
    """POST /nodes records the ``X-Real-IP`` header as the client IP."""
    # Empty the node table so ``first()`` below can only return the node
    # created by this request.
    with session_scope() as cleanup_session:
        cleanup_session.query(Node).delete()

    response = self.fetch('/nodes', method="POST",
                          headers={'X-Real-IP': '1.2.3.4'}, body="")

    with session_scope() as session:
        registered = session.query(Node).first()
        self.assertEqual(200, response.code)
        self.assertEqual('1.2.3.4', registered.client_ip)
        self.assertEqual(datetime.date.today(),
                         registered.create_time.date())
        self.assertEqual(datetime.date.today(),
                         registered.last_heartbeat.date())
        self.assertEqual(True, registered.isalive)
        self.assertEqual(None, registered.tags)
def test_rollback(self):
    """An exception raised inside ``session_scope`` discards pending data."""

    class CommitFailedError(Exception):
        """Marker exception used to abort the scope on purpose."""

    try:
        with session_scope() as failing_session:
            doomed = Project()
            doomed.name = 'test project'
            failing_session.add(doomed)
            raise CommitFailedError()
    except CommitFailedError:
        # Expected: the test only cares about the database state afterwards.
        pass

    # After an exception was raised in the scope, the data should have been
    # rolled back, so no project row exists.
    with session_scope() as session:
        found = session.query(Project).first()
        self.assertIsNone(found)
def test_job_complete(self):
    """Schedule, fetch, start and complete a job; it is stored with status 2."""
    project_name = 'test_project'
    spider_name = 'success_spider'
    node_id = self.register_node()

    # Schedule a job on an empty execution queue.
    with session_scope() as session:
        session.query(SpiderExecutionQueue).delete()
        session.commit()
    schedule_body = urlencode({'project': project_name,
                               'spider': spider_name})
    res = self.fetch('/schedule.json', method='POST', body=schedule_body)
    self.assertEqual(200, res.code)

    # Fetch the job as the registered node.
    res = self.fetch('/executing/next_task', method='POST',
                     body=urlencode({'node_id': node_id}),
                     headers={'X-Dd-Nodeid': str(node_id)})
    self.assertEqual(200, res.code)
    task_id = json.loads(res.body)['data']['task']['task_id']

    # Report the job as started.
    res = self.fetch('/jobs/%s/start' % task_id, method='POST',
                     headers={'X-Dd-Nodeid': str(node_id)},
                     body=urlencode({'pid' : '1'}))
    self.assertEqual(200, res.code)

    # Complete the job, uploading a log and an items file as multipart data.
    complete_fields = {
        'task_id': task_id,
        'status': 'success',
        'log': BytesIO(b'some logs'),
        'items': BytesIO(b'{"a" : "some items"}'),
    }
    datagen, multipart_headers = multipart_encode(complete_fields)
    multipart_headers['X-Dd-Nodeid'] = str(node_id)
    res = self.fetch('/executing/complete', method='POST',
                     headers=multipart_headers,
                     body_producer=MultipartRequestBodyProducer(datagen))
    self.assertEqual(200, res.code)

    with session_scope() as session:
        complete_job = session.query(HistoricalJob).filter_by(
            id=task_id).first()
        self.assertIsNotNone(complete_job)
        self.assertEqual(2, complete_job.status)
def test_post(self):
    """POST /schedule.json for a known spider answers 200."""
    # Start from an empty execution queue.
    with session_scope() as session:
        session.query(SpiderExecutionQueue).delete()
        session.commit()

    # schedule once
    form = {'project': 'test_project', 'spider': 'success_spider'}
    response = self.fetch('/schedule.json', method='POST',
                          body=urlencode(form))
    self.assertEqual(200, response.code)
def test_get(self):
    """GET /projects/<project>/spiders/<spider> answers 200."""
    self._upload_test_project()

    with session_scope() as session:
        spider = session.query(Spider).first()
        project = spider.project
        self.assertIsNotNone(spider)
        self.assertIsNotNone(project)
        # Build the URL while both objects are still attached to the session.
        spider_url = '/projects/%s/spiders/%s' % (project.name, spider.name)

    response = self.fetch(spider_url)
    self.assertEqual(200, response.code)
def test_get_egg_by_project_spider_name(self):
    """GET /projects/<project>/spiders/<spider>/egg answers 200."""
    self._upload_test_project()

    # Sanity check: the upload produced at least one spider with a project.
    with session_scope() as session:
        first_spider = session.query(Spider).first()
        owning_project = first_spider.project
        self.assertIsNotNone(first_spider)
        self.assertIsNotNone(owning_project)

    egg_url = '/projects/%s/spiders/%s/egg' % ('test_project', 'log_spider')
    response = self.fetch(egg_url)
    self.assertEqual(200, response.code)
def test_post(self):
    """After POST /delproject.json the project row no longer exists."""
    project_name = 'test_project'

    response = self.fetch('/delproject.json', method='POST',
                          body=urlencode({'project': project_name}))
    # Both 200 and 404 are accepted as a response to this request.
    self.assertIn(response.code, [404, 200])

    with session_scope() as session:
        remaining = session.query(Project)\
            .filter_by(name=project_name).first()
        self.assertIsNone(remaining)
def setUp(self):
    """Create one node and one node key, exposing them on the test case.

    Sets ``self.node_key`` to the new ``NodeKey`` and ``self.node_id`` to
    the id of the new ``Node``.
    """
    super(NodeSecureTest, self).setUp()

    with session_scope() as session:
        node = Node()
        session.add(node)

        node_key = NodeKey()
        node_key.key = str(uuid.uuid4())
        node_key.create_at = datetime.datetime.now()
        node_key.secret_key = generate_random_string(32)
        session.add(node_key)

        session.commit()

        # Read the id after the commit, while the session is still open.
        self.node_key = node_key
        self.node_id = node.id
def test_post(self):
    """A signed POST /nodes/register creates a node bound to the key."""
    node_key = self.node_manager.create_node_key()

    # Authorization header format: "HMAC <key> <digest>".
    digest = generate_digest(node_key.secret_key, 'POST',
                             '/nodes/register', '', '')
    auth_headers = {
        'Authorization': '%s %s %s' % ('HMAC', node_key.key, digest),
    }

    response = self.fetch('/nodes/register', method="POST", body="",
                          headers=auth_headers)
    self.assertEqual(200, response.code)

    new_node_id = json.loads(response.body)['id']
    self.assertTrue(new_node_id > 0)

    # The node and the key must reference each other.
    with session_scope() as session:
        node = session.query(Node).get(new_node_id)
        self.assertEqual(node.node_key_id, node_key.id)
        updated_node_key = session.query(NodeKey).get(node_key.id)
        self.assertEqual(updated_node_key.used_node_id, new_node_id)
def test_node_creation(self):
    """GET /admin/nodes leaves exactly one unused, undeleted node key."""
    with session_scope() as session:
        # Start with no node keys at all.
        session.query(NodeKey).delete()
        session.commit()
        keys_before = session.query(NodeKey).all()
        self.assertEqual(0, len(keys_before))

        response = self.fetch('/admin/nodes')
        self.assertEqual(200, response.code)

        keys_after = session.query(NodeKey).all()
        self.assertEqual(1, len(keys_after))

        # ``== False`` is intentional: it builds a SQL comparison.
        usable_key = session.query(NodeKey).filter(
            NodeKey.used_node_id.is_(None),
            NodeKey.is_deleted == False).first()
        self.assertEqual(False, usable_key.is_deleted)
        self.assertIsNone(usable_key.used_node_id)
def upload_project(self, user_id, project_name, version, eggf):
    """Store an uploaded egg and create or update its project and spiders.

    Generator-style coroutine: it yields the results of ``runner.list()``
    and ``runner.settings_module()`` and hands the project back by raising
    ``Return``.
    NOTE(review): no decorator is visible on this definition — confirm it
    is wrapped as a coroutine where it is declared.

    Args:
        user_id: Not used by this method.
        project_name: Name used to look up, or create, the project.
        version: Version stored on a newly created project and passed to
            ``put_egg``.
        eggf: Egg file object given to the runner and to project storage.

    Returns:
        The ``Project`` instance, delivered via ``raise Return(project)``.
    """
    runner = self.runner_factory.build(eggf)
    try:
        spiders = yield runner.list()
        logger.debug('spiders: %s' % spiders)
        project_settings_module = yield runner.settings_module()
    finally:
        # Always clear the runner, whether or not inspecting the egg worked.
        runner.clear()

    with session_scope() as session:
        project = session.query(Project).filter_by(
            name=project_name).first()
        if project is None:
            # First upload under this name: create the project row.
            project = Project()
            project.name = project_name
            project.storage_version = self.default_project_storage_version
            project.version = version
            session.add(project)

        package = project.package
        if not package:
            # The project has no package yet: create one.
            package = ProjectPackage()
            package.project = project
            package.type = 'scrapy'
            package.settings_module = project_settings_module
        package.spider_list = ','.join(spiders)
        session.add(package)
        # Flush before the project is handed to ProjectStorage.
        session.flush()

        project_storage = ProjectStorage(self.project_storage_dir, project)
        project_storage.put_egg(eggf, version)
        session.refresh(project)

        # Add a Spider row for every spider name not yet stored for this
        # project.
        for spider_name in spiders:
            spider = session.query(Spider).filter_by(
                project_id=project.id, name=spider_name).first()
            if spider is None:
                spider = Spider()
                spider.name = spider_name
                spider.project_id = project.id
                session.add(spider)
                session.commit()
                session.refresh(spider)

        session.commit()
    raise Return(project)
def test_post_real_ip(self):
    """A signed POST /nodes records the ``X-Real-IP`` header as client IP."""
    self.node_key = self.node_manager.create_node_key()

    # Register a node with the fresh key first.
    res = self.fetch_secure('/nodes/register', method='POST', body='')
    self.assertEqual(200, res.code)
    node_id = json.loads(res.body)['id']

    # Authorization header format: "HMAC <key> <digest>".
    digest = generate_digest(self.node_key.secret_key, 'POST', '/nodes',
                             '', '')
    request_headers = {
        'Authorization': '%s %s %s' % ('HMAC', self.node_key.key, digest),
    }
    request_headers['X-Real-IP'] = '1.2.3.4'

    response = self.fetch('/nodes', method="POST", body="",
                          headers=request_headers)
    self.assertEqual(200, response.code)
    new_node_id = json.loads(response.body)['id']
    self.assertTrue(new_node_id > 0)

    with session_scope() as session:
        active_node = session.query(Node).get(new_node_id)
        self.assertEqual(active_node.client_ip, '1.2.3.4')
def test_get_key_expire(self):
    """GET /admin/nodes marks a two-hour-old key deleted and adds a new one."""
    with session_scope() as session:
        session.query(NodeKey).delete()

        # Insert a single key whose creation time lies two hours in the past.
        stale_key = NodeKey()
        stale_key.key = 'abc'
        stale_key.secret_key = 'cba'
        stale_key.is_deleted = False
        two_hours = datetime.timedelta(hours=2)
        stale_key.create_at = datetime.datetime.now() - two_hours
        session.add(stale_key)
        session.commit()
        # Drop cached state so the queries below reload from the database.
        session.expire_all()

        request_headers = self.populate_cookie_header(headers={})
        response = self.fetch('/admin/nodes', headers=request_headers)
        self.assertEqual(200, response.code)

        reloaded_key = session.query(NodeKey).filter_by(key='abc').first()
        self.assertTrue(reloaded_key.is_deleted)

        replacement_key = session.query(NodeKey).filter_by(
            is_deleted=False).first()
        self.assertIsNotNone(replacement_key)
def test_open(self):
    """Entering and leaving an empty ``session_scope`` must not raise."""
    with session_scope() as _session:
        # Nothing to do: opening and closing the scope is the whole test.
        pass