def test_get_spider_list(self):
    """The spider list for a project is cached until explicitly invalidated."""

    def listed_spiders():
        # Fetch and sort the current spider list for 'mybot'.
        return sorted(get_spider_list('mybot', pythonpath=get_pythonpath_scrapyd()))

    # mybot.egg has two spiders, spider1 and spider2.
    self.add_test_version('mybot.egg', 'mybot', 'r1')
    self.assertEqual(listed_spiders(), ['spider1', 'spider2'])

    # mybot2.egg has three spiders (spider1, spider2, spider3)...
    # BUT you won't see them here because the list is cached.
    # Effectively it's as if the version was never added.
    self.add_test_version('mybot2.egg', 'mybot', 'r2')
    self.assertEqual(listed_spiders(), ['spider1', 'spider2'])

    # Invalidate the cache for this project: now the updated list appears.
    UtilsCache.invalid_cache('mybot')
    self.assertEqual(listed_spiders(), ['spider1', 'spider2', 'spider3'])

    # Re-deploy mybot.egg (version r3) and clear the cache.
    # It now sees 2 spiders again.
    self.add_test_version('mybot.egg', 'mybot', 'r3')
    UtilsCache.invalid_cache('mybot')
    self.assertEqual(listed_spiders(), ['spider1', 'spider2'])

    # Re-deploying the three-spider egg (mybot2.egg) under a version that
    # isn't the highest won't change what get_spider_list() returns.
    self.add_test_version('mybot2.egg', 'mybot', 'r1a')
    UtilsCache.invalid_cache('mybot')
    self.assertEqual(listed_spiders(), ['spider1', 'spider2'])
def post(self, request, project, version, egg, **kwargs):
    """Store an uploaded egg for (project, version) and report the number
    of spiders the project now provides."""
    # The egg payload arrives as a raw string; wrap it in a file-like
    # object for the egg storage API.
    egg_file = StringIO(egg)
    self.root.eggstorage.put(egg_file, project, version)
    spider_names = get_spider_list(project)
    self.root.update_projects()
    return {
        "status": "ok",
        "project": project,
        "version": version,
        "spiders": len(spider_names),
    }
def render_POST(self, txrequest):
    """Schedule a spider run and return the generated job id.

    Expects 'project' and 'spider' arguments; optional 'setting'
    (repeatable KEY=VALUE pairs), '_version', 'priority' and 'jobid'.
    """
    args = native_stringify_dict(copy(txrequest.args), keys_only=False)
    # 'setting' may be given multiple times; each entry is "KEY=VALUE".
    raw_settings = args.pop('setting', [])
    settings = dict(entry.split('=', 1) for entry in raw_settings)
    # Collapse every remaining multi-valued argument to its first value.
    args = {key: values[0] for key, values in args.items()}
    project = args.pop('project')
    spider = args.pop('spider')
    version = args.get('_version', '')
    priority = float(args.pop('priority', 0))
    spiders = get_spider_list(project, version=version)
    if spider not in spiders:
        return {"status": "error", "message": "spider '%s' not found" % spider}
    args['settings'] = settings
    jobid = args.pop('jobid', uuid.uuid1().hex)
    args['_job'] = jobid
    self.root.scheduler.schedule(project, spider, priority=priority, **args)
    return {"node_name": self.root.nodename, "status": "ok", "jobid": jobid}
def render_GET(self, txrequest):
    """List every known project together with its spiders and the egg
    versions held in storage."""
    projects = {}
    for project in self.root.scheduler.list_projects():
        projects[project] = {
            "spiders": get_spider_list(project, runner=self.root.runner),
            "versions": self.root.eggstorage.list(project),
        }
    return {"status": "ok", "projects": projects}
def render_POST(self, txrequest):
    """Store an uploaded egg version and report its spider count."""
    project = txrequest.args['project'][0]
    version = txrequest.args['version'][0]
    # The egg payload is a raw string; wrap it for the egg storage API.
    egg_file = StringIO(txrequest.args['egg'][0])
    self.root.eggstorage.put(egg_file, project, version)
    spider_names = get_spider_list(project)
    self.root.update_projects()
    return {
        "status": "ok",
        "project": project,
        "version": version,
        "spiders": len(spider_names),
    }
def projects():
    """Build a list of {'name': ..., 'spiders': ...} dicts for every
    project known to the scheduler.

    If listing a project's spiders raises RuntimeError, record the error
    (keeping only the last one) instead of aborting the whole listing.
    """
    project_spiders = []
    scrapyd_error = None
    for project in self.root.scheduler.list_projects():
        # Skip the macOS Finder artifact. The original code read
        # `if project is '.DS_Store': pass`, which (a) compared string
        # identity rather than equality and (b) did nothing even when
        # it matched — the entry was never actually skipped.
        if project == '.DS_Store':
            continue
        try:
            project_spiders.append({
                'name': project,
                'spiders': get_spider_list(project, runner=self.root.runner),
            })
        except RuntimeError as e:  # `as` syntax: valid on Python 2.6+ and 3
            # NOTE(review): e.message is Python 2-only; kept to preserve
            # the exact message text on the py2 runtime this targets.
            scrapyd_error = "%s: %s" % (project, e.message)
def render_GET(self, txrequest):
    """List the spiders of a project, optionally for a specific egg version."""
    args = native_stringify_dict(copy(txrequest.args), keys_only=False)
    project = args['project'][0]
    # '_version' is optional; default to the empty string when absent
    # (presumably meaning "latest version" — confirm in get_spider_list).
    version = args.get('_version', [''])[0]
    spider_names = get_spider_list(
        project, runner=self.root.runner, version=version)
    return {
        "node_name": self.root.nodename,
        "status": "ok",
        "spiders": spider_names,
    }
def render_POST(self, txrequest):
    """Store an uploaded egg for (project, version), refresh project state,
    and report how many spiders the new version provides."""
    eggf = BytesIO(txrequest.args.pop(b'egg')[0])
    args = native_stringify_dict(copy(txrequest.args), keys_only=False)
    project = args['project'][0]
    version = args['version'][0]
    self.root.eggstorage.put(eggf, project, version)
    # Invalidate the cached spider list BEFORE querying it; the list is
    # cached per project until invalidated, so querying first could
    # report the spiders of a previously-uploaded version instead of
    # those in the egg just stored.
    UtilsCache.invalid_cache(project)
    spiders = get_spider_list(project, version=version)
    self.root.update_projects()
    return {
        "node_name": self.root.nodename,
        "status": "ok",
        "project": project,
        "version": version,
        "spiders": len(spiders),
    }
def test_get_spider_list(self):
    """get_spider_list() finds spiders via a freshly-written scrapyd.conf."""
    base = os.path.abspath(self.mktemp())
    eggs_dir = os.path.join(base, 'eggs')
    os.makedirs(eggs_dir)
    dbs_dir = os.path.join(base, 'dbs')
    os.makedirs(dbs_dir)
    logs_dir = os.path.join(base, 'logs')
    os.makedirs(logs_dir)
    os.chdir(base)
    # Minimal scrapyd.conf pointing at the temp directories created above.
    with open('scrapyd.conf', 'w') as f:
        f.write("[scrapyd]\n")
        f.write("eggs_dir = %s\n" % eggs_dir)
        f.write("dbs_dir = %s\n" % dbs_dir)
        f.write("logs_dir = %s\n" % logs_dir)
    app = get_application()
    eggstorage = app.getComponent(IEggStorage)
    # mybot.egg ships two spiders: spider1 and spider2.
    eggfile = StringIO(get_data(__package__, 'mybot.egg'))
    eggstorage.put(eggfile, 'mybot', 'r1')
    self.assertEqual(sorted(get_spider_list('mybot')),
                     ['spider1', 'spider2'])
def test_get_spider_list_unicode(self):
    """Spider names containing non-ASCII characters are returned intact."""
    # mybotunicode.egg has two spiders, araña1 and araña2.
    self.add_test_version('mybotunicode.egg', 'mybotunicode', 'r1')
    names = get_spider_list('mybotunicode',
                            pythonpath=get_pythonpath_scrapyd())
    self.assertEqual(sorted(names), [u'araña1', u'araña2'])
def get_spiders(self, request, pk, **kwargs):
    """Return the spider list for project ``pk``, or a plain-text 404
    response when the scheduler does not know the project."""
    known_projects = self.root.scheduler.list_projects()
    if pk not in known_projects:
        return corepost.Response(
            404,
            entity="Not found",
            headers={"Content-Type": "text/plain"},
        )
    return get_spider_list(pk, runner=self.root.runner)
def render_GET(self, txrequest):
    """List the spiders available in the requested project."""
    project_name = txrequest.args['project'][0]
    spider_names = get_spider_list(project_name, runner=self.root.runner)
    return {"status": "ok", "spiders": spider_names}