Example #1
 def _spawn_process(self, message, slot):
     msg = native_stringify_dict(message, keys_only=False)
     file_settings = msg.pop('file_settings', None)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl']
     args += get_crawl_args(msg)
     e = self.app.getComponent(IEnvironment)
     env = e.get_environment(msg, slot)
     env = native_stringify_dict(env, keys_only=False)
     tmpfile = None
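     # Write any inline settings to a temporary module and point the crawl
     # subprocess at it via PYTHONPATH and SCRAPY_SETTINGS_MODULE_TO_OVERRIDE.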
     if file_settings:
         with NamedTemporaryFile('w',
                                 encoding='utf-8',
                                 suffix='.py',
                                 delete=False) as tmp:
             tmp.write(file_settings)
         path, name_file = os.path.split(tmp.name)
         module = os.path.splitext(name_file)[0]
         env['PYTHONPATH'] = '{}:{}'.format(path, os.environ.get('PYTHONPATH')) \
             if os.environ.get('PYTHONPATH') else path
         env['SCRAPY_SETTINGS_MODULE_TO_OVERRIDE'] = module
         tmpfile = tmp.name
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                                msg['_job'], env, tmpfile)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
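
Note: every snippet here passes its dict through native_stringify_dict first. As a minimal, hypothetical sketch of that behavior (assuming the scrapyd.utils semantics: bytes keys are decoded to str, and with keys_only=False bytes values and list elements are decoded too):

def native_stringify_dict_sketch(d, encoding='utf-8', keys_only=True):
    # Illustration only; the real helper lives in scrapyd.utils.
    out = {}
    for k, v in d.items():
        key = k.decode(encoding) if isinstance(k, bytes) else k
        if keys_only:
            value = v
        elif isinstance(v, list):
            value = [e.decode(encoding) if isinstance(e, bytes) else e for e in v]
        elif isinstance(v, bytes):
            value = v.decode(encoding)
        else:
            value = v
        out[key] = value
    return out

# Twisted request args arrive as bytes, e.g. {b'project': [b'myproject']}:
print(native_stringify_dict_sketch({b'project': [b'myproject']}, keys_only=False))
# -> {'project': ['myproject']}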
Example #2
 def _spawn_process(self, message, slot):
     msg = native_stringify_dict(message, keys_only=False)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl']
     args += get_crawl_args(msg)
     e = self.app.getComponent(IEnvironment)
     env = e.get_environment(msg, slot)
     env = native_stringify_dict(env, keys_only=False)
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'], msg['_job'], env)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
Example #3
 def _spawn_process(self, message, slot):
     msg = native_stringify_dict(message, keys_only=False)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl']
     args += get_crawl_args(msg)
     e = self.app.getComponent(IEnvironment)
     env = e.get_environment(msg, slot)
     env = native_stringify_dict(env, keys_only=False)
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                                msg['_job'], env, msg)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
Example #4
 def _spawn_process(self, message, slot):
     msg = native_stringify_dict(message, keys_only=False)
     project = msg['_project']
     args = [sys.executable, '-m', self.runner, 'crawl']
     args += get_crawl_args(msg)
     e = self.app.getComponent(IEnvironment)
     env = e.get_environment(msg, slot)
     env = native_stringify_dict(env, keys_only=False)
     log.msg(format='Scrapyd %(version)s started: name=%(name)r, env=%(env)r',
             version=__version__, name=env.get('_name', ''), env=env)
     pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
                                msg['_job'], env)
     pp.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(pp, sys.executable, args=args, env=env)
     self.processes[slot] = pp
Example #5
 def render_POST(self, txrequest):
     args = native_stringify_dict(copy(txrequest.args), keys_only=False)
     settings = args.pop('setting', [])
     settings = dict(x.split('=', 1) for x in settings)
     args = dict((k, v[0]) for k, v in args.items())
     project = args.pop('project')
     spider = args.pop('spider')
     version = args.get('_version', '')
     priority = float(args.pop('priority', 0))
     spiders = get_spider_list(project, version=version)
     if spider not in spiders:
         return {
             "status": "error",
             "message": "spider '%s' not found" % spider
         }
     args['settings'] = settings
     jobid = args.pop('jobid', uuid.uuid1().hex)
     args['_job'] = jobid
     self.root.scheduler.schedule(project,
                                  spider,
                                  priority=priority,
                                  **args)
     return {
         "node_name": self.root.nodename,
         "status": "ok",
         "jobid": jobid
     }
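
For context, this handler is the POST side of a schedule endpoint (schedule.json in stock Scrapyd). A hedged client-side sketch, assuming the default bind address and port and the field names used above:

import requests

# Hypothetical project/spider names; each 'setting' entry becomes one key in
# the settings dict built by the handler above.
resp = requests.post('http://localhost:6800/schedule.json', data={
    'project': 'myproject',
    'spider': 'myspider',
    'setting': 'DOWNLOAD_DELAY=2',
    'priority': '1',
})
print(resp.json())  # e.g. {'node_name': '...', 'status': 'ok', 'jobid': '...'}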
Example #6
 def render_POST(self, txrequest):
     args = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project = args['project'][0]
     version = args['version'][0]
     self._delete_version(project, version)
     UtilsCache.invalid_cache(project)
     return {"node_name": self.root.nodename, "status": "ok"}
Example #7
 def render_GET(self, txrequest):
     args = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project = args.get('project', [None])[0]
     spiders = self.root.launcher.processes.values()
     queues = self.root.poller.queues
     pending = [{
         "project": qname,
         "spider": x["name"],
         "id": x["_job"]
     } for qname in (queues if project is None else [project])
                for x in queues[qname].list()]
     running = [{
         "project": s.project,
         "spider": s.spider,
         "id": s.job,
         "pid": s.pid,
         "start_time": str(s.start_time),
     } for s in spiders if project is None or s.project == project]
     finished = [{
         "project": s.project,
         "spider": s.spider,
         "id": s.job,
         "start_time": str(s.start_time),
         "end_time": str(s.end_time)
     } for s in self.root.launcher.finished
                 if project is None or s.project == project]
     return {
         "node_name": self.root.nodename,
         "status": "ok",
         "pending": pending,
         "running": running,
         "finished": finished
     }
Example #8
 def render_GET(self, txrequest):
     args = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project = args['project'][0]
     versions = self.root.eggstorage.list(project)
     return {
         "node_name": self.root.nodename,
         "status": "ok",
         "versions": versions
     }
Example #9
 def render_GET(self, txrequest):
     args = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project = args['project'][0]
     version = args.get('_version', [''])[0]
     spiders = get_spider_list(project,
                               runner=self.root.runner,
                               version=version)
     return {
         "node_name": self.root.nodename,
         "status": "ok",
         "spiders": spiders
     }
Example #10
File: app.py Project: zanachka/dscrapy
def spider_stats():
    raw_stats = native_stringify_dict(redis_cli.get_all_stats(),
                                      keys_only=False)
    stats = {
        'start_time': start_time,
        'dupefilter/filtered': raw_stats.get('dupefilter/filtered', 0),
        'item_scraped_count': raw_stats.get('item_scraped_count', 0),
        'response_received_count': raw_stats.get('response_received_count', 0),
        'Page Crawled Speed': "%s pages/min" % logstats.prate,
        'Item Scraped Speed': "%s items/min" % logstats.irate,
        'Avg Page Crawled Speed': "%.2f pages/min" % logstats.avg_prate,
        'Avg Item Scraped Speed': "%.2f items/min" % logstats.avg_irate,
    }
    return jsonify(stats)
Example #11
 def render_POST(self, txrequest):
     eggf = BytesIO(txrequest.args.pop(b'egg')[0])
     args = native_stringify_dict(copy(txrequest.args), keys_only=False)
     project = args['project'][0]
     version = args['version'][0]
     self.root.eggstorage.put(eggf, project, version)
     spiders = get_spider_list(project, version=version)
     self.root.update_projects()
     UtilsCache.invalid_cache(project)
     return {
         "node_name": self.root.nodename,
         "status": "ok",
         "project": project,
         "version": version,
         "spiders": len(spiders)
     }
Example #12
 def render_POST(self, txrequest):
     args = dict((k, v[0])
                 for k, v in native_stringify_dict(copy(txrequest.args),
                                                   keys_only=False).items())
     project = args['project']
     jobid = args['job']
     signal = args.get('signal', 'TERM')
     prevstate = None
     queue = self.root.poller.queues[project]
     c = queue.remove(lambda x: x["_job"] == jobid)
     if c:
         prevstate = "pending"
     spiders = self.root.launcher.processes.values()
     for s in spiders:
         if s.project == project and s.job == jobid:
             s.transport.signalProcess(signal)
             prevstate = "running"
     return {
         "node_name": self.root.nodename,
         "status": "ok",
         "prevstate": prevstate
     }
Example #13
def index():
    return jsonify(native_stringify_dict(zw.children, keys_only=False))
Example #14
File: app.py Project: zanachka/dscrapy
def allstats():
    return jsonify(
        native_stringify_dict(redis_cli.get_all_stats(), keys_only=False))