示例#1
0
    def process(self, *args, **kwargs):
        """Run the positional arguments through every step in ``self.steps``.

        Each step's ``process`` is invoked through ``gen.Task`` and must
        produce an ``(action, result)`` pair.  The result becomes the single
        positional argument handed to the next step.  A step whose action
        equals ``Step.STOP`` ends the run early.

        Args:
            *args: Arguments for the first step.  When none are given a
                single ``None`` is used.
            **kwargs: ``callback`` (optional) is removed and called once with
                the final result; every other keyword is forwarded unchanged
                to each step.

        A step that raises is logged and skipped: the following step receives
        the same arguments the failed step was given.  If no step produces a
        result, the callback receives the first input argument.
        """
        if not args:
            args = [None]

        # The callback is optional (it is only invoked when truthy below), so
        # a missing key must not raise KeyError.
        callback = kwargs.pop('callback', None)

        nargs = args
        # Seed the result with the input so the callback always has a value,
        # even when there are no steps or every step failed.
        res = args[0]
        for item in self.steps:
            logging.debug("Running %s %r", item.name, args[0])

            # Reset per iteration so a failing step can never reuse the
            # previous step's (action, result) pair.
            yvalue = None
            try:
                t0 = time.time()
                yvalue = yield gen.Task(item.process, *nargs, **kwargs)
                dt = time.time() - t0

                systemMetrics.add('pipeline:%s:time' % item.name, dt)
                systemMetrics.incr('pipeline:%s:calls' % item.name)
            except Exception as e:
                logging.error("Pipeline Exception", exc_info=e)

            if yvalue is None:
                # The step failed before delivering a result: skip it and
                # keep the current arguments for the next step.
                continue

            action, res = yvalue

            if action == Step.STOP:
                logging.debug("Processing stoped by %s",
                              item.__class__.__name__)
                break
            nargs = [res]

        if callback:
            callback(res)
示例#2
0
文件: app.py 项目: pavanfan/spiro
    def loop(self):
        """Process at most one queued task, then schedule the next iteration.

        Flow:
          * returns immediately when ``self._stopping`` is set;
          * when the queue is falsy or ``crawler_running`` is off, re-arms
            itself on the IOLoop after one second;
          * otherwise pops a ``(task, complete_cb)`` pair from the queue, runs
            the task through ``self.pipeline.process``, calls
            ``complete_cb(True, task)``, records metrics and a ``LogEvent``,
            and re-queues itself with ``add_callback``.

        ``running_fetchers`` is incremented for the duration of the pipeline
        run and is always decremented again, even when the run raises.
        """
        if self._stopping:
            return

        if not self.queue or not self.app.user_settings.crawler_running:
            # Nothing to fetch right now: poll again in one second.
            self.ioloop.add_timeout(timedelta(seconds=1), self.loop)
            return

        task = None
        try:
            task, complete_cb = yield gen.Task(self.queue.pop)
        except Exception:
            # Best effort: a failing pop is treated like "no task", but it is
            # logged instead of being silently discarded.
            logging.exception("Failed to pop a task from the queue")

        if not task:
            # NOTE(review): this reads self.user_settings while the check
            # above reads self.app.user_settings -- confirm both exist.
            self.ioloop.add_timeout(
                timedelta(seconds=self.user_settings.crawl_delay), self.loop)
            return

        logging.debug("Staring task url=%s", task.url)

        self.running_fetchers += 1
        try:
            yield gen.Task(self.pipeline.process, task)
            complete_cb(True, task)
            self.total_fetch_count += 1
        finally:
            # Keep the in-flight counter accurate even if the pipeline or the
            # completion callback raises.
            self.running_fetchers -= 1

        if task.response:
            systemMetrics.add('response:%s' % task.url_host,
                              task.response.request_time)
            models.LogEvent("Crawled %d %s" %
                            (task.response.code, task.url)).save()
        else:
            models.LogEvent("NOT Crawled %s" % (task.url)).save()
        logging.debug("Finished task url=%s", task.url)

        self.ioloop.add_callback(self.loop)