Exemplo n.º 1
0
    def add_url(self, url):
        """Put ``url`` on ``self.fetch_queue`` if its scheme is accepted.

        URLs rejected by ``isValidScheme`` are not queued; a warning is
        logged instead. Always returns ``None``.
        """
        if isValidScheme(url):
            logger.debug("get url: %s" % url)
            self.fetch_queue.put(url)
        else:
            logger.warning("not vaild_scheme")
Exemplo n.º 2
0
    def do_work(self, url):
        """Fetch ``url``, parse the response and hand the results to a Sender.

        This is a generator: it yields the object returned by
        ``self.fetch(url)`` and resumes with the response sent back in
        (presumably driven by a Tornado coroutine decorator that is not
        visible here -- confirm against the enclosing class).

        On success the URL is appended to ``self.fetch_finished`` and every
        item produced by ``self.parse(response)`` is passed to
        ``Sender.add_url``. Fetch failures are logged and swallowed so a
        single bad URL does not abort the caller.

        ``self.fetching`` is decremented exactly once for every URL that
        passes the scheme check, whether the fetch/parse step succeeds,
        fails with a handled error, or raises.
        """
        if not isValidScheme(url):
            # NOTE(review): this path returns without touching
            # ``self.fetching``; if the caller increments the counter before
            # calling do_work, it should be decremented here too -- confirm.
            logger.warning("not vaild_scheme")
            return

        try:
            response = yield self.fetch(url)
        except tornado.httpclient.HTTPError as e:
            # TODO: persisting these failures to 'httperror.txt' was disabled
            # because it ran into "too many open files"; only log for now.
            logger.error("Url: %s HTTPError: %s " % (url, e.code))
        except Exception:
            # Best-effort: record the traceback with the message and carry
            # on. KeyboardInterrupt / SystemExit / GeneratorExit are
            # deliberately not caught so shutdown and generator close work.
            logger.exception("Unknow error with url: %s" % url)
        else:
            url_gen = self.parse(response)
            self.fetch_finished.append(url)

            sender = Sender()
            for u in url_gen:
                sender.add_url(u)
            logger.info("fetched %s" % url)
        finally:
            # Always release the in-flight slot, even when parsing or
            # forwarding raises; otherwise the counter leaks upward.
            self.fetching -= 1