Example #1
    def __init__(self, crawler):
        self.settings = crawler.settings
        self.logger = Logger.from_crawler(crawler)
        if self.settings.getbool("CUSTOM_REDIS"):
            from custom_redis.client import Redis
        else:
            from redis import Redis
        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.getint("REDIS_PORT"))
        self.queue_name = None
        self.queues = {}
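Every settings-driven variant in these examples picks between the project's drop-in custom_redis.client.Redis and the standard redis-py client at runtime. As a minimal sketch (not part of the original code; the helper name load_redis_class is hypothetical), the shared pattern could be factored out once:

def load_redis_class(use_custom):
    """Return the Redis client class to instantiate (sketch only)."""
    if use_custom:
        from custom_redis.client import Redis  # project-specific drop-in client
    else:
        from redis import Redis  # standard redis-py client
    return Redis

# Hypothetical usage with Scrapy-style settings, mirroring Example #1:
# Redis = load_redis_class(settings.getbool("CUSTOM_REDIS"))
# redis_conn = Redis(settings.get("REDIS_HOST"), settings.getint("REDIS_PORT"))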
Example #2
    def setup(self):

        self.failed_count, self.failed_rate, self.success_rate = 0, 0, 0

        if self.custom:
            from custom_redis.client import Redis
        else:
            from redis import Redis

        self.redis_conn = Redis(host=self.host, port=self.port)
        self.clean_previous_task(self.crawlid)
Example #3
    def __init__(self, redis_host, redis_port, redis_key, **kwargs):
        try:
            from custom_redis.client import Redis
        except ImportError:
            try:
                from redis import Redis
            except ImportError:
                warnings.warn(
                    "RedisSource depends on redis, try: pip install redis. ")
                exit(1)
        self.redis_key = redis_key
        self.redis_conn = Redis(redis_host, redis_port)
Example #4
    def __init__(self, crawler):
        self.settings = crawler.settings
        self.logger = CustomLogger.from_crawler(crawler)
        if self.settings.getbool("CUSTOM_REDIS"):
            from custom_redis.client import Redis
        else:
            from redis import Redis
        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.getint("REDIS_PORT"))
        self.queue_name = None
        self.queues = {}
        # SPEED is read as requests per minute, so 60 / SPEED is the
        # pause in seconds between accesses, tracked via last_acs_time.
        self.request_interval = 60 / self.settings.getint("SPEED", 60)
        self.last_acs_time = time.time()
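Example #4's request_interval and last_acs_time suggest a simple rate limiter. A hypothetical sketch of how those two attributes could be used (the function name wait_for_slot and the throttling logic are assumptions, not taken from the original scheduler):

import time

def wait_for_slot(scheduler):
    # Block until at least request_interval seconds have passed since
    # scheduler.last_acs_time, then record the new access time.
    elapsed = time.time() - scheduler.last_acs_time
    if elapsed < scheduler.request_interval:
        time.sleep(scheduler.request_interval - elapsed)
    scheduler.last_acs_time = time.time()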
Example #5
    def setup(self):

        self.failed_count, self.failed_rate, self.success_rate = 0, 0, 0

        if self.custom:
            from custom_redis.client import Redis
        else:
            from redis import Redis

        self.redis_conn = Redis(host=self.host, port=self.port)
        # drop any leftover state from a previous run of the same crawlid
        self.redis_conn.delete("crawlid:%s" % self.crawlid)
        self.redis_conn.delete("failed_pages:%s" % self.crawlid)
        self.redis_conn.delete("crawlid:%s:model" % self.crawlid)
Example #6
    def __init__(self, settings):
        self.settings_file = settings
        Logger.__init__(self, settings)
        self.set_logger()
        MultiThreadClosing.__init__(self)
        self.de_queue = Queue()
        if self.settings.get("CUSTOM_REDIS"):
            from custom_redis.client import Redis
        else:
            from redis import Redis
        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.get("REDIS_PORT"))
        self.small = False
Example #7
    def __init__(self, crawler):

        self.settings = crawler.settings
        self.set_logger(crawler)

        if self.settings.get("CUSTOM_REDIS"):
            from custom_redis.client import Redis
        else:
            from redis import Redis

        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.get("REDIS_PORT"))
        self.queue_name = "%s:*:queue"
        self.queues = {}
        self.extract = tldextract.extract
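The "%s:*:queue" string in Example #7 is a Redis key pattern template: the "%s" gets filled in later and the "*" is matched as a glob on the server. A hypothetical illustration (the spider name and the use of redis-py's keys() are assumptions, not taken from the original code):

from redis import Redis

redis_conn = Redis("localhost", 6379)
pattern = "%s:*:queue" % "news_spider"   # e.g. "news_spider:<crawlid>:queue"
queue_keys = redis_conn.keys(pattern)    # KEYS lookup with a glob pattern
for queue_key in queue_keys:
    print(queue_key)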
Example #8
def start(crawlid, host, custom):
    if custom:
        from custom_redis.client import Redis
    else:
        from redis import Redis

    redis_conn = Redis(host)
    key = "crawlid:%s" % crawlid
    failed_pages = int(redis_conn.hget(key, "failed_download_pages") or 0)
    format(redis_conn.hgetall(key))  # presumably a project-local pretty-print helper, not the builtin format()
    if failed_pages:
        print_if = raw_input("show the failed pages? y/n default n:")  # Python 2 prompt; Example #9 uses input()
        if print_if == "y":
            key_ = "failed_pages:%s" % crawlid
            p = redis_conn.hgetall(key_)
            format(p, True)
Example #9
def start(crawlid, host, port, custom):
    if custom:
        from custom_redis.client import Redis
    else:
        from redis import Redis
    redis_conn = Redis(host, port)
    key = "crawlid:%s" % crawlid
    data = redis_conn.hgetall(key)
    failed_keys = [x for x in data.keys() if fnmatch.fnmatch(
        x.decode() if isinstance(x, bytes) else x, "failed_download_*")]
    format(data)
    for fk in failed_keys:
        fk = fk.decode() if isinstance(fk, bytes) else fk
        print_if = input("show the %s? y/n default n:" % fk.replace("_", " "))
        if print_if == "y":
            key_ = "%s:%s" % (fk, crawlid)
            p = redis_conn.hgetall(key_)
            format(p, True)
Example #10
def start(crawlid, host, custom):
    if custom:
        from custom_redis.client import Redis
    else:
        from redis import Redis

    redis_conn = Redis(host)
    key = "crawlid:%s" % crawlid
    data = redis_conn.hgetall(key)
    failed_keys = filter(lambda x: fnmatch.fnmatch(x, "failed_download_*"),
                         data.keys())
    format(data)
    for fk in failed_keys:
        print_if = raw_input("show the %s? y/n default n:" %
                             fk.replace("_", " "))
        if print_if == "y":
            key_ = "%s:%s" % (fk, crawlid)
            p = redis_conn.hgetall(key_)
            format(p, True)
Example #11
    def __init__(self, crawlid, spiderid, url, urls_file, priority, port, host, custom):
        self.crawlid = crawlid
        self.spiderid = spiderid
        self.url = url
        self.urls_file = urls_file
        self.priority = priority
        self.port = port
        self.host = host
        self.custom = custom
        self.inc = 0
        self.failed_count, self.failed_rate, self.success_rate = 0, 0, 0

        if self.custom:
            from custom_redis.client import Redis
        else:
            from redis import Redis

        self.redis_conn = Redis(host=self.host, port=self.port)
        self.clean_previous_task(self.crawlid)
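Examples #2 and #11 delegate cleanup to clean_previous_task(), while Example #5 inlines the equivalent delete calls. Assuming the method simply mirrors those deletes, a sketch of it might look like this (the real implementation may remove additional keys):

    def clean_previous_task(self, crawlid):
        # Inferred from the inline deletes in Example #5.
        self.redis_conn.delete("crawlid:%s" % crawlid)
        self.redis_conn.delete("failed_pages:%s" % crawlid)
        self.redis_conn.delete("crawlid:%s:model" % crawlid)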