Exemplo n.º 1
0
 def check(self, proxy, good):
     """
     Probe a single proxy and record it when the probe succeeds.

     Calls ``self.check_method(proxy)`` inside an ``ExceptContext`` and
     appends ``proxy`` to ``good`` when the result is truthy.

     :param proxy: the proxy to probe; handed to ``self.check_method`` as is.
     :param good: collection mutated in place via ``append``.
     """
     # NOTE(review): how an errback returning False is treated is defined by
     # ExceptContext, which is not visible here -- confirm before relying on it.
     with ExceptContext(Exception, errback=lambda *args: False):
         usable = self.check_method(proxy)
         if usable:
             good.append(proxy)
Exemplo n.º 2
0
 def good_source(self):
     """
     Periodically queue the members of the ``good_proxies`` set for checking.

     Loops while ``self.alive`` is truthy. Each round waits in a
     ``SleepManager`` configured with ``GOOD_CHECK_INTERVAL`` (default
     300 seconds); when the manager reports ``is_notified`` the set is read
     from Redis and, if non-empty, put on ``self.proxies_check_in_queue``.
     Exceptions raised while reading or queueing are handed to
     ``self.log_err`` through ``ExceptContext``.
     """
     self.logger.debug("Start good source thread. ")
     while self.alive:
         interval = self.settings.get_int("GOOD_CHECK_INTERVAL", 60 * 5)
         with SleepManager(interval, self) as sleeper:
             # NOTE(review): presumably is_notified means the interval has
             # elapsed -- confirm against SleepManager.
             if sleeper.is_notified:
                 with ExceptContext(Exception, errback=self.log_err):
                     known_good = self.redis_conn.smembers("good_proxies")
                     if known_good:
                         count = len(known_good)
                         self.logger.debug(
                             "Good proxy count is : %s, ready to check. " %
                             count)
                         self.proxies_check_in_queue.put(known_good)
     self.logger.debug("Stop good source thread. ")
Exemplo n.º 3
0
 def good_source(self):
     """
     Periodically queue the members of the ``good_proxies`` set for checking.

     Loops while ``self.alive`` is truthy. Each round blocks in a ``Blocker``
     configured with ``GOOD_CHECK_INTERVAL`` (default 300 seconds) and a
     ``notify`` predicate that is true once the instance is no longer alive.
     When the blocker does not report ``is_notified`` the set is read from
     Redis and, if non-empty, put on ``self.proxies_check_in_queue``.
     Exceptions raised while reading or queueing are handed to
     ``self.log_err`` through ``ExceptContext``.
     """
     self.logger.debug("Start good source thread. ")
     while self.alive:
         wait_seconds = self.settings.get_int("GOOD_CHECK_INTERVAL", 60 * 5)
         with Blocker(
                 wait_seconds,
                 self,
                 notify=lambda instance: not instance.alive) as waiter:
             # NOTE(review): presumably is_notified means the wait was cut
             # short by the notify predicate -- confirm against Blocker.
             if not waiter.is_notified:
                 with ExceptContext(errback=self.log_err):
                     known_good = self.redis_conn.smembers("good_proxies")
                     if known_good:
                         count = len(known_good)
                         self.logger.debug(
                             "Good proxy count is : %s, ready to check. " %
                             count)
                         self.proxies_check_in_queue.put(known_good)
     self.logger.debug("Stop good source thread. ")
Exemplo n.º 4
0
 def reset_proxies(self):
     """
     Apply check results from ``self.proxies_check_out_queue`` to Redis.

     Loops while ``self.alive`` is truthy. Each result is a mapping of
     proxy -> truthy/falsy verdict:

     * truthy: the proxy is added to the ``good_proxies`` set and removed
       from the ``bad_proxies`` hash;
     * falsy: its counter in the ``bad_proxies`` hash is incremented and it
       is removed from the ``good_proxies`` set.

     Exceptions raised inside a round are handed to ``self.log_err`` through
     ``ExceptContext``.
     """
     self.logger.debug("Start resets thread. ")
     while self.alive:
         with ExceptContext(Exception, errback=self.log_err):
             try:
                 results = self.proxies_check_out_queue.get_nowait()
             except Empty:
                 results = None
             if not results:
                 # Nothing to apply: pause in addition to the per-round
                 # sleep below.
                 time.sleep(1)
             else:
                 self.logger.debug("Got %s proxies to reset. " %
                                   len(results))
                 for address, is_good in results.items():
                     if is_good:
                         self.redis_conn.sadd("good_proxies", address)
                         self.redis_conn.hdel("bad_proxies", address)
                     else:
                         self.redis_conn.hincrby("bad_proxies", address)
                         self.redis_conn.srem("good_proxies", address)
         # Runs every round, whether or not a result batch was processed.
         time.sleep(1)
     self.logger.debug("Stop resets thread. ")
Exemplo n.º 5
0
    def check_proxies(self):
        """
        Check queued proxies concurrently and publish the verdicts.

        Loops while ``self.alive`` is truthy. Each batch taken from
        ``self.proxies_check_in_queue`` is normalised to text (``bytes``
        items are decoded), then checked in groups of ``batch_size`` with
        one daemon thread per proxy running ``self.check``. Every group is
        given at most ``batch_timeout`` seconds in total. The outcome is put
        on ``self.proxies_check_out_queue`` as a ``{proxy: bool}`` mapping.

        Exceptions raised inside a round are handed to ``self.log_err``
        through ``ExceptContext``.
        """
        batch_size = 150    # threads started per group
        batch_timeout = 60  # seconds granted to each group as a whole
        self.logger.debug("Start check thread. ")
        while self.alive:
            with ExceptContext(Exception, errback=self.log_err):
                try:
                    proxies = self.proxies_check_in_queue.get_nowait()
                except Empty:
                    proxies = None
                if proxies:
                    self.logger.debug("Got %s proxies to check. " %
                                      len(proxies))
                    proxies = [
                        proxy.decode() if isinstance(proxy, bytes) else proxy
                        for proxy in proxies
                    ]
                    good = []
                    for i in range(0, len(proxies), batch_size):
                        # Check in groups to bound the number of live threads.
                        thread_list = []
                        for proxy in proxies[i:i + batch_size]:
                            th = Thread(target=self.check, args=(proxy, good))
                            # The ``daemon`` attribute replaces setDaemon(),
                            # which is deprecated since Python 3.10.
                            th.daemon = True
                            th.start()
                            thread_list.append(th)

                        # Join against one shared deadline so the group
                        # finishes as soon as its threads do, without
                        # polling, and never waits past the timeout.
                        deadline = time.time() + batch_timeout
                        for th in thread_list:
                            remaining = deadline - time.time()
                            if remaining <= 0:
                                break
                            th.join(remaining)

                    # Threads that outlived the deadline may still append to
                    # ``good``; work on a snapshot, and use a set so building
                    # the mapping is linear rather than quadratic.
                    good_snapshot = list(good)
                    good_lookup = set(good_snapshot)
                    self.logger.debug(
                        "%s proxies is good. " % (len(good_snapshot)))
                    self.proxies_check_out_queue.put(
                        {proxy: proxy in good_lookup for proxy in proxies})
                else:
                    time.sleep(1)
            # Runs every round, whether or not a batch was processed.
            time.sleep(1)
        self.logger.debug("Stop check thread. ")
Exemplo n.º 6
0
 def start(self):
     """
     Launch the worker threads and run the periodic fetch loop.

     Hands ``check_proxies``, ``bad_source``, ``good_source`` and
     ``reset_proxies`` to ``self.gen_thread``, then loops for as long as the
     instance is alive or any thread in ``self.children`` is still running.
     Each round waits in a ``SleepManager`` configured with
     ``FETCH_INTERVAL`` (default 600 seconds); ``immediately`` is passed as
     true until the first round has completed. When the manager reports
     ``is_notified`` and the instance is still alive, ``self.fetch_all()``
     is called and its result is put on ``self.proxies_check_in_queue``.
     Exceptions raised while fetching are handed to ``self.log_err``
     through ``ExceptContext``.
     """
     self.logger.debug("Start proxy factory. ")
     for worker in (self.check_proxies, self.bad_source,
                    self.good_source, self.reset_proxies):
         self.gen_thread(worker)
     has_run = False
     while self.alive or any(
             child.is_alive() for child in self.children):
         fetch_interval = self.settings.get_int("FETCH_INTERVAL", 10 * 60)
         with SleepManager(fetch_interval,
                           self,
                           immediately=not has_run) as sleeper:
             if not sleeper.is_notified:
                 # ``continue`` also skips the flag update below, so
                 # ``immediately`` keeps its current value next round.
                 continue
             with ExceptContext(Exception, errback=self.log_err):
                 if self.alive:
                     self.logger.debug("Start to fetch proxies. ")
                     fetched = self.fetch_all()
                     self.logger.debug("%s proxies found. " % len(fetched))
                     self.proxies_check_in_queue.put(fetched)
         has_run = True
     self.logger.debug("Stop proxy factory. ")
Exemplo n.º 7
0
 def bad_source(self):
     """
     Periodically prune the ``bad_proxies`` hash and re-queue the remainder.

     Loops while ``self.alive`` is truthy. Each round waits in a
     ``SleepManager`` configured with ``BAD_CHECK_INTERVAL`` (default
     300 seconds); when the manager reports ``is_notified`` the hash is read
     from Redis. Entries whose failure counter exceeds ``FAILED_TIMES``
     (default 5) are deleted from the hash and logged as abandoned; every
     other entry is put on ``self.proxies_check_in_queue`` for re-checking.

     Abandoned proxies are deliberately left out of the re-check batch:
     queueing them again would let a later failed check re-create their
     entry with a fresh counter, so they would never actually be dropped.

     Exceptions raised while reading, pruning or queueing are handed to
     ``self.log_err`` through ``ExceptContext``.
     """
     self.logger.debug("Start bad source thread. ")
     while self.alive:
         with SleepManager(
                 self.settings.get_int("BAD_CHECK_INTERVAL", 60 * 5),
                 self) as sm:
             if not sm.is_notified:
                 continue
             with ExceptContext(Exception, errback=self.log_err):
                 proxies = self.redis_conn.hgetall("bad_proxies")
                 if proxies:
                     self.logger.debug(
                         "Bad proxy count is : %s, ready to check. " %
                         len(proxies))
                     # The threshold does not change during the pass.
                     max_failures = self.settings.get_int("FAILED_TIMES", 5)
                     retry = []
                     for proxy, times in proxies.items():
                         if int(times) > max_failures:
                             self.redis_conn.hdel("bad_proxies", proxy)
                             self.logger.debug(
                                 "Abandon %s of failed for %s times. " %
                                 (proxy, times))
                         else:
                             retry.append(proxy)
                     # Queue a concrete list, and only when something is
                     # left to re-check.
                     if retry:
                         self.proxies_check_in_queue.put(retry)
     self.logger.debug("Stop bad source thread. ")