Пример #1
0
 def get(self):
     """
     Pick one entry at random from the useful-proxy table.

     :return: the object built by ``Proxy.newProxyFromJson`` from a randomly
              chosen stored entry, or None when the table has no entries
     """
     self.db.changeTable(self.useful_proxy_queue)
     candidates = self.db.getAll()
     # Guard clause: nothing stored means there is nothing to choose from.
     if not candidates:
         return None
     picked = random.choice(candidates)
     return Proxy.newProxyFromJson(picked)
Пример #2
0
    def get_http(self):
        """
        return a http proxy

        All entries of the useful-proxy table are inspected and one of those
        whose ``proxy`` field has the scheme ``http`` is chosen uniformly at
        random.

        :return: the object built by ``Proxy.newProxyFromJson`` for the chosen
                 entry, or None when the table is empty or holds no http entry
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if not item_list:
            return None

        # Filter first, then choose. Drawing random samples with replacement
        # can keep hitting non-http entries and report None even though an
        # http proxy is stored.
        http_items = [
            item for item in item_list
            if json.loads(item)['proxy'].split("://")[0] == 'http'
        ]
        if not http_items:
            return None

        return Proxy.newProxyFromJson(random.choice(http_items))
Пример #3
0
    def run(self):
        """
        Drain the work queue, validating each raw proxy entry.

        Entries are taken from ``self.queue`` without blocking; the loop ends
        as soon as the queue is empty. Each entry is turned into a proxy
        object, checked with ``checkProxyUseful`` and, when the check
        succeeds, written to the useful-proxy table.

        :return:
        """
        self.log.info("RawProxyCheck - {}  : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                proxy_json = self.queue.get(block=False)
            except Empty:
                self.log.info("RawProxyCheck - {}  : exit".format(self.name))
                break

            # Every successful get() must be paired with task_done(), even
            # when parsing/validation/storage raises; otherwise a caller
            # waiting in queue.join() would block forever.
            try:
                proxy_obj = Proxy.newProxyFromJson(proxy_json)

                proxy_obj, status = checkProxyUseful(proxy_obj)
                if status:
                    self.db.put(proxy_obj)
                    self.log.info('RawProxyCheck - {}  : {} validation pass'.format(self.name, proxy_obj.proxy.ljust(20)))
                else:
                    self.log.info('RawProxyCheck - {}  : {} validation fail'.format(self.name, proxy_obj.proxy.ljust(20)))
            finally:
                self.queue.task_done()
Пример #4
0
    def fetch(self):
        """
        fetch proxy into db by ProxyGetter

        Every getter named in ``config.proxy_getter_functions`` is looked up
        on ``GetFreeProxy`` and iterated. Each yielded item is expected to be
        a ``"<proxy>|<type>"`` string. Items that lack the ``|`` separator,
        have an empty proxy part or fail ``verifyProxyFormat`` are logged as
        illegal and skipped; proxies already seen during this call are logged
        as existing and skipped; the rest are written to the raw-proxy table.

        An exception raised by a getter is logged and ends that getter only;
        the remaining getters still run.

        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for raw_entry in getattr(GetFreeProxy, proxyGetter.strip())():
                    fields = raw_entry.strip().split("|")
                    proxy = fields[0]

                    # A missing "|<type>" part makes the entry malformed.
                    # Skip just this entry instead of letting an IndexError
                    # abort the rest of the getter's output.
                    if (len(fields) < 2 or not proxy
                            or not verifyProxyFormat(proxy)):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue

                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue

                    p_type = fields[1]
                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} success'.format(
                                      func=proxyGetter,
                                      proxy=proxy.ljust(20)))
                    # Call form with a single argument behaves the same on
                    # Python 2 and Python 3.
                    print("{} get_sucess".format(proxy))
                    self.db.put(
                        Proxy(proxy, proxy_type=p_type,
                              source=proxyGetter))
                    proxy_set.add(proxy)
            except Exception as e:
                # Best-effort: one broken getter must not stop the others,
                # but record what went wrong instead of discarding it.
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))
Пример #5
0
    def getAllByName(self, name):
        """
        Return the proxies from ``self.getAll()`` that have no recent entry
        in the per-name fail table.

        The fail table is ``self.useful_proxy_queue + '_fail_' + name``. A
        proxy is left out when the first fail-table entry with the same
        ``proxy`` value has a ``last_time`` less than 24 hours old. Note that
        the db is left switched to the fail table when this method returns.

        :param name: suffix identifying which fail table to consult
        :return: list of proxy objects that passed the filter
        :raises ValueError: if a matching fail entry's ``last_time`` is not in
                            ``%Y-%m-%d %H:%M:%S`` format
        """
        all_proxies = self.getAll()

        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        fail_list = self.db.getAll()

        # Index fail entries by proxy address for O(1) lookups. setdefault
        # keeps the first entry per address, which is the one a first-match
        # linear scan would have used.
        first_failure = {}
        for raw_entry in fail_list:
            failed = Proxy.newProxyFromJson(raw_entry)
            first_failure.setdefault(failed.proxy, failed)

        # Failures at or before this instant are old enough to be ignored.
        cutoff = datetime.now() - timedelta(hours=24)

        filter_proxies = []
        for proxy in all_proxies:
            failed = first_failure.get(proxy.proxy)
            if failed is not None:
                # Parse lazily: only entries that actually match a proxy
                # need a well-formed timestamp.
                failed_date = datetime.strptime(failed.last_time,
                                                "%Y-%m-%d %H:%M:%S")
                if failed_date > cutoff:
                    continue
            filter_proxies.append(proxy)

        return filter_proxies
Пример #6
0
    def run(self):
        """
        Drain the work queue, re-validating each stored proxy entry.

        Entries are taken from ``self.queue`` without blocking; the loop ends
        as soon as the queue is empty. Each entry is checked with
        ``checkProxyUseful``. The proxy is written back to the useful-proxy
        table when the check succeeds or when its ``fail_count`` is still
        below ``FAIL_COUNT``; otherwise it is deleted from the table.

        :return:
        """
        self.log.info("UsefulProxyCheck - {}  : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                proxy_str = self.queue.get(block=False)
            except Empty:
                self.log.info("UsefulProxyCheck - {}  : exit".format(
                    self.name))
                break

            # Every successful get() must be paired with task_done(), even
            # when parsing/validation/storage raises; otherwise a caller
            # waiting in queue.join() would block forever.
            try:
                proxy_obj = Proxy.newProxyFromJson(proxy_str)
                proxy_obj, status = checkProxyUseful(proxy_obj,
                                                     self.origin_ips)
                # This branch also covers a proxy whose check failed but
                # whose fail_count is still under the limit; it is kept and
                # logged with the same "pass" message.
                if status or proxy_obj.fail_count < FAIL_COUNT:
                    self.db.put(proxy_obj)
                    self.log.info(
                        'UsefulProxyCheck - {}  : {} validation pass'.format(
                            self.name, proxy_obj.proxy.ljust(20)))
                else:
                    self.log.info(
                        'UsefulProxyCheck - {}  : {} validation fail'.format(
                            self.name, proxy_obj.proxy.ljust(20)))
                    self.db.delete(proxy_obj.proxy)
            finally:
                self.queue.task_done()
Пример #7
0
 def get_all_proxy(self):
     """
     Load every entry of the table named by ``self.asdl_proxy_queue``.

     :return: list with one ``Proxy.newProxyFromJson`` result per stored
              entry, in the order ``self.db.getAll()`` yields them
     """
     self.db.changeTable(self.asdl_proxy_queue)
     proxies = []
     for raw_item in self.db.getAll():
         proxies.append(Proxy.newProxyFromJson(raw_item))
     return proxies
Пример #8
0
 def add_asdl_proxy(self, proxy_str):
     """
     Store a proxy in the table named by ``self.asdl_proxy_queue``.

     The entry's ``last_time`` is set to the current local time formatted
     as ``%Y-%m-%d %H:%M:%S``.

     :param proxy_str: value passed as the first argument to ``Proxy``
     :return:
     """
     timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
     new_entry = Proxy(proxy_str, last_time=timestamp)
     self.db.changeTable(self.asdl_proxy_queue)
     self.db.put(new_entry)
Пример #9
0
 def deleteByName(self, name, proxy):
     """
     Record ``proxy`` in the per-name fail table.

     Nothing is removed here: an entry stamped with the current local time
     (``%Y-%m-%d %H:%M:%S``) is put into the table
     ``self.useful_proxy_queue + '_fail_' + name``.

     :param name: suffix identifying which fail table to write to
     :param proxy: value passed to ``Proxy`` as its ``proxy`` argument
     :return:
     """
     stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     fail_record = Proxy(proxy=proxy, last_time=stamp)
     fail_table = self.useful_proxy_queue + '_fail_' + name
     self.db.changeTable(fail_table)
     self.db.put(fail_record)