Exemplo n.º 1
0
def validUsefulProxy(proxy):
    """
    Check whether a proxy can fetch at least one of the configured check URLs.

    The URLs in ``config.check_urls`` are tried one after another, starting
    at a random position and wrapping around, until one of them answers with
    HTTP 200 through the proxy.

    :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``; it is
        interpolated into ``https://{proxy}`` (presumably ``host:port``).
    :return: True as soon as one check URL returns status 200, False when
        every URL failed or when no check URLs are configured.
    """
    logger = LogHandler("validUsefulProxy")
    if isinstance(proxy, bytes):
        proxy = proxy.decode('utf8')
    proxies = {"https": f"https://{proxy}"}
    check_urls = config.check_urls
    length = len(check_urls)
    if not length:
        # random.randint(0, -1) raises ValueError, so bail out explicitly.
        logger.error(f'proxy {proxy} all check failed')
        return False

    start = random.randint(0, length - 1)
    for attempt in range(length):
        url = check_urls[(start + attempt) % length]
        logger.info(f'proxy {proxy} check {url}')
        try:
            # NOTE(review): verify=False disables TLS certificate checks.
            r = requests.get(url, proxies=proxies, timeout=5, verify=False)
            if r.status_code == 200:
                logger.info(f'proxy {proxy} is useful')
                return True
        except Exception as e:
            # Best-effort probe: any failure just means "try the next URL".
            logger.error(f'proxy {proxy} check {url} failed, {e}')
        if attempt < length - 1:
            # Pause only between attempts, not after the final one.
            time.sleep(1)
    logger.error(f'proxy {proxy} all check failed')
    return False
Exemplo n.º 2
0
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def _next_raw_proxy(self):
        # Select the raw proxy table, then pop one entry from it.
        self.db.changeTable(self.raw_proxy_queue)
        return self.db.pop()

    def valid_proxy(self):
        """
        Pop proxies from the raw table until it yields a falsy value and
        store every proxy accepted by validUsefulProxy in the useful table.

        :return: None
        """
        candidate = self._next_raw_proxy()
        self.log.info('%s start valid proxy' % time.ctime())
        while candidate:
            if not validUsefulProxy(candidate):
                self.log.debug('proxy: %s validation fail' % candidate)
            else:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(candidate)
                self.log.debug('proxy: %s validation passes' % candidate)
            candidate = self._next_raw_proxy()
        self.log.info('%s valid proxy complete' % time.ctime())
Exemplo n.º 3
0
class Proxy_Check_Http(ProxyManage, Thread):
    """
    Worker thread that validates HTTP proxies taken from a shared queue.

    A proxy that passes is written to the useful proxy table; a proxy that
    fails is deleted from it.
    """

    def __init__(self, queue_http, item_dict):
        """
        :param queue_http: queue of proxies to check; must support
            ``get(block=False)`` and ``task_done()``.
        :param item_dict: stored on the instance; not read by this class.
        """
        ProxyManage.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')
        self.queue_http = queue_http
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate queued HTTP proxies until the queue is
        empty.

        :return: None
        """
        try:
            from queue import Empty  # Python 3
        except ImportError:
            from Queue import Empty  # Python 2

        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # A non-blocking get avoids the qsize()/get() race: with several
            # workers, another thread can take the last item after qsize()
            # was read, leaving a blocking get() waiting forever.
            try:
                proxy = self.queue_http.get(block=False)
            except Empty:
                break
            raw_proxy_dict = {'http': proxy}
            if validUsefulProxy(raw_proxy_dict):
                self.db.put(proxy)
                self.log.info("ProxyCheck :{} validation pass".format(proxy))
            else:
                self.db.delete(proxy)
                self.log.info("ProxyCheck :{} validation delete".format(proxy))

            self.queue_http.task_done()
Exemplo n.º 4
0
class ProxyValidSchedule(ProxyManager):
    """
    Endlessly re-validate the proxies in the useful proxy table, adjusting a
    per-proxy counter and deleting proxies whose counter drops below -5.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Validate every proxy in the useful table, forever.

        A passing proxy has its counter incremented by 1, a failing one has
        it decremented by 1; a proxy whose counter is below -5 is deleted.

        :return: never returns
        """
        import time  # local import: this block cannot see the module imports

        while True:
            self.db.changeTable(self.useful_proxy_queue)
            checked = 0
            for each_proxy in self.db.getAll():
                checked += 1
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy):
                    # Success: counter + 1
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: counter - 1
                    self.db.inckey(each_proxy, -1)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                # int() because the stored value may come back as str/bytes,
                # which cannot be ordered against an int on Python 3.
                if value and int(value) < -5:
                    # Counter below -5: drop the proxy.
                    self.db.delete(each_proxy)
            # Heartbeat once per pass; it was unreachable after the loop.
            self.log.info('validProxy_a running normal')
            if not checked:
                # Nothing to validate: avoid hammering the DB in a tight loop.
                time.sleep(5)

    def main(self):
        """Run the validation loop (does not return)."""
        self.__validProxy()
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        item = self.db.pop()
        self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
        # Snapshot from self.getAll(); used to skip validating known proxies.
        known = self.getAll()
        while item:
            candidate = item.get('proxy')
            if isinstance(candidate, bytes):
                # Py3 compatibility
                candidate = candidate.decode('utf8')

            # The membership test comes first, so known proxies skip validation.
            if candidate in known or not validUsefulProxy(candidate):
                self.log.info('ProxyRefreshSchedule: %s validation fail' % candidate)
            else:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(candidate)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % candidate)
            self.db.changeTable(self.raw_proxy_queue)
            item = self.db.pop()
            known = self.getAll()
        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
Exemplo n.º 6
0
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        Each popped entry is read through ``.get('ip')`` / ``.get('port')``;
        the whole entry (not just the address) is stored on success.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        entry = self.db.pop()

        while entry:
            self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
            host = entry.get('ip')
            port = entry.get('port')
            addr = "%s:%s" % (host, port)

            if not validUsefulProxy(addr):
                self.log.info('ProxyRefreshSchedule: %s validation fail' % addr)
            else:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(entry)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % addr)
            self.db.changeTable(self.raw_proxy_queue)
            entry = self.db.pop()

        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
Exemplo n.º 7
0
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        popped = self.db.pop()
        self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
        # Result of self.getAll(); proxies found in it are not re-validated.
        existing = self.getAll()
        while popped:
            address = popped.get('proxy')
            if isinstance(address, bytes):
                # Py3 compatibility
                address = address.decode('utf8')

            is_new = address not in existing
            if is_new and validUsefulProxy(address):
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(address)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % address)
            else:
                self.log.info('ProxyRefreshSchedule: %s validation fail' % address)

            # Return to the raw table for the next pop, then refresh the snapshot.
            self.db.changeTable(self.raw_proxy_queue)
            popped = self.db.pop()
            existing = self.getAll()
        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
Exemplo n.º 8
0
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that validates proxies taken from a shared queue and
    removes the ones that fail.
    """

    def __init__(self, queue, item_dict, check_urls):
        """
        :param queue: queue of proxies to check; must support
            ``get(block=False)`` and ``task_done()``.
        :param item_dict: stored on the instance; not read by this class.
        :param check_urls: stored on the instance; not read by this class.
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        # file=False: several threads writing one log file causes problems.
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict
        self.check_urls = check_urls

    def run(self):
        """
        Thread entry point: validate queued proxies until the queue is empty.

        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Non-blocking get: a blocking get() never raises Empty, and the
            # qsize()/get() pair races with other workers, so a thread could
            # wait forever on a queue another thread just emptied.
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                break
            if validUsefulProxy(proxy):
                self.log.info(f'ProxyCheck: {proxy} validation pass')
            else:
                self.log.info(
                    f'ProxyCheck: {proxy} validation fail, delete it from useful_proxy!'
                )
                # NOTE(review): called on the class, not on self — this only
                # works if delete_proxy is a static/class method; confirm.
                ProxyManager.delete_proxy(proxy)
            self.queue.task_done()
Exemplo n.º 9
0
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that pops one entry from the useful proxy table every
    20 seconds and validates it.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def _check_entry(self, entry):
        # Validate one popped entry; on failure delete it by its 'ip' key.
        addr = "%s:%s" % (entry.get('ip'), entry.get('port'))
        if not validUsefulProxy(addr):
            self.log.info('ProxyCheck: {} validation fail'.format(addr))
            self.db.delete(entry['ip'])
            return
        self.log.info('ProxyCheck: {} validation pass'.format(addr))

    def run(self):
        """
        Thread entry point; loops forever.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            entry = self.db.pop()
            if entry:
                self._check_entry(entry)
            sleep(20)
Exemplo n.º 10
0
class ProxyValidSchedule(ProxyManager):
    """
    Endlessly re-validate the proxies in the useful proxy table, adjusting a
    per-proxy counter and deleting proxies whose counter drops below -1.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def validProxy(self):
        """
        Validate every proxy in the useful table, forever.

        A passing proxy has its counter incremented by 1, a failing one has
        it decremented by 1; a proxy whose counter is below -1 is deleted.

        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)

            each_proxys = self.db.getAll()
            # print() with one pre-formatted argument parses on Python 2 and 3.
            print("验证所有ip {}".format(each_proxys))
            if not each_proxys:
                # Nothing to validate yet; wait before polling again.
                time.sleep(100)

            for each_proxy in each_proxys:
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')
                print("验证ip: {}".format(each_proxy))
                if validUsefulProxy(each_proxy):
                    # Success: counter + 1
                    self.db.inckey(each_proxy, 1)
                    self.log.debug(
                        'validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: counter - 1
                    self.db.inckey(each_proxy, -1)
                    self.log.info(
                        'validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                if value and int(value) < -1:
                    # Counter below -1: drop the proxy.
                    self.db.delete(each_proxy)
            # Heartbeat once per pass; it was unreachable after the loop.
            self.log.info('validProxy_a running normal')
Exemplo n.º 11
0
class ProxyValidSchedule(ProxyManager):
    """
    Endlessly re-validate the proxies in the useful proxy table and delete
    the ones whose counter becomes negative.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.db = DbClient()
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Validate every proxy in the useful table, forever.

        A passing proxy has its counter incremented by 1. A failing proxy
        with a positive counter is reset to 0; a failing proxy at 0 (or
        below, or without a stored value) is decremented by 1. A proxy whose
        counter is negative afterwards is deleted.

        :return: never returns
        """
        # The factor 0 makes this a zero-length sleep (start-up jitter is
        # effectively disabled).
        time.sleep(60 * 0 * random.random())
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            checked = 0
            for each_proxy in self.db.getAll():
                checked += 1
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy):
                    # Success: counter + 1
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: read the counter once instead of three times.
                    current = self.db.getvalue(each_proxy)
                    print("原有value  " + str(current))
                    # Strictly positive -> reset to 0. The original `>= 0`
                    # left a counter of 0 untouched, so it could never turn
                    # negative and the delete below was never reached.
                    if current is not None and int(current) > 0:
                        self.db.inckey(each_proxy, -1 * int(current))
                    else:
                        self.db.inckey(each_proxy, -1)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                print(value)
                if value is not None and int(value) < 0:
                    # Negative counter: drop the proxy.
                    print("删除" + each_proxy)
                    self.db.delete(each_proxy)
            # Heartbeat once per pass; it was unreachable after the loop.
            self.log.info('validProxy_a running normal')
            if not checked:
                # Nothing to validate: avoid hammering the DB in a tight loop.
                time.sleep(5)

    def main(self):
        """Run the validation loop (does not return)."""
        self.__validProxy()
Exemplo n.º 12
0
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that validates proxies taken from a shared queue and keeps
    a per-proxy failure count in the useful proxy table.
    """

    def __init__(self, queue, item_dict):
        """
        :param queue: queue of proxies to check; must support
            ``get(block=False)`` and ``task_done()``.
        :param item_dict: mapping indexed by proxy that yields its current
            failure count.
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate queued proxies until the queue is empty.

        A pass decrements a positive failure count; a failure increments it,
        or deletes the proxy once the count would reach FAIL_COUNT.

        :return: None
        """
        try:
            from queue import Empty  # Python 3
        except ImportError:
            from Queue import Empty  # Python 2

        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Non-blocking get avoids the qsize()/get() race between workers,
            # where the losing thread would block forever on an empty queue.
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                break
            count = self.item_dict[proxy]
            if validUsefulProxy(proxy):
                # Validation passed: failure count - 1
                if count and int(count) > 0:
                    self.db.put(proxy, num=int(count) - 1)

                self.log.info('proxycheck:{} validation pass'.format(proxy))
            else:
                self.log.info('proxycheck:{} validation fail'.format(proxy))
                if count and int(count) + 1 >= FAIL_COUNT:
                    self.log.info(
                        'proxycheck:{} fial too many,delete'.format(proxy))
                    self.db.delete(proxy)
                else:
                    # Failure count + 1. The original subtracted here, so the
                    # count moved away from FAIL_COUNT on every failure.
                    self.db.put(proxy, num=int(count) + 1)
            self.queue.task_done()
Exemplo n.º 13
0
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that validates proxies taken from a shared queue and keeps
    a per-proxy failure count in the useful proxy table.
    """

    def __init__(self, queue, item_dict):
        """
        :param queue: queue of proxies to check; must support
            ``get(block=False)`` and ``task_done()``.
        :param item_dict: mapping indexed by proxy that yields its current
            failure count.
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        # file=False: several threads writing one log file causes problems.
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate queued proxies until the queue is empty.

        A pass decrements a positive failure count; a failure increments it,
        or deletes the proxy once the count would reach FAIL_COUNT.

        :return: None
        """
        try:
            from queue import Empty  # Python 3
        except ImportError:
            from Queue import Empty  # Python 2

        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Non-blocking get avoids the qsize()/get() race between workers,
            # where the losing thread would block forever on an empty queue.
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                break
            count = self.item_dict[proxy]
            if validUsefulProxy(proxy):
                # Validation passed: failure count - 1
                if count and int(count) > 0:
                    self.db.put(proxy, num=int(count) - 1)
                self.log.info('ProxyCheck: {} validation pass'.format(proxy))
            else:
                self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                if count and int(count) + 1 >= FAIL_COUNT:
                    self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=int(count) + 1)
            self.queue.task_done()
Exemplo n.º 14
0
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that re-validates every proxy in the useful proxy
    table, then sleeps five minutes and repeats.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def _record_pass(self, proxy, count):
        # Validation passed: decrement a positive counter, then log.
        if count and int(count) > 0:
            self.db.put(proxy, num=int(count) - 1)
        self.log.info(
            'ProxyCheck: {} validation pass'.format(proxy))

    def _record_fail(self, proxy, count):
        # Validation failed: delete above FAIL_COUNT, otherwise increment.
        self.log.info(
            'ProxyCheck: {} validation fail'.format(proxy))
        too_many = count and int(count) > FAIL_COUNT
        if too_many:
            self.log.info(
                'ProxyCheck: {} fail too many, delete!'.format(
                    proxy))
            self.db.delete(proxy)
        else:
            self.db.put(proxy, num=int(count) + 1)

    def run(self):
        """
        Thread entry point; loops forever.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            for proxy, count in self.db.getAll().items():
                if validUsefulProxy(proxy):
                    self._record_pass(proxy, count)
                else:
                    self._record_fail(proxy, count)
            sleep(60 * 5)
Exemplo n.º 15
0
class ProxyValidSchedule(ProxyManager):
    """
    Endlessly re-validate the proxies in the useful proxy table, deleting
    any proxy that fails validUsefulProxy or validTelnetProxy.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy__(self):
        """
        Validate every proxy in the useful table, forever.

        :return: never returns
        """
        import time  # local import: this block cannot see the module imports

        while True:
            self.db.changeTable(self.useful_proxy_queue)
            checked = 0
            for each_proxy in self.db.getAll():
                checked += 1
                if not validUsefulProxy(each_proxy):
                    self.db.delete(each_proxy)
                    self.log.info(
                        'proxy: {} validation fail'.format(each_proxy))
                    continue
                self.log.debug(
                    'proxy: {} validation pass'.format(each_proxy))
                # Second stage: only run for proxies that passed the first.
                if validTelnetProxy(each_proxy):
                    self.log.debug(
                        'proxy: {} telnet pass'.format(each_proxy))
                else:
                    self.db.delete(each_proxy)
                    self.log.info(
                        'proxy: {} telnet fail'.format(each_proxy))
            # Heartbeat once per pass; it was unreachable after the loop.
            self.log.info(u'代理验证程序运行正常')
            if not checked:
                # Nothing to validate: avoid hammering the DB in a tight loop.
                time.sleep(5)

    def main(self):
        """Run the validation loop (does not return)."""
        self.__validProxy__()
Exemplo n.º 16
0
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that validates proxies taken from a shared queue and keeps
    a per-proxy counter in the useful proxy table.
    """

    def __init__(self, queue, item_dict):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        # file=False: several threads writing one log file causes problems.
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict

    def _recheck_proxy(self, proxy):
        # Validate one proxy and update (or delete) its stored counter.
        count = self.item_dict[proxy]
        if validUsefulProxy(proxy):
            # Validation passed: counter - 1 (only while it is positive).
            if count and int(count) > 0:
                self.db.put(proxy, num=int(count) - 1)
            self.log.info('ProxyCheck: {} validation pass'.format(proxy))
            return
        self.log.info('ProxyCheck: {} validation fail'.format(proxy))
        reached_limit = count and int(count) + 1 >= FAIL_COUNT
        if reached_limit:
            self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
            self.db.delete(proxy)
        else:
            self.db.put(proxy, num=int(count) + 1)

    def run(self):
        """
        Thread entry point: drain the queue without blocking, validating each
        proxy, and return once the queue reports Empty.

        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                return
            self._recheck_proxy(proxy)
            self.queue.task_done()
Exemplo n.º 17
0
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """
    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        A proxy already present in the useful table is skipped without being
        validated again.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        raw_proxy = self.db.pop()
        self.log.info('%s start validProxy_a' % time.ctime())
        while raw_proxy:
            self.db.changeTable(self.useful_proxy_queue)
            exist_proxy = self.db.getAll()
            # Cheap membership test first: validUsefulProxy does network I/O,
            # which is wasted on a proxy that would be rejected as duplicate.
            if (raw_proxy not in exist_proxy) and validUsefulProxy(raw_proxy):
                self.db.put(raw_proxy)
                self.log.info('validProxy_a: %s validation pass' % raw_proxy)
            else:
                self.log.debug('validProxy_a: %s validation fail' % raw_proxy)
            self.db.changeTable(self.raw_proxy_queue)
            raw_proxy = self.db.pop()
        self.log.info('%s validProxy_a complete' % time.ctime())
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that pops proxies from the useful proxy table,
    re-validates them and writes them back with an adjusted counter.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def _recheck_item(self, proxy_item):
        # Validate one popped item and store, update or delete it.
        proxy = proxy_item.get('proxy')
        counter = proxy_item.get('value')
        if validUsefulProxy(proxy):
            # Validation passed: counter + 1 while it is still below 1.
            below_cap = counter and int(counter) < 1
            if below_cap:
                self.db.put(proxy, num=int(counter) + 1)
            else:
                self.db.put(proxy)
            self.log.info(
                'ProxyCheck: {} validation pass'.format(proxy))
            return

        self.log.info(
            'ProxyCheck: {} validation fail'.format(proxy))
        # Validation failed: delete below -5, otherwise counter - 1.
        if counter and int(counter) < -5:
            self.log.info(
                'ProxyCheck: {} fail too many, delete!'.format(
                    proxy))
            self.db.delete(proxy)
        else:
            self.db.put(proxy, num=int(counter) - 1)

    def run(self):
        """
        Thread entry point; loops forever, sleeping five minutes each time
        pop() returns a falsy value.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                self._recheck_item(proxy_item)
                proxy_item = self.db.pop()
            sleep(60 * 5)
Exemplo n.º 19
0
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that pops proxies from the useful proxy table,
    re-validates them and writes them back with an adjusted counter.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever, sleeping five minutes each time
        pop() returns a falsy value.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            while True:
                item = self.db.pop()
                if not item:
                    break
                proxy = item.get('proxy')
                # A missing 'value' is treated as a counter of 1.
                counter = item.get('value', 1)
                passed = validUsefulProxy(proxy)
                if passed:
                    # Validation passed: counter + 1 while it is below 1.
                    if counter and int(counter) < 1:
                        self.db.put(proxy, num=int(counter) + 1)
                    else:
                        self.db.put(proxy)
                    self.log.info('ProxyCheck: {} validation pass'.format(proxy))
                    continue

                self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                # Validation failed: delete at/below FAIL_COUNT, else counter - 1.
                if counter and int(counter) <= FAIL_COUNT:
                    self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=int(counter) - 1)
            sleep(60 * 5)
Exemplo n.º 20
0
class ProxyValidSchedule(ProxyManager):
    """
    Endlessly re-validate the proxies in the useful proxy table, deleting
    any proxy that fails validUsefulProxy.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy__(self):
        """
        Validate every proxy in the useful table, forever.

        :return: never returns
        """
        import time  # local import: this block cannot see the module imports

        while True:
            self.db.changeTable(self.useful_proxy_queue)
            checked = 0
            for each_proxy in self.db.getAll():
                checked += 1
                if validUsefulProxy(each_proxy):
                    self.log.debug('proxy: {} validation pass'.format(each_proxy))
                else:
                    self.db.delete(each_proxy)
                    self.log.info('proxy: {} validation fail'.format(each_proxy))
            # Heartbeat once per pass; it was unreachable after the loop.
            self.log.info(u'代理验证程序运行正常')
            if not checked:
                # Nothing to validate: avoid hammering the DB in a tight loop.
                time.sleep(5)

    def main(self):
        """Run the validation loop (does not return)."""
        self.__validProxy__()
Exemplo n.º 21
0
def testLogHandler():
    """
    Emit one info line through LogHandler under the name 'test', then one
    more after each rename via resetName ('test1', 'test2').

    :return: None
    """
    logger = LogHandler('test')
    logger.info('this is a log from test')

    renames = (
        ('test1', 'this is a log from test1'),
        ('test2', 'this is a log from test2'),
    )
    for new_name, message in renames:
        logger.resetName(name=new_name)
        logger.info(message)
Exemplo n.º 22
0
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that pops proxies from the useful proxy table, puts
    back the ones that still validate and deletes the ones that do not.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever, sleeping 30 seconds each time
        pop() returns a falsy value.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                # Only the address is needed; the stored 'value' was read
                # into an unused local before and is no longer fetched.
                proxy = proxy_item.get('proxy')
                if validUsefulProxy(proxy):
                    self.log.info(
                        'ProxyCheck: {} validation pass'.format(proxy))
                    self.db.put(proxy)
                else:
                    self.log.info(
                        'ProxyCheck: {} validation fail'.format(proxy))
                    self.db.delete(proxy)

                proxy_item = self.db.pop()
            sleep(30)
Exemplo n.º 23
0
class ProxyManager(object):
    """
    ProxyManager: owns the DB client, the configuration and the names of the
    raw and useful proxy tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxies with every configured getter and store the new ones in
        the raw proxy table.

        Each name in ``config.proxy_getter_functions`` is looked up on
        GetFreeProxy and called. Proxies that already exist in the useful
        table are skipped. A getter that raises is logged and skipped so the
        remaining getters still run.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                proxy_set = set()
                # fetch raw proxy
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())

                # store raw proxy
                for proxy in proxy_set:
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(proxy)
            except Exception as e:
                # Best-effort: one broken getter must not stop the others.
                # `except ... as` parses on Python 2.6+ and Python 3.
                self.log.error('{func}: refresh failed, {error}'.format(func=proxyGetter, error=e))
                continue
Exemplo n.º 24
0
class proxyRefreshSchedule(ProxyManage):
    """
    Periodically refresh proxies.
    """
    def __init__(self):
        ProxyManage.__init__(self)
        self.log = LogHandler("refresh_schedule")

    def validProxy(self, row_table, usefultable):
        """
        Validate the proxies popped from ``row_table`` and store the usable
        ones in ``usefultable``.

        :param row_table: name of the table to pop raw proxies from; when it
            contains 'https' the proxy is validated under the 'https' key,
            otherwise under 'http'.
        :param usefultable: name of the table that receives valid proxies.
        :return: None
        """
        self.db.changeTable(row_table)
        raw_proxy_item = self.db.pop()
        self.log.info("ProxyRefreshSchedule:{} start validProxy".format(
            time.ctime()))
        remaining_proxies = self.getAll(self.useful_proxy_queue)
        while raw_proxy_item:
            try:
                raw_proxy = raw_proxy_item.get('proxy')
            except (AttributeError, TypeError):
                # The popped item is the proxy itself, not a mapping.
                raw_proxy = raw_proxy_item
            if isinstance(raw_proxy, bytes):
                # decode() returns a new object; the result must be kept.
                raw_proxy = raw_proxy.decode('utf-8')
            if 'https' in row_table:
                raw_proxy_dict = {'https': raw_proxy}
            else:
                raw_proxy_dict = {'http': raw_proxy}
            if (raw_proxy not in remaining_proxies
                    and validUsefulProxy(raw_proxy_dict)):
                self.db.changeTable(usefultable)
                self.db.put(raw_proxy)
                self.log.info("ProxyRefreshSchedule:%s validation pass" %
                              raw_proxy)
            else:
                self.log.info("ProxyRefreshSchedule: %s validation fail" %
                              raw_proxy)
            self.db.changeTable(row_table)
            raw_proxy_item = self.db.pop()
            # NOTE(review): the initial snapshot reads useful_proxy_queue but
            # this refresh reads row_table — confirm which table is intended.
            remaining_proxies = self.getAll(row_table)
        self.log.info("ProxyRefreshSchedule:%s  validProxy complete" %
                      time.ctime())
Exemplo n.º 25
0
def testLogHandler():
    """
    Exercise LogHandler (Util/LogHandler): log once as 'test', then rename
    the handler with resetName and log again as 'test1' and 'test2'.

    :return: None
    """
    handler = LogHandler('test')
    handler.info('this is a log from test')

    for renamed_to, line in (
        ('test1', 'this is a log from test1'),
        ('test2', 'this is a log from test2'),
    ):
        handler.resetName(name=renamed_to)
        handler.info(line)
Exemplo n.º 26
0
def testLogHandler():
    """
    Smoke test for LogHandler in Util/LogHandler.

    Writes one info message per logger name: the initial name 'test',
    followed by 'test1' and 'test2' applied through resetName.

    :return: None
    """
    log = LogHandler('test')
    log.info('this is a log from test')

    follow_ups = [
        ('test1', 'this is a log from test1'),
        ('test2', 'this is a log from test2'),
    ]
    for name, text in follow_ups:
        log.resetName(name=name)
        log.info(text)
Exemplo n.º 27
0
class ProxyRefreshSchedule(ProxyManager):
    """
    Periodically re-check the proxies in the raw table and move the usable
    ones into the useful table.
    """
    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('ProxyRefresh')

    def start(self):
        """
        Pop proxies from the raw table until pop() returns a falsy value,
        storing each proxy accepted by proxy_useful_valid in the useful table.

        :return: None
        """
        self.log.info('Proxy valid start')
        self.db_client.change_table(self.raw_proxy)
        while True:
            candidate = self.db_client.pop()
            if not candidate:
                break
            if not proxy_useful_valid(candidate):
                self.log.info('Proxy valid failed {}'.format(candidate))
                continue
            self.log.info('Proxy valid pass {}'.format(candidate))
            # Store in the useful table, then switch back for the next pop.
            self.db_client.change_table(self.useful_proxy)
            self.db_client.put(candidate)
            self.db_client.change_table(self.raw_proxy)
        self.log.info('Proxy valid end')
Exemplo n.º 28
0
class ProxyCheck(ProxyManager, Thread):
    """
    Check the proxies in the useful table and delete the unusable ones.
    """
    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('ProxyCheck')

    def run(self):
        """
        Thread entry point; loops forever, pausing five minutes each time
        pop() returns a falsy value.

        :return: never returns
        """
        self.log.info('Proxy useful check start')
        while True:
            self.db_client.change_table(self.useful_proxy)
            while True:
                candidate = self.db_client.pop()
                if not candidate:
                    break
                if not proxy_useful_valid(candidate):
                    self.log.info('Proxy useful valid failed {}'.format(candidate))
                    self.db_client.delete(candidate)
                    continue
                self.log.info('Proxy useful valid pass {}'.format(candidate))
                self.db_client.put(candidate)
            self.log.info('Proxy useful check pausing')
            sleep(5 * 60)
Exemplo n.º 29
0
class ProxyValidSchedule(ProxyManager):
    """
    Re-validate every proxy in the useful proxy table once a minute,
    updating a per-proxy counter and deleting proxies that keep failing.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Validate all proxies, log a heartbeat, sleep 60 seconds, repeat.

        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for stored in self.db.getAll():
                # Py3 compatibility: stored entries may be bytes.
                proxy = stored.decode('utf-8') if isinstance(stored, bytes) else stored

                score = self.db.get(proxy)
                if not validUsefulProxy(proxy):
                    # Failure: delete below -5, otherwise counter - 1.
                    if score and int(score) < -5:
                        self.db.delete(proxy)
                    else:
                        self.db.update(proxy, -1)
                    self.log.info('ProxyValidSchedule: {} validation fail'.format(proxy))
                    continue

                # Success: counter + 1 while it is still below 1.
                if score and int(score) < 1:
                    self.db.update(proxy, 1)
                self.log.info('ProxyValidSchedule: {} validation pass'.format(proxy))

            self.log.info('ProxyValidSchedule running normal')
            sleep(60 * 1)

    def main(self):
        """Run the validation loop (does not return)."""
        self.__validProxy()
Exemplo n.º 30
0
class ProxyCheck(ProxyManager, Thread):
    """Worker thread that re-validates items popped from the useful-proxy table."""

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; overrides ``Thread.run``.

        Instantiate the class and call ``.start()`` to run it as a thread.
        The loop pops items from the useful-proxy table until ``pop`` returns
        a falsy value, re-checking each one, then sleeps five minutes and
        starts over.
        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                self._check_item(proxy_item)
                proxy_item = self.db.pop()
            sleep(60 * 5)

    def _check_item(self, proxy_item):
        """
        Validate one popped item and either put it back or delete it.

        :param proxy_item: mapping read through ``.get('proxy')`` and
            ``.get('value')``; the value is the proxy's counter
        :return: None
        """
        proxy = proxy_item.get('proxy')
        counter = proxy_item.get('value')
        # A missing or empty counter used to crash the failure path with
        # int(None); treat it as 0 so the thread keeps running.
        count = int(counter) if counter else 0

        if validUsefulProxy(proxy):
            # Passed: raise the counter by one while it is still below 1,
            # otherwise store the proxy with the client's default value.
            if counter and count < 1:
                self.db.put(proxy, num=count + 1)
            else:
                self.db.put(proxy)
            self.log.info(
                'ProxyCheck: {} validation pass'.format(proxy))
            return

        self.log.info(
            'ProxyCheck: {} validation fail'.format(proxy))
        # Failed: delete once the counter has reached -FAIL_COUNT, otherwise
        # put the proxy back with the counter lowered by one.
        if counter and count <= -FAIL_COUNT:
            self.log.info(
                'ProxyCheck: {} fail too many, delete!'.format(
                    proxy))
            self.db.delete(proxy)
        else:
            self.db.put(proxy, num=count - 1)
Exemplo n.º 31
0
class ProxyManager(object):
    """
    Facade over the db client for the 'raw_proxy' and 'useful_proxy' tables.
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run every configured getter and store well-formed proxies in the
        raw-proxy table.

        A getter that raises is logged and skipped; the remaining getters
        still run.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                fetcher = getattr(GetFreeProxy, proxyGetter.strip())
                for candidate in fetcher():
                    candidate = candidate.strip()
                    if not (candidate and verifyProxyFormat(candidate)):
                        # Empty or malformed entry: report it and move on.
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=candidate))
                        continue
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=candidate))
                    self.db.put(candidate)
            except Exception:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))

    def get(self):
        """
        Pick one key at random from the useful-proxy table.
        :return: a randomly chosen key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        # Python 3 key views are not indexable, so they are copied to a list.
        if EnvUtil.PY3:
            candidates = list(pool.keys())
        else:
            candidates = pool.keys()
        return random.choice(candidates)

    def delete(self, proxy):
        """
        Remove one proxy from the useful-proxy table.
        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        List the keys of the useful-proxy table.
        :return: the keys, or an empty list when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if EnvUtil.PY3:
            if pool:
                return list(pool.keys())
            return list()
        if pool:
            return pool.keys()
        return list()

    def getNumber(self):
        """
        Count the entries of both tables.
        :return: dict with 'raw_proxy' and 'useful_proxy' counts
        """
        counts = {}
        tables = (
            ('raw_proxy', self.raw_proxy_queue),
            ('useful_proxy', self.useful_proxy_queue),
        )
        for label, table in tables:
            self.db.changeTable(table)
            counts[label] = self.db.getNumber()
        return counts
Exemplo n.º 32
0
class ProxyManager(object):
    """
    Facade over the db client for the 'raw_proxy' and 'useful_proxy' tables.

    Entries of the useful-proxy table are handled as JSON strings carrying a
    'proxy' field of the form '<scheme>://<address>'.
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        Run every configured getter and store new, well-formed proxies in the
        raw-proxy table.

        Proxies already seen during this call are skipped.  A getter that
        raises is logged (with the exception text) and the next one runs.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")

        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue

                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue

                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} success'.format(
                                      func=proxyGetter,
                                      proxy=proxy.ljust(20)))
                    self.db.put(Proxy(proxy, source=proxyGetter))
                    proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return: a Proxy built from a random entry, or None when the table
            is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            return Proxy.newProxyFromJson(random.choice(item_list))

        return None

    def _get_by_scheme(self, scheme):
        """
        Return a random useful proxy whose 'proxy' field starts with
        '<scheme>://'.

        Matching entries are collected first and one is then chosen, so a
        match is always found when one exists.  (Sampling the whole table at
        random, as before, could miss every matching entry.)  Entries that
        are not valid JSON or lack a 'proxy' field raise, as they did before
        whenever they were sampled.
        :param scheme: URL scheme to match, e.g. 'http'
        :return: a Proxy, or None when no entry matches
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll() or []
        candidates = [
            item for item in item_list
            if json.loads(item)['proxy'].split("://")[0] == scheme
        ]

        if candidates:
            return Proxy.newProxyFromJson(random.choice(candidates))

        return None

    def get_http(self):
        """
        return a http proxy
        :return: a Proxy whose scheme is 'http', or None when there is none
        """
        return self._get_by_scheme('http')

    def get_socks(self):
        """
        return a useful socks proxy
        :return: a Proxy whose scheme is 'socks4' (the only socks scheme
            matched), or None when there is none
        """
        return self._get_by_scheme('socks4')

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str: key of the entry to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return: list of Proxy objects; empty when the table is empty or the
            db client returns None
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll() or []

        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        """
        Count the entries of both tables.
        :return: dict with 'raw_proxy' and 'useful_proxy' counts
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()

        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 33
0
class ProxyManager(object):
    """
    Facade over the db client for the 'raw_proxy' and 'useful_proxy' tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxy addresses from every configured getter and store them in
        the db.

        A getter that raises is logged (with the exception text) and skipped.
        Well-formed proxies not already in the useful-proxy table are put
        into the raw-proxy table.
        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                # Drain the getter inside the try so that errors raised while
                # iterating are caught as well.
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception as e:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                self.log.error(str(e))
                continue

            proxy_set = set()
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(func=proxyGetter, proxy=proxy))

            # Store in the db, skipping proxies already known to be useful.
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return one useful proxy.
        :return: a randomly chosen key of the useful-proxy table, or None
            when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and Python 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        Remove one proxy from the useful-proxy table.
        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        List the keys of the useful-proxy table.

        The previous non-PY3 branch called ``items.key()`` (a typo for
        ``keys()``) and raised AttributeError; ``list(items)`` gives the keys
        on every interpreter.
        :return: list of keys; empty when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        return list(items) if items else list()

    def getNumber(self):
        """
        Count the entries of both tables.
        :return: dict with 'raw_proxy' and 'useful_proxy' counts
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
Exemplo n.º 34
0
class ProxyManager(object):
    """
    Facade over the db client for the 'raw_proxy', 'useful_proxy' and 'adsl'
    tables.
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'
        self.adsl_queue = 'adsl'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py

        A getter that raises is logged (with the exception text) and skipped.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # Stored directly without de-duplication in code; per the
                    # original author's note the hash structure de-duplicates.
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return: a randomly chosen key of the useful-proxy table, or None
            when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and Python 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return: list of keys; empty when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict) if item_dict else list()

    def getNumber(self):
        """
        Count the entries of the raw-proxy and useful-proxy tables.
        :return: dict with 'raw_proxy' and 'useful_proxy' counts
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def initProxyPool(self):
        """
        Call this on first start-up: empty the useful-proxy table, then read
        the 'adsl' table.
        :return: list of the values stored in the 'adsl' table; empty when
            that table is empty
        """
        self.deleteAll()
        self.db.changeTable(self.adsl_queue)
        item_dict = self.db.getAll()
        return list(item_dict.values()) if item_dict else list()

    def deleteAll(self):
        """
        Empty the proxy pool by deleting every key of the useful-proxy table.
        :return: None
        """
        for proxy in self.getAll():
            self.delete(proxy)

    def refreshADSL(self, proxy):
        """
        Ask the host behind a proxy to redial by calling its refresh endpoint.

        Best effort: any error is printed and swallowed.
        :param proxy: 'ip:port' as str or utf8-encoded bytes
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        ip = proxy.split(':')[0]
        try:
            # Call the redial endpoint served on port 8000 of the proxy host.
            refreshApi = "http://{ip}:8000/refresh".format(ip=ip)
            r = requests.get(refreshApi, timeout=5, verify=False)
            if r.status_code == 200:
                # The template used to be printed verbatim ('{proxy} refres
                # done'); format it so the proxy actually appears.
                print('{proxy} refresh done'.format(proxy=proxy))
        except Exception as e:
            print(str(e))
Exemplo n.º 35
0
class ProxyManager(object):
    """
    Facade over the db client for the 'raw_proxy' and 'useful_proxy' tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Collect proxies from every configured getter and queue the new ones.

        Each getter's truthy results are stripped and gathered in a set; every
        member not already present in the useful-proxy table is put into the
        raw-proxy table.
        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            fetched = set()
            getter = getattr(GetFreeProxy, proxyGetter.strip())
            # Gather this getter's raw results.
            for raw in getter():
                if not raw:
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=raw))
                fetched.add(raw.strip())

            # Queue whatever is not already known to be useful.
            for candidate in fetched:
                self.db.changeTable(self.useful_proxy_queue)
                if not self.db.exists(candidate):
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(candidate)

    def get(self):
        """
        Pick one key at random from the useful-proxy table.
        :return: a randomly chosen key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        # Python 3 key views are not indexable, so they are copied to a list.
        if EnvUtil.PY3:
            choices = list(pool.keys())
        else:
            choices = pool.keys()
        return random.choice(choices)

    def delete(self, proxy):
        """
        Remove one proxy from the useful-proxy table.
        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        List the keys of the useful-proxy table.
        :return: the keys, or an empty list when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if EnvUtil.PY3:
            if pool:
                return list(pool.keys())
            return list()
        if pool:
            return pool.keys()
        return list()

    def getNumber(self):
        """
        Count the entries of both tables.
        :return: dict with 'raw_proxy' and 'useful_proxy' counts
        """
        counts = {}
        self.db.changeTable(self.raw_proxy_queue)
        counts['raw_proxy'] = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        counts['useful_proxy'] = self.db.getNumber()
        return counts
Exemplo n.º 36
0
class ProxyManager(object):
    """
    Facade over the db client for the 'raw_proxy' and 'useful_proxy' tables.
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run each configured getter, then store its proxies that are not yet
        in the useful-proxy table into the raw-proxy table.
        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            harvested = set()
            # Step 1: collect the stripped form of every truthy result.
            for item in getattr(GetFreeProxy, proxyGetter.strip())():
                if item:
                    message = '{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=item)
                    self.log.info(message)
                    harvested.add(item.strip())

            # Step 2: store the ones the useful-proxy table does not hold.
            for item in harvested:
                self.db.changeTable(self.useful_proxy_queue)
                already_useful = self.db.exists(item)
                if already_useful:
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(item)

    def get(self):
        """
        Choose a random key from the useful-proxy table.
        :return: the chosen key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        entries = self.db.getAll()
        if entries:
            # random.choice needs a sequence, so Python 3 key views are
            # copied into a list first.
            if EnvUtil.PY3:
                keys = list(entries.keys())
            else:
                keys = entries.keys()
            return random.choice(keys)
        return None

    def delete(self, proxy):
        """
        Delete one key from the useful-proxy table.
        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return every key of the useful-proxy table.
        :return: the keys, or an empty list when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        entries = self.db.getAll()
        py3 = EnvUtil.PY3
        if not entries:
            return list()
        keys = entries.keys()
        return list(keys) if py3 else keys

    def getNumber(self):
        """
        Report how many entries each table holds.
        :return: dict with 'raw_proxy' and 'useful_proxy' counts
        """
        self.db.changeTable(self.raw_proxy_queue)
        raw_total = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        useful_total = self.db.getNumber()
        return dict([('raw_proxy', raw_total), ('useful_proxy', useful_total)])