示例#1
0
    def _handle_tasks_result_list(**kwargs):
        '''
        Merge the proxies returned by the fetch tasks into the stored pool and write it back to redis.

        :param kwargs: ``all`` -- iterable of per-task results; each result is
            expected to be an iterable of proxy items (defaults to ``[]``)
        :return: always True
        '''
        task_results = kwargs.get('all', [])
        old = ip_pools_obj._get_all_ip_proxy(_k=proxy_list_key_name)

        for res_content in task_results:
            if res_content != []:
                try:
                    old.extend(res_content)
                except TypeError:
                    # A task can hand back a non-iterable object instead of a
                    # list, e.g. "'NotRegistered' object is not iterable" when
                    # the task 'proxy_tasks._get_proxy' was never registered.
                    # Such a result carries no proxies, so it is skipped.
                    continue

        old = list_remove_repeat_dict(target=old, repeat_key='ip')
        # Convert the items to dicts so that deserialisation does not fail on
        # an unrecognised ProxyItem.
        old = serialize_obj_item_2_dict(old)
        redis_cli.set(name=_key, value=dumps(old))

        return True
示例#2
0
def main():
    '''
    Entry point: keep filling the proxy pool, then re-check it, forever.

    Each round fetches proxies until the pool stored under
    ``proxy_list_key_name`` holds at least ``MAX_PROXY_NUM`` entries, sleeps
    ``WAIT_TIME`` seconds, and then runs ``check_all_proxy`` over both the
    general pool and the high-anonymity pool. The function never returns.
    '''
    global time_str, local_ip

    def _load_pool(key_name):
        # Current content stored under key_name, passed through
        # list_remove_repeat_dict keyed on 'ip'.
        return list_remove_repeat_dict(
            target=ip_pools_obj._get_all_ip_proxy(_k=key_name),
            repeat_key='ip')

    _welcome()
    print('Getting local ip...')
    local_ip = get_local_external_network_ip()
    assert local_ip != '', 'local_ip获取失败!'
    print('[+] local ip: {}'.format(local_ip))

    while True:
        pool = _load_pool(proxy_list_key_name)
        while len(pool) < MAX_PROXY_NUM:
            progress = 'Ip Pools --->>> 已存在proxy_num(匿名度未知): {}'.format(
                len(pool))
            print('\r' + _get_simulate_logger() + progress,
                  end='',
                  flush=True)
            get_proxy_process_data()
            # Refresh the view of the pool after the fetch round.
            pool = _load_pool(proxy_list_key_name)

        # The pool has reached the target size.
        print()
        lg.info('达标!休眠{}s...'.format(WAIT_TIME))
        sleep(WAIT_TIME)

        lg.info('Async Checking all_proxy(匿名度未知)...')
        pool = list_remove_repeat_dict(target=pool, repeat_key='ip')
        check_all_proxy(pool, redis_key_name=_key, delete_score=90)

        # Drop dead entries so the high-anonymity list always holds fresh,
        # usable proxies.
        high_pool = _load_pool(high_proxy_list_key_name)
        lg.info('Async Checking hign_proxy(高匿名)状态...')
        check_all_proxy(high_pool,
                        redis_key_name=_h_key,
                        delete_score=MIN_SCORE)
示例#3
0
def main():
    '''
    Entry point: keep filling the proxy pool, then re-check it, forever.

    Each round fetches proxies until the list stored under ``_key`` holds at
    least ``MAX_PROXY_NUM`` entries, sleeps ``WAIT_TIME`` seconds, and then
    runs ``check_all_proxy`` over the general list (``_key``) and the
    high-anonymity list (``_h_key``). The function never returns.
    '''
    global time_str

    def _read_proxy_list(redis_key):
        # A missing key (redis returns a falsy value) is treated as an empty
        # serialized list.
        raw = redis_cli.get(redis_key) or dumps([])
        return list_remove_repeat_dict(
            target=deserializate_pickle_object(raw),
            repeat_key='ip')

    while True:
        known_proxies = _read_proxy_list(_key)
        while len(known_proxies) < MAX_PROXY_NUM:
            status_line = 'Ip Pools --->>> 已存在proxy_num(匿名度未知): {}'.format(
                len(known_proxies))
            print('\r' + _get_simulate_log_info() + status_line,
                  end='',
                  flush=True)
            get_proxy_process_data()
            # Re-read the list after the fetch round.
            known_proxies = _read_proxy_list(_key)

        # The pool has reached the target size.
        print()
        lg.info('达标!休眠{}s...'.format(WAIT_TIME))
        sleep(WAIT_TIME)

        lg.info('Async Checking all_proxy(匿名度未知)...')
        known_proxies = list_remove_repeat_dict(
            target=known_proxies, repeat_key='ip')
        check_all_proxy(known_proxies, redis_key_name=_key, delete_score=88)

        # Drop dead entries so the high-anonymity list always holds fresh,
        # usable proxies.
        high_proxies = _read_proxy_list(_h_key)
        lg.info('Async Checking hign_proxy(高匿名)状态...')
        check_all_proxy(high_proxies,
                        redis_key_name=_h_key,
                        delete_score=MIN_SCORE)
示例#4
0
    def _handle_tasks_result_list(**kwargs):
        '''
        Merge the task results into the stored proxy pool and write it back to redis.

        :param kwargs: ``all`` -- iterable of per-task result lists (defaults to ``[]``)
        :return: always True
        '''
        batches = kwargs.get('all', [])
        merged = ip_pools_obj._get_all_ip_proxy(_k=proxy_list_key_name)

        # Empty results contribute nothing and are filtered out up front.
        for batch in (b for b in batches if b != []):
            merged += batch

        deduped = list_remove_repeat_dict(target=merged, repeat_key='ip')
        # Convert the items to dicts so that deserialisation does not fail on
        # an unrecognised ProxyItem.
        as_dicts = serialize_obj_item_2_dict(deduped)
        redis_cli.set(name=_key, value=dumps(as_dicts))

        return True
示例#5
0
    def _handle_tasks_result_list(**kwargs):
        '''
        Merge the task results into the proxy list stored under ``_key`` and write it back.

        :param kwargs: ``all`` -- iterable of per-task result lists (defaults to ``[]``)
        :return: always True
        '''
        task_results = kwargs.get('all', [])
        # A missing key (falsy redis reply) is treated as an empty serialized list.
        stored_blob = redis_cli.get(_key) or dumps([])
        pool = deserializate_pickle_object(stored_blob)

        for result in task_results:
            if result != []:
                pool += result

        pool = list_remove_repeat_dict(target=pool, repeat_key='ip')
        # Convert the items to dicts so that deserialisation does not fail on
        # an unrecognised ProxyItem.
        redis_cli.set(name=_key, value=dumps(serialize_obj_item_2_dict(pool)))

        return True
示例#6
0
    async def _get_ip_proxy_list(self, ip_num=200) -> list:
        '''
        Request a batch of proxies from the zhimacangku API and cache the usable ones.

        When the response carries a non-empty ``data`` list, it is passed
        through ``_delete_expire_time_ip``, de-duplicated by ``ip``, kept on
        ``self.ip_list`` and stored in redis under ``self._k``. When the
        response message asks for whitelisting, the IP quoted in the message
        is added to the whitelist.

        :param ip_num: number of IPs to request (sent as the ``num`` query parameter)
        :return: the raw ``data`` value of the API response (``[]`` when absent)
        '''
        # http://webapi.http.zhimacangku.com/getip?num=200&type=2&pro=&city=0&yys=0&port=1&time=1&ts=1&ys=0&cs=0&lb=1&sb=0&pb=4&mr=1&regions=
        params = (
            ('num', str(ip_num)),   # number of IPs to extract
            ('type', '2'),          # data format: 1:TXT 2:JSON 3:html
            ('pro', ''),            # province, default: nationwide
            ('city', '0'),          # city, default: nationwide
            ('yys', '0'),           # carrier: 0:any 100026:China Unicom 100017:China Telecom
            ('port', '1'),          # IP protocol: 1:HTTP 2:SOCK5 11:HTTPS
            ('time', '1'),          # stable duration; 1 is the minimum and yields the most IPs
            ('ts', '1'),            # show IP expiry time: 1:show 2:hide
            ('ys', '0'),            # show IP carrier: 1:show
            ('cs', '0'),            # show location: 1:show
            ('lb', '1'),            # separator (1:\r\n 2:/br 3:\r 4:\n 5:\t 6:custom)
            ('sb', '0'),
            ('pb', '4'),            # port digits (4:4-digit port 5:5-digit port)
            ('mr', '1'),            # de-dup option (1:360-day 2:same-day 3:none)
            ('regions', ''),        # nationwide mixed-dial regions
        )
        url = 'http://webapi.http.zhimacangku.com/getip'
        ori = json_2_dict(await self._request(url=url, headers=await self._get_phone_headers(), params=params))
        data = ori.get('data', [])
        if data != []:
            fresh = await self._delete_expire_time_ip(data=data)
            self.ip_list = list_remove_repeat_dict(target=fresh, repeat_key='ip')
            # Serialize before storing.
            self.redis_cli.set(name=self._k, value=dumps(self.ip_list))

        msg = ori.get('msg', '')
        if '设置为白名单' in msg:
            # The message quotes the IP that has to be whitelisted. A raw
            # string keeps the regex escapes intact; a message without an IP
            # is simply ignored.
            ip_match = re.search(r'(\d+\.\d+\.\d+\.\d+)', msg)
            if ip_match is not None:
                _ip = ip_match.group(1)
                await self._add_local_ip_to_white_list(local_ip=_ip)
                print('已将{}设置为白名单!'.format(_ip))

        return data
示例#7
0
def check_all_proxy(origin_proxy_data, redis_key_name, delete_score):
    '''
    Re-check every given proxy through async tasks and write the result list back to redis.

    A proxy whose check succeeds is additionally added to the high-anonymity
    list (``_h_key``) with a score bonus; a proxy whose check fails loses
    score. Proxies at or below ``delete_score`` are not re-checked and are
    therefore missing from the list that is written back.

    :param origin_proxy_data: iterable of proxy-info dicts carrying at least
        'ip', 'port' and 'score'
    :param redis_key_name: redis key that is overwritten with the re-checked list
    :param delete_score: proxies with score <= this value are dropped
    :return: always True
    '''
    def _create_tasks_list(origin_proxy_data):
        '''Dispatch one check task per proxy that is still above delete_score.'''
        tasks = []
        for proxy_info in origin_proxy_data:
            if proxy_info['score'] <= delete_score:
                # Dropped: not queued, hence absent from the final list.
                continue

            proxy = proxy_info['ip'] + ':' + str(proxy_info['port'])
            async_obj = check_proxy_status.apply_async(args=[proxy, local_ip])
            tasks.append({
                'proxy_info': proxy_info,
                'async_obj': async_obj,
            })

        return tasks

    def _write_high_proxy_info_2_redis(one_proxy_info):
        '''Add a proxy to the high-anonymity list in redis unless its ip is already there.'''
        high_proxy_list = ip_pools_obj._get_all_ip_proxy(
            _k=high_proxy_list_key_name)
        known_ips = {i.get('ip') for i in high_proxy_list}
        if one_proxy_info.get('ip') in known_ips:
            return

        # Score bonus for a proxy that newly enters the high-anonymity list.
        one_proxy_info.update({
            'score': one_proxy_info.get('score') + 5,
        })
        high_proxy_list.append(one_proxy_info)
        # Convert the items to dicts so that deserialisation does not fail on
        # an unrecognised ProxyItem.
        high_proxy_list = serialize_obj_item_2_dict(high_proxy_list)
        redis_cli.set(name=_h_key, value=dumps(high_proxy_list))

    def _get_tasks_result_list(tasks):
        '''Poll the dispatched tasks until all have finished and collect their outcomes.'''
        collected = []
        checked_num = 0
        available_num = 0
        total = len(tasks)
        pending = list(tasks)
        while pending:
            # Build the next pending list instead of popping from the list
            # that is being iterated.
            still_pending = []
            for task in pending:
                async_obj = task['async_obj']
                one_proxy_info = task['proxy_info']
                if not async_obj.ready():
                    still_pending.append(task)
                    continue

                async_res = False
                try:
                    async_res = async_obj.get(timeout=2, propagate=False)
                except TimeoutError:
                    pass
                # With propagate=False a failed task yields its exception
                # instance, which is truthy; it must count as a failed check,
                # not as a working proxy.
                if isinstance(async_res, BaseException):
                    async_res = False

                if async_res:
                    available_num += 1
                    _write_high_proxy_info_2_redis(one_proxy_info)

                collected.append({
                    'async_res': async_res,
                    'proxy_info': one_proxy_info,
                })
                checked_num += 1
                # Live progress line; '\r' returns to the start of the line.
                print('\r' + _get_simulate_logger() +
                      '已检测ip: {}, 剩余: {}, 实际可用高匿个数: {}'.format(
                          checked_num, total - checked_num, available_num),
                      end='',
                      flush=True)

            if len(still_pending) == len(pending):
                # Nothing finished in this pass; pause briefly instead of
                # spinning on the result backend.
                sleep(.05)
            pending = still_pending

        print()
        print('\r', end='', flush=True)

        return collected

    def _apply_check_result(res, proxy_info):
        '''Penalise a failed proxy and stamp the check time; returns the same dict.'''
        if not res:
            proxy_info.update({
                'score': proxy_info.get('score') - 2,
            })

        proxy_info.update({
            'last_check_time': str(get_shanghai_time()),
        })
        return proxy_info

    def _handle_tasks_result_list(all_results):
        '''Turn the collected outcomes into the updated proxy-info list.'''
        return [
            _apply_check_result(res=item.get('async_res'),
                                proxy_info=item.get('proxy_info'))
            for item in all_results
        ]

    tasks = _create_tasks_list(origin_proxy_data)
    sleep(.8)
    all_results = _get_tasks_result_list(tasks)

    # Persist the refreshed data.
    new_proxy_data = list_remove_repeat_dict(
        target=_handle_tasks_result_list(all_results),
        repeat_key='ip',
    )
    new_proxy_data = serialize_obj_item_2_dict(new_proxy_data)
    redis_cli.set(name=redis_key_name, value=dumps(new_proxy_data))

    return True