Exemplo n.º 1
0
    def getAllProxy(pool_size=10, thread_or_process=True, is_refash=True):
        """Refresh the proxy pool (when needed) and check every stored proxy.

        Args:
            pool_size: number of workers for the availability check.
            thread_or_process: True -> use threads, False -> use processes.
            is_refash: True forces a full rebuild of the ``ip_house`` table;
                False only rebuilds when the table is currently empty.
        """
        Log.v('正在更新ip池,请稍后...')
        # NOTE(review): `address` must be defined at module level — the local
        # computation below was commented out. Presumably it ends with '/';
        # confirm before relying on it.
        # address = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + '/'
        need_rebuild = is_refash
        if not is_refash:
            # Rebuild only when the pool is empty.
            sqlite = Sqlite(address + 'ip.db')
            results = sqlite.query_data(
                'select count(proxy_adress) from ip_house')
            sqlite.close_conn()  # fix: original leaked this connection
            need_rebuild = int(results[0][0]) == 0

        if need_rebuild:
            if is_refash:
                # Drop stale entries before repopulating. Close this handle
                # before opening the insert connection (the original kept two
                # handles open on the same db file at once).
                sqlite = Sqlite(address + 'ip.db')
                sqlite.update_data('DELETE FROM ip_house')
                sqlite.close_conn()
            # De-duplicate the scraped proxy list, then insert with 1-based ids.
            proxys = list(set(GetFreeProxy.get_list_proxys()))
            sqlite = Sqlite(address + 'ip.db')
            for i, proxy in enumerate(proxys):
                if proxy and GetFreeProxy.verifyProxyFormat(proxy):
                    sqlite.cursor.execute(
                        "INSERT INTO ip_house VALUES (?,?,?,?);",
                        [i + 1, proxy, 'HTTP', 'true'])
            sqlite.conn.commit()
            sqlite.close_conn()
            GetFreeProxy.get_proxys_by_website()

        # Re-read the whole pool and fan the entries out to the checkers as
        # 'id&&address&&type' strings.
        sqlite = Sqlite(address + 'ip.db')
        results = sqlite.query_data(
            'select id,proxy_adress,type from ip_house')
        sqlite.close_conn()  # fix: original never closed this connection
        params = ['&&'.join((str(row[0]), row[1], row[2])) for row in results]
        Log.v("发现ip代理数量:" + str(len(params)))
        Log.v('正在检查ip可用性...')
        if thread_or_process:
            GetFreeProxy.exec_multi_threading(pool_size, params)
        else:
            GetFreeProxy.exec_multi_process(pool_size, params)
        Log.v('更新完成')
Exemplo n.º 2
0
    def get_proxys_by_website():
        """Scrape free proxies from xicidaili.com and append them to ip_house.

        Fetches four listing pages, parses every table row, de-duplicates the
        'ip:port&TYPE' strings, and inserts each well-formed proxy with an id
        that continues after the rows already stored in the table.
        """
        import time
        headers = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Upgrade-Insecure-Requests': '1',
            'Host': 'www.xicidaili.com',
            'Pragma': 'no-cache',
            'Connection': 'keep-alive',
            'Cache-Control': 'no-cache',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }
        # The four fetch/parse passes were copy-pasted in the original; loop
        # over the listing URLs instead.
        pages = (
            'https://www.xicidaili.com/nn/1',
            'https://www.xicidaili.com/nt/',
            'https://www.xicidaili.com/wn/',
            'https://www.xicidaili.com/wt/',
        )
        trs_list = []
        for page in pages:
            response = requests.get(page, headers=headers, timeout=20)
            html = BeautifulSoup(response.text, 'html.parser')
            table = html.find('table', attrs={'id': 'ip_list'})
            # [1:] skips the header row of each table.
            trs_list.extend(table.find_all('tr')[1:])
            time.sleep(1)  # throttle between page fetches

        proxys = []
        for tr in trs_list:
            tds = tr.find_all('td')
            ip = tds[1].get_text()
            port = tds[2].get_text()
            # Renamed from `type` (shadowed the builtin). The original also
            # computed a date column for a commented-out freshness filter;
            # that dead code is removed.
            proxy_type = str(tds[5].get_text()).strip(' ')
            proxys.append(ip + ":" + port + '&' + proxy_type)

        proxys = list(set(proxys))
        # One connection for both the count query and the inserts (the
        # original opened two and leaked the first).
        sqlite = Sqlite(address + 'ip.db')
        results = sqlite.query_data('select count(proxy_adress) from ip_house')
        num = int(results[0][0])
        for i, proxy in enumerate(proxys):
            paras = proxy.split('&')
            if paras[0] and GetFreeProxy.verifyProxyFormat(paras[0]):
                # fix: ids are 1-based elsewhere (getAllProxy inserts i + 1),
                # so `num + i` collided with the last existing row; start one
                # past the current row count instead.
                sqlite.cursor.execute(
                    "INSERT INTO ip_house VALUES (?,?,?,?);",
                    [num + i + 1, paras[0], paras[1], 'true'])
        sqlite.conn.commit()
        sqlite.close_conn()