Пример #1
0
def get_info(ip, port):  # look up detailed info about a proxy IP
    """Query CHECHINFO_IP for geolocation details of a proxy, through the proxy.

    Routing the request through the proxy itself both validates the proxy
    and lets us measure its round-trip time.

    Args:
        ip: proxy host address (string).
        port: proxy port.

    Returns:
        A ``(country, region, city, isp, speed)`` tuple on success, where
        ``speed`` is the elapsed request time formatted to two decimals;
        ``None`` when the lookup fails or the response lacks location data.
    """
    url = CHECHINFO_IP
    headers = get_headers()
    # Same endpoint for both schemes: the proxy is plain HTTP.
    proxy_addr = 'http://{ip}:{port}'.format(ip=ip, port=port)
    proxies = {'http': proxy_addr, 'https': proxy_addr}
    params = {'ip': ip}
    try:
        start_time = time.time()
        resp = requests.get(url,
                            headers=headers,
                            params=params,
                            proxies=proxies,
                            timeout=15).json()
        speed = '%.2f' % (time.time() - start_time)
        text = resp['data']
        if text['country'] and text['region']:
            return (text['country'], text['region'], text['city'],
                    text['isp'], speed)
        return None
    except Exception as e:
        # Narrowed from BaseException: a broad BaseException would also
        # swallow KeyboardInterrupt/SystemExit and make the crawler unkillable.
        print(e)
        return None
Пример #2
0
def crawl_github(url):
    """Fetch a JSON-lines proxy dump from GitHub and keep the Chinese proxies.

    Each line of the response body is a JSON object with at least
    ``host``, ``port``, ``country``, ``anonymity`` and ``type`` keys
    (the last two trailing lines of the dump are junk and are dropped).

    Args:
        url: raw URL of the proxy list.

    Returns:
        ``zip(ip, port, types, protocol)`` over the proxies whose country
        is ``'CN'``, or ``None`` when the request or parsing fails.
    """
    headers = get_headers()
    # Map anonymity codes to the display labels used elsewhere in the project.
    labels = {'high_anonymous': '高匿', 'anonymous': '匿名'}
    try:
        body = requests.get(url, headers=headers, timeout=15).text
        ips, ports, kinds, protocols = [], [], [], []
        for line in body.split('\n')[:-2]:  # drop the two trailing junk lines
            entry = json.loads(line)
            if entry['country'] != 'CN':
                continue
            ips.append(entry['host'])
            ports.append(entry['port'])
            kinds.append(labels.get(entry['anonymity'], '透明'))
            protocols.append(entry['type'].upper())
        return zip(ips, ports, kinds, protocols)
    except Exception:
        # Narrowed from BaseException so Ctrl-C still stops the crawler;
        # failures are deliberately silent (best-effort source).
        return None
Пример #3
0
 def download(self, url):
     """Fetch *url* and return its decoded text.

     Returns the response body as text when the fetch looks successful
     (HTTP 200, or a body larger than 500 bytes); otherwise falls
     through and implicitly returns ``None``.
     """
     print("Download:{}".format(url))  # url varies per call, so callers should wrap this in exception handling
     r = requests.get(url, headers=get_headers(), timeout=10)
     # print(chardet.detect(r.content))  # sample detection output: {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
     r.encoding = chardet.detect(r.content)["encoding"]  # decode using the detected encoding
     # NOTE(review): the `or` accepts any response with a >500-byte body even
     # on a non-200 status — confirm this isn't meant to be `and`.
     if r.status_code == 200 or len(r.content) > 500:  # treated as a successful fetch
         return r.text  # return the page text
Пример #4
0
def crawl_cloud(url):  # scrape the "cloud proxy" free-proxy list
    """Scrape the cloud-proxy free-proxy listing page.

    Parses the ``#list tr`` table rows (skipping the header), taking the
    IP and port from the first two cells and the protocol from the fourth;
    the site only lists high-anonymity proxies, so the kind is fixed.

    Args:
        url: listing page URL, fetched through the module-level ``session``.

    Returns:
        A list of ``(ip, port, kind, protocol)`` tuples, or ``None`` when
        the request or the page structure parsing fails.
    """
    headers = get_headers()
    try:
        resp = session.get(url, headers=headers)
        rows = resp.html.find('#list tr')[1:]  # skip the table header row
        proxies = []
        for row in rows:
            cells = row.text.split('\n')
            # cells: [ip, port, <unused>, protocol, ...]
            proxies.append((cells[0], cells[1], '高匿', cells[3]))
        return proxies
    except Exception:
        # Narrowed from BaseException; failures are silently treated as
        # "no proxies from this source" (best-effort crawl).
        return None
Пример #5
0
def check_ip(ip, port):  # verify that a proxy IP actually works
    """Probe TEST_IP through the proxy and confirm the exit IP matches.

    TEST_IP returns a JSONP-style payload (``var x = {...};``); the proxy
    is considered working only when the reported client IP (``cip``)
    equals the proxy's own address, i.e. traffic really went through it.

    Args:
        ip: proxy host address (string).
        port: proxy port.

    Returns:
        The elapsed request time as a two-decimal string on success,
        ``None`` when the proxy is dead, slow, or leaks the real IP.
    """
    url = TEST_IP
    headers = get_headers()
    proxy_addr = 'http://{ip}:{port}'.format(ip=ip, port=port)
    proxies = {'http': proxy_addr, 'https': proxy_addr}
    try:
        start_time = time.time()
        body = requests.get(url, headers=headers, proxies=proxies,
                            timeout=15).text
        speed = '%.2f' % (time.time() - start_time)
        # Strip the JSONP wrapper down to the JSON object after '='.
        reported = json.loads(body.replace(';', '').split('=')[-1].strip())
        if reported['cip'] == ip:
            return speed
        return None
    except Exception:
        # Narrowed from a bare `except:`, which also caught
        # KeyboardInterrupt/SystemExit and hid every programming error.
        return None
Пример #6
0
def check_proxy(redis_to, ip_port, zname, url=None):
    """Probe ``ip_port`` as an HTTP proxy and record the result in redis.

    On a 200 response the proxy's score is updated with its response time
    (whole seconds); on any failure it is penalized with the sentinel
    score ``"1001"``.

    Args:
        redis_to: redis connection handed through to ``change_score``.
        ip_port: ``"ip:port"`` string (extra ``:``-separated parts ignored).
        zname: sorted-set name the score is written to.
        url: probe target; defaults to ``https://www.baidu.com``.
    """
    if not url:
        url = "https://www.baidu.com"

    ip, port, *_ = ip_port.split(":")
    proxies = {"http": f"http://{ip}:{port}", "https": f"http://{ip}:{port}"}
    print("开始检测", ip_port)
    start_time = time.time()
    try:
        # timeout added: without one, a dead proxy made this call hang forever.
        # NOTE: verify=False skips TLS certificate checks — tolerable here only
        # because this is a reachability probe, not a data exchange.
        res = requests.get(url=url,
                           headers=config.get_headers(),
                           proxies=proxies,
                           timeout=15,
                           verify=False)
        if res.status_code == 200:
            # speed = round(time.time() - start_time, 3)
            # int() truncates to whole seconds, so sub-second proxies report 0.
            speed = int(time.time() - start_time)
            print(f"响应时间为 {speed},可用代理{proxies}")
            change_score(redis_to, ip_port, zname, str(speed), 1)
    except Exception:
        change_score(redis_to, ip_port, zname, "1001", 0)
Пример #7
0
import json
import requests
from config import get_headers
from requests_html import HTMLSession
from util.browsertool import create_browser, select_em

header = get_headers()  # default request headers shared by this module

urls = 'http://www.xicidaili.com/nn/1'  # first page of the xici free-proxy listing

session = HTMLSession()  # requests-html session used by the crawl_* functions


def crawl_xici(url):  # 爬取西刺免费代理
    headers = get_headers()
    try:
        resp = session.get(url, headers=headers)
        contents = resp.html.find('#ip_list tr')[1:]
        ip = []
        port = []
        proxy_kind = []
        proxy_type = []
        for i in range(len(contents)):
            items = contents[i]
            contents_list = items.text.split('\n')
            ip.append(contents_list[0])
            port.append(contents_list[1])
            if len(contents_list) >= 7:
                proxy_kind.append(contents_list[3])
                proxy_type.append(contents_list[4])
            else:
Пример #8
0
# -*- coding:utf-8 -*-

from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from config import get_headers
from selenium import webdriver

headers = get_headers()  # header key/value pairs passed to Chrome as command-line arguments

# 创建一个浏览器引擎


def create_browser(op_type):
    if op_type == 'close':
        chrome_options = webdriver.ChromeOptions()
        # mobile_emulation = {"deviceName": "Galaxy S5"}
        chrome_options.add_argument('--headless')
        # chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('disable-infobars')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--no-sandbox')
        for key, value in headers.items():
            chrome_options.add_argument(key + '=' + value)
        browser = webdriver.Chrome(chrome_options=chrome_options)
        browser.delete_all_cookies()
        return browser
    elif op_type == 'open':
        chrome_options = webdriver.ChromeOptions()
        # mobile_emulation = {"deviceName": "Galaxy S5"}
Пример #9
0
 def download(url):
     """Fetch *url* and return its decoded text.

     Returns the response body as text when the fetch looks successful
     (HTTP 200, or a body larger than 500 bytes); otherwise falls
     through and implicitly returns ``None``.
     """
     print("Download:{}".format(url))  # url varies per call, so callers should wrap this in exception handling
     r = requests.get(url, headers=get_headers(), timeout=10)
     r.encoding = chardet.detect(r.content)["encoding"]  # decode using the detected encoding
     # NOTE(review): the `or` accepts any response with a >500-byte body even
     # on a non-200 status — confirm this isn't meant to be `and`.
     if r.status_code == 200 or len(r.content) > 500:  # treated as a successful fetch
         return r.text  # return the page text