示例#1
0
async def kuaidaili():
    """Scrape the free proxy tables published on kuaidaili.com.

    The ``/free/inha/`` and ``/free/intr/`` pages are fetched one after the
    other with a five second pause between the requests. Every table row
    that exposes both an ``IP`` and a ``PORT`` cell becomes one ``Proxy``.

    Returns:
        list: ``Proxy`` objects (scheme ``SCHEME_HTTP``, status
        ``STATUS_NEW``) in page order.
    """
    urls = (
        'https://www.kuaidaili.com/free/inha/',
        'https://www.kuaidaili.com/free/intr/',
    )
    res = list()

    sess = AsyncHTMLSession()
    try:
        for index, url in enumerate(urls):
            if index:
                # Pause between consecutive page requests (not before the
                # first one and not after the last one).
                await asyncio.sleep(5)
            resp = await sess.get(url)
            for ip_row in resp.html.find('#list table tr'):
                ip = ip_row.find('td[data-title="IP"]', first=True)
                port = ip_row.find('td[data-title="PORT"]', first=True)
                # Rows without both cells (e.g. the header row) are skipped.
                if ip and port:
                    res.append(Proxy(
                        ip_port=f"{ip.text}:{port.text}",
                        scheme=SCHEME_HTTP,
                        status=STATUS_NEW,
                    ))
    finally:
        # Release the session even when a request or the parsing fails.
        await sess.close()

    return res
示例#2
0
async def proxynova():
    """Collect proxies from the proxynova.com proxy server list.

    Only table rows carrying a ``data-proxy-id`` attribute are considered.
    The IP address is taken from the single ``document.write('...')`` call
    inside the row's script tag, the port from the second cell.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    session = AsyncHTMLSession()
    page = await session.get('https://www.proxynova.com/proxy-server-list/')

    found = []
    for row in page.html.find('#tbl_proxy_list > tbody:nth-child(2) > tr'):
        if 'data-proxy-id' in row.attrs:
            script_tag = row.find('td:nth-child(1) > abbr > script', first=True)
            port_cell = row.find('td:nth-child(2)', first=True)
            if script_tag and port_cell:
                written = re.findall(
                    r"document\.write\('(.*?)'\);",
                    script_tag.text)
                # Exactly one document.write() argument is accepted.
                if len(written) == 1:
                    address = written[0]
                    found.append(Proxy(
                        ip_port=f"{address}:{port_cell.text}",
                        scheme=SCHEME_HTTP,
                        status=STATUS_NEW,
                    ))
    await session.close()

    return found
示例#3
0
async def http_proxy():
    """Scrape the proxy tables of three proxyhttp.net pages.

    Each page is rendered (JavaScript executed) before the
    ``table.proxytbl`` rows are read. The port cell text is expected to
    contain ``//]]> <digits>``; rows whose port cell does not match that
    pattern are skipped.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    urls = (
        'https://proxyhttp.net/free-list/proxy-anonymous-hide-ip-address/',
        'https://proxyhttp.net/',
        'https://proxyhttp.net/free-list/anonymous-server-hide-ip-address/2#proxylist',
    )
    res = list()

    sess = AsyncHTMLSession()
    try:
        for u in urls:
            resp = await sess.get(u)
            await resp.html.arender(wait=1.5, timeout=10.0)
            for ip_row in resp.html.find('table.proxytbl tr'):
                ip = ip_row.find('td:nth-child(1)', first=True)
                port = ip_row.find('td:nth-child(2)', first=True)
                if not ip or not port:
                    continue
                # Explicit check instead of catching AttributeError, so that
                # unrelated attribute errors are not silently hidden.
                port_match = re.search(r'//]]> (\d+)', port.text)
                if port_match is None:
                    continue
                res.append(Proxy(
                    ip_port=f"{ip.text}:{port_match.group(1)}",
                    scheme=SCHEME_HTTP,
                    status=STATUS_NEW,
                ))
    finally:
        # Release the session even when a request or rendering fails.
        await sess.close()

    return res
示例#4
0
def addlist():
    """Register every ``ip:port`` entry of the submitted ``proxys`` form field.

    The field value is split on commas. An entry is added through
    ``proxyapi.add`` only when splitting it on ``:`` yields exactly two
    parts; all other entries are ignored.

    Returns:
        str: always the empty string.
    """
    for entry in request.form["proxys"].split(","):
        parts = entry.split(":")
        if len(parts) != 2:
            continue
        host, port = parts
        proxyapi.add(Proxy(ip=host, port=port))

    return ""
示例#5
0
 def all(self):
     """
     Return all stored proxies.

     Queries the storage key with ``zrangebyscore`` over the range from the
     minimum to the maximum score and turns every returned member into a
     ``Proxy`` via ``Proxy().re_serialize``.
     """
     members = self.__db.zrangebyscore(self.__REDIS_STORAGE_KEY,
                                       self.__MIN_SCORE, self.__MAX_SCORE)
     return [Proxy().re_serialize(member) for member in members]
示例#6
0
async def thespeedx_proxy_list():
    """Download the plain-text HTTP proxy list of TheSpeedX/PROXY-List.

    The response body is treated as one proxy address per line. Blank lines
    are ignored and surrounding whitespace is removed from each address.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``, one per non-empty line.
    """
    async with aiohttp.request(
            "GET",
            'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
            proxy=proxies
    ) as resp:
        text = await resp.text()

    # splitlines() copes with "\r\n" line endings; the emptiness filter keeps
    # the trailing newline (and any blank line) from producing a proxy with
    # an empty address.
    return [
        Proxy(
            ip_port=line.strip(),
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        )
        for line in text.splitlines()
        if line.strip()
    ]
示例#7
0
async def proxy_scraper():
    """Fetch the JSON proxy dump published by sunny9577/proxy-scraper.

    Only the entries under the ``"proxynova"`` key are used; each entry's
    ``ip`` and ``port`` values are joined into an ``ip:port`` address.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    async with aiohttp.request(
            "GET",
            'https://sunny9577.github.io/proxy-scraper/proxies.json',
            proxy=proxies
    ) as response:
        payload = await response.json()
        return [
            Proxy(
                ip_port=f"{entry['ip']}:{entry['port']}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            )
            for entry in payload["proxynova"]
        ]
示例#8
0
async def cool_proxy():
    """Fetch the JSON proxy list served by cool-proxy.net.

    The decoded document is iterated directly; each element's ``ip`` and
    ``port`` values are joined into an ``ip:port`` address.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    def to_proxy(item):
        # Build one Proxy from a single JSON element.
        return Proxy(
            ip_port=f"{item['ip']}:{item['port']}",
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        )

    async with aiohttp.request(
            "GET",
            'https://cool-proxy.net/proxies.json',
            proxy=proxies
    ) as response:
        entries = await response.json()
        return [to_proxy(item) for item in entries]
示例#9
0
async def pubproxy():
    """Query the pubproxy.com JSON API for proxies.

    The entries under the ``"data"`` key of the response are used; each
    entry's ``ipPort`` value becomes the proxy address.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    async with aiohttp.request(
            "GET",
            'http://pubproxy.com/api/proxy?limit=5&format=json&type=http&level=anonymous&last_check=60',
            proxy=proxies
    ) as response:
        payload = await response.json()
        records = payload["data"]
        return [
            Proxy(
                ip_port=f"{record['ipPort']}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            )
            for record in records
        ]
示例#10
0
 def show(self):
     """
     Print all stored proxies and their scores as a text table.

     Members are read with ``zrevrangebyscore`` from the maximum down to the
     minimum score; the score of each member is looked up with ``zscore``.
     The address column is padded to a fixed width.
     """
     border = '+{}+{}+'.format('-' * 21, '-' * 8)
     print(border)
     members = self.__db.zrevrangebyscore(self.__REDIS_STORAGE_KEY,
                                          self.__MAX_SCORE, self.__MIN_SCORE)
     for member in members:
         proxy = Proxy().re_serialize(member)
         score = self.__db.zscore(self.__REDIS_STORAGE_KEY, member)
         # Pad so that ip + port + padding always spans 20 characters.
         padding = ' ' * (20 - len(proxy.ip + proxy.port))
         print('|{}:{}{}|  {}  |'.format(proxy.ip, proxy.port, padding,
                                         score))
     print(border)
示例#11
0
async def ipaddress():
    """Scrape the proxy list table of ipaddress.com.

    The text of the first cell of every ``.proxylist tbody tr`` row is used
    as the proxy address. Rows without a first cell are skipped.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    res = list()

    sess = AsyncHTMLSession()
    try:
        resp = await sess.get('https://www.ipaddress.com/proxy-list/')
        for ip_row in resp.html.find('.proxylist tbody tr'):
            cell = ip_row.find('td:nth-child(1)', first=True)
            if cell is None:
                # find(..., first=True) yields None for a row without the
                # cell; accessing .text on it would abort the whole scrape.
                continue
            res.append(Proxy(
                ip_port=cell.text,
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    finally:
        # Release the session even when the request or parsing fails.
        await sess.close()

    return res
示例#12
0
async def free_proxy_list():
    """Scrape the proxy table of free-proxy-list.net.

    For every ``#proxylisttable tbody tr`` row the first cell supplies the
    IP and the second cell the port; rows lacking either cell are skipped.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    session = AsyncHTMLSession()
    page = await session.get('https://free-proxy-list.net/')

    found = []
    for row in page.html.find('#proxylisttable tbody tr'):
        ip_cell = row.find('td:nth-child(1)', first=True)
        port_cell = row.find('td:nth-child(2)', first=True)
        if not (ip_cell and port_cell):
            continue
        found.append(Proxy(
            ip_port=f"{ip_cell.text}:{port_cell.text}",
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        ))
    await session.close()

    return found
示例#13
0
 def get_proxy(self):
     """
     Return randomly chosen proxies from the better-scored part of the pool.

     Candidates are the members whose score lies between the initial score
     and the maximum score. ``self.__num`` proxies are drawn from them with
     ``choice``, so the same proxy may appear more than once.

     Change note (2019-02-28 11:50:55): the selection range was changed to
     100-initial so that IPs already detected as invalid can no longer be
     selected.

     :raises PoolEmptyError: when there is no candidate in that score range
     """
     candidates = self.__db.zrevrangebyscore(self.__REDIS_STORAGE_KEY,
                                             self.__MAX_SCORE,
                                             self.__INITIAL_SCORE)
     if not candidates:
         raise PoolEmptyError
     return [Proxy().re_serialize(choice(candidates))
             for _ in range(self.__num)]
def search_proxy():
    """Import proxies from a locally saved copy of the cn-proxy.com listing.

    The HTML is read from a fixed file path, the rows of the second
    ``tbody`` are walked, and rows whose speed (as computed by
    ``get_speed`` from the fourth cell) is below 70 are skipped. For the
    remaining rows a ``Proxy`` is filled with url (cell 0 + ":" + cell 1),
    position (cell 2) and last check time (cell 4, parsed as Asia/Shanghai
    local time and converted to UTC). The proxy is saved only when no
    ``Proxy`` with the same url exists yet.
    """
    # NOTE: the live download (retrying requests.get(url) with a 2 second
    # sleep until data arrived) is disabled; the page is read from disk.
    url = "http://cn-proxy.com/"

    with open('/home/chenxiao/document/data', 'rt') as page_file:
        html = page_file.read()

    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.findAll('tbody')[1].findAll('tr')
    for row in rows:
        cells = row.findAll('td')

        proxy = Proxy()

        measured_speed = get_speed(cells[3])
        if measured_speed < 70:
            continue

        proxy.speed = measured_speed
        proxy.url = ":".join((cells[0].text, cells[1].text))
        proxy.position = cells[2].text

        # The listed time is interpreted as Shanghai local time and stored
        # as UTC.
        naive_time = datetime.datetime.strptime(cells[4].text,
                                                '%Y-%m-%d %H:%M:%S')
        local_time = timezone('Asia/Shanghai').localize(naive_time)
        proxy.last_check = local_time.astimezone(utc)

        already_known = Proxy.objects(url=proxy.url)
        if not already_known:
            print("加入代理服务器:   {}".format(proxy.url))
            proxy.save()
示例#15
0
async def proxy_list():
    """Scrape proxies from proxy-list.org.

    Each list entry embeds a script of the form ``Proxy('<base64>')``. The
    base64 payload is decoded and the first IPv4 address and the first
    ``:<port>`` found in it are combined into the proxy address. Entries
    without a script, without a payload, with an undecodable payload or
    without a recognisable address are skipped.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    res = list()

    sess = AsyncHTMLSession()
    try:
        resp = await sess.get('http://proxy-list.org/english/index.php')
        for ul in resp.html.find('#proxy-table > div.table-wrap ul'):
            script = ul.find('li.proxy script', first=True)
            if script is None:
                # Entry without an embedded script; nothing to decode.
                continue
            encoded = re.search(r"Proxy\('(.+)'\)", script.text)
            if encoded is None:
                continue
            try:
                ip_port = base64.b64decode(encoded.group(1)).decode("utf-8")
            except ValueError:
                # binascii.Error and UnicodeDecodeError both derive from
                # ValueError; a broken payload must not abort the scrape.
                continue
            ip = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_port)
            port = re.search(r':(\d{2,5})', ip_port)
            if ip is None or port is None:
                continue
            res.append(Proxy(
                ip_port=f"{ip.group(0)}:{port.group(1)}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    finally:
        # Release the session even when the request or parsing fails.
        await sess.close()

    return res
示例#16
0
async def clarketm_proxy_list():
    """Download the clarketm/proxy-list text file and extract its proxies.

    Every ``ip:port`` occurrence in the response body is located with a
    regular expression; the IP and port are then pulled out of each
    occurrence and recombined into the proxy address.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    address_re = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}')
    ip_re = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    port_re = re.compile(r':(\d{2,5})')

    collected = []
    async with aiohttp.request(
            "GET",
            'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt',
            proxy=proxies
    ) as response:
        body = await response.text()

        for candidate in address_re.findall(body):
            ip = ip_re.search(candidate).group(0)
            port = port_re.search(candidate).group(1)
            if not (ip and port):
                continue
            collected.append(Proxy(
                ip_port=f"{ip}:{port}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    return collected
示例#17
0
async def spys_one():
    """Scrape the anonymous proxy list of spys.one.

    The page is rendered (JavaScript executed) first. For every
    ``table tr[onmouseover]`` row the text of the ``.spy14`` element is
    searched for an IPv4 address and for a port that follows ``":\\n"``.
    Rows where either part cannot be found are skipped.

    Returns:
        list: ``Proxy`` objects with scheme ``SCHEME_HTTP`` and status
        ``STATUS_NEW``.
    """
    res = list()

    sess = AsyncHTMLSession()
    try:
        resp = await sess.get('http://spys.one/en/anonymous-proxy-list')
        await resp.html.arender(wait=1.5, timeout=10.0)
        for ip_row in resp.html.find('table tr[onmouseover]'):
            ip_port_text_elem = ip_row.find('.spy14', first=True)
            if not ip_port_text_elem:
                continue
            ip_port_text = ip_port_text_elem.text

            ip_match = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}',
                                 ip_port_text)
            port_match = re.search(r':\n(\d{2,5})', ip_port_text)
            if ip_match is None or port_match is None:
                # re.search returns None when nothing matches; calling
                # .group() on it would abort the whole scrape.
                continue

            res.append(Proxy(
                ip_port=f"{ip_match.group(0)}:{port_match.group(1)}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    finally:
        # Release the session even when the request or rendering fails.
        await sess.close()

    return res
示例#18
0
 def __parse_html(self, html, link_type):
     """
     Parse a listing page into ``Proxy`` objects.

     The first ``#ip_list > tr`` row is skipped; for every following row the
     ``title`` of its ``.bar`` element is passed to ``__check_delay`` and,
     when accepted, a ``Proxy`` is built from the row's child cells 1 (ip),
     2 (port) and 3 (site).

     :param html: page source
     :param link_type: value stored as ``p_type`` on every created proxy
     :return: list of ``Proxy`` objects (empty when the page has no rows)
     """
     result = list()
     doc = pq(html)
     trs = doc('#ip_list > tr').items()
     # Skip the first row (presumably the table header). The default keeps
     # a page without any matching row from raising StopIteration.
     next(trs, None)
     for tr in trs:
         text_speed = tr('.bar').attr('title')
         if self.__check_delay(text_speed):
             child = tr.children()
             ip = child.eq(1).text()
             port = child.eq(2).text()
             site = child.eq(3).text()
             result.append(
                 Proxy(ip=ip,
                       port=port,
                       site=site,
                       p_type=link_type,
                       delay=text_speed))
     return result
示例#19
0
    def post(self):
        '''
        Create new proxies.

        Every entry is validated first; the proxies are then added and
        flushed one by one, committed together, and a ``created``
        notification is sent for each stored proxy.

        **Example Request**

        .. sourcecode:: json

            {
                "proxies": [
                    {
                        "protocol": "http",
                        "host": "192.168.0.2",
                        "port": 80,
                        "username": "******",
                        "password": "******",
                        "active": true
                    },
                    ...
                ]
            }

        **Example Response**

        .. sourcecode:: json

            {
                "message": "1 new proxies created"
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json list proxies: list of proxies
        :<json str proxies[n]["protocol"]: protocol of proxy address
        :<json str proxies[n]["host"]: host of proxy address
        :<json int proxies[n]["port"]: port of proxy address
        :<json str proxies[n]["username"]: username of proxy (optional,
            may be null)
        :<json str proxies[n]["password"]: password of proxy (optional,
            may be null)
        :<json bool proxies[n]["active"]: proxy active status

        :>header Content-Type: application/json
        :>json string message: API response message; the count is the
            number of entries in the request's ``proxies`` list

        :status 202: created
        :status 400: invalid request body (``BadRequest`` is also raised
            when a proxy with the same protocol, host and port already
            exists)
        :status 401: authentication required
        '''
        request_json = request.get_json()
        proxies = []

        # Ensure all data is valid before db operations
        for proxy_json in request_json['proxies']:
            validate_request_json(proxy_json, PROXY_ATTRS)

        # Save proxies
        for proxy_json in request_json['proxies']:
            # Protocol and host are normalised to lower case without
            # surrounding whitespace.
            proxy = Proxy(protocol=proxy_json['protocol'].lower().strip(),
                          host=proxy_json['host'].lower().strip(),
                          port=proxy_json['port'],
                          active=proxy_json['active'])

            # Username is optional, and can be None
            # (missing key -> attribute left untouched; a None value has no
            # .lower() and raises AttributeError -> stored as None).
            try:
                proxy.username = proxy_json['username'].lower().strip()
            except KeyError:
                pass
            except AttributeError:
                proxy.username = None

            # Password is optional, and can be None
            # (same handling as username, but the case is preserved).
            try:
                proxy.password = proxy_json['password'].strip()
            except KeyError:
                pass
            except AttributeError:
                proxy.password = None

            g.db.add(proxy)

            # Flush per proxy so an IntegrityError surfaces here, where it
            # can be reported for this specific proxy, rather than at the
            # final commit.
            try:
                g.db.flush()
                proxies.append(proxy)
            except IntegrityError:
                g.db.rollback()
                raise BadRequest('Proxy {}://{}:{} already exists.'.format(
                    proxy.protocol, proxy.host, proxy.port))

        g.db.commit()

        # Send redis notifications
        # (only after the commit, one message per stored proxy).
        for proxy in proxies:
            notify_mask_client(channel='proxy',
                               message={
                                   'proxy': proxy.as_dict(),
                                   'status': 'created',
                                   'resource': None
                               })

        # The count reported is the number of submitted entries.
        message = '{} new proxies created'.format(len(request_json['proxies']))
        response = jsonify(message=message)
        response.status_code = 202

        return response
示例#20
0
from api import proxyapi
from model import Proxy
import logging

if __name__ == "__main__":
    # Load "ip:port" lines from work-proxy.list and register each of them
    # through proxyapi.add.
    logging.basicConfig(level=logging.INFO)

    # The context manager guarantees the file is closed, also on errors.
    with open("work-proxy.list") as pfile:
        for proxystr in pfile:
            proxyarr = proxystr.strip().split(':')
            if len(proxyarr) < 2:
                # Blank or malformed line: indexing proxyarr[1] would raise
                # IndexError and abort the import, so skip it instead.
                continue
            proxy = Proxy(ip=proxyarr[0], port=proxyarr[1])
            proxyapi.add(proxy)
示例#21
0
def get_proxy(item):
    """Build a ``Proxy`` from one table row and insert it.

    The row's ``td`` cells are read by position: country (``alt`` of the
    image in cell 0), ip (1), port (2), address (link text in cell 3),
    status (5), speed and ping (``title`` of the ``div`` in cells 6 and 7
    without its last character), live time (8) and timestamp (cell 9,
    prefixed with ``'20'`` and suffixed with ``':00'``).

    Rows that lack an expected cell or nested tag are reported on stdout
    and skipped instead of raising.

    :param item: a parsed row element providing ``find_all('td')``
    """
    try:
        proxy = Proxy()
        td = item.find_all('td')
        proxy.country = td[0].find('img')['alt']
        proxy.ip = td[1].text
        proxy.port = td[2].text
        proxy.address = td[3].find('a').text
        proxy.status = td[5].text
        proxy.speed = td[6].find('div')['title'][:-1]
        proxy.ping = td[7].find('div')['title'][:-1]
        proxy.live_time = td[8].text
        proxy.timestamp = '20' + td[9].text + ':00'
        proxy.insert()
    except (TypeError, AttributeError, IndexError):
        # A missing tag shows up as TypeError (None['alt']) or
        # AttributeError (None.text); a short row as IndexError. All three
        # mean the same thing here: the row is not a complete proxy row.
        print('GET PROXY NONE TYPE')