示例#1
0
 def genProxy(self, rawls):
     """Lazily build ``Proxy`` objects from a flat sequence of scraped values.

     ``rawls`` is walked in strides of eight starting at index 1. For each
     stride the values at offsets 0, 1 and 4 are passed to ``Proxy`` in that
     order. An ``IndexError`` raised while handling a stride (for example
     when the stride runs past the end of ``rawls``) is swallowed and the
     stride is skipped.
     """
     for start in range(1, len(rawls), 8):
         try:
             if not rawls[start:start + 7]:
                 continue
             yield Proxy(rawls[start], rawls[start + 1], rawls[start + 4])
         except IndexError:
             continue
示例#2
0
文件: spcCrawl.py 项目: Nebell/PPPool
 def _pageParse(self, html):
     """Collect proxies from the rows of the ``fl-table`` table in ``html``.

     For every row the first cell's text is split on ``':'`` (presumably a
     ``host:port`` pair -- confirm against the scraped page); the two parts
     plus the text of cells 1, 2 and 3 are passed to ``Proxy``. The result is
     stored via ``self.db.put`` when ``self.db`` is truthy, otherwise it is
     appended to ``self.proxyLs``. Rows that raise ``IndexError`` are skipped.
     """
     rows = XpathParser(html, ".//table[@class='fl-table']//tr").rawResultls
     for row in rows:
         try:
             address = row[0].text.split(':')
             proxy = Proxy(address[0], address[1], row[1].text,
                           row[2].text, row[3].text)
             if self.db:
                 self.db.put(proxy)
             else:
                 self.proxyLs.append(proxy)
         except IndexError:
             continue
示例#3
0
def verifyProxy(proxy: Proxy, url: str = "http://www.baidu.com/", timeout=1):
    """Check whether ``proxy`` can fetch ``url`` and record the outcome on it.

    Args:
        proxy: Object exposing ``protocol``, ``host`` and ``port`` (joined as
            strings into ``protocol://host:port``) plus the mutable
            ``status``, ``fail_count`` and ``last_verified_time`` attributes.
        url: Page requested through the proxy; a falsy value falls back to
            the default URL.
        timeout: Timeout forwarded to ``requests.get``.

    Returns:
        ``True`` when the request answers with status 200 and a non-empty
        body; ``False`` otherwise. On failure ``proxy.status`` is set to
        ``"unavailable"`` and ``proxy.fail_count`` is incremented.

    Only ``Exception`` subclasses are treated as a failed check, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate to the caller instead
    of being counted as a proxy failure.
    """
    if not url:
        url = "http://www.baidu.com/"
    try:
        # Route both schemes through the same proxy endpoint.
        address = proxy.protocol + "://" + ":".join([proxy.host, proxy.port])
        proxies = {"http": address, "https": address}
        resp = requests.get(url, proxies=proxies, timeout=timeout)
        # Only stamped when the request produced a response at all.
        proxy.last_verified_time = time.asctime()
        available = 200 == resp.status_code and bool(resp.text)
    except Exception:
        # Any ordinary error (connection, timeout, bad proxy data) means the
        # proxy is unusable.
        available = False

    if available:
        proxy.status = "available"
        return True

    proxy.status = "unavailable"
    proxy.fail_count += 1
    return False
示例#4
0
文件: spcCrawl.py 项目: Nebell/PPPool
    def _pageParse(self, html):
        try:
            xpParser = XpathParser(html, ".//table[@id='ip_list']/tr")
        except:
            pass

        for trElem in xpParser.rawResultls[1:]:
            try:
                if trElem[1].text:
                    proxy = Proxy(trElem[1].text, trElem[2].text,
                                  trElem[5].text.replace("QQ", "socks"),
                                  trElem[4].text)
                    # 如果存在数据库则储存在数据库里
                    self.db.put(proxy) if self.db else self.proxyLs.append(
                        proxy)
            except IndexError:
                pass
示例#5
0
文件: spcDB.py 项目: Nebell/PPPool
 def getAll(self):
     """Return every stored entry as a list of ``Proxy`` objects.

     Reads the whole mapping with ``hgetall`` under this store's name and
     converts each value with ``Proxy.genFromJson``.
     """
     stored = self.__db.hgetall(self.__name)
     proxies = []
     for field in stored:
         proxies.append(Proxy.genFromJson(stored[field]))
     return proxies
示例#6
0
文件: spcDB.py 项目: Nebell/PPPool
 def get(self, proxy_host: str):
     """Fetch the entry stored under ``proxy_host`` and rebuild it as a Proxy.

     NOTE(review): what ``hget`` yields for an unknown host, and how
     ``Proxy.genFromJson`` reacts to it, is not visible here -- confirm.
     """
     # Fetch the stored JSON for this host and decode it in one step.
     return Proxy.genFromJson(self.__db.hget(self.__name, proxy_host))
示例#7
0
文件: testDB.py 项目: Nebell/PPPool
# -*- coding: utf-8 -*-

import sys
sys.path.append("../")
from db.spcDB import RedisDB
from utils.proxyModel import Proxy

if __name__ == "__main__":
    # Manual smoke run against the "test" store: write one proxy, read it
    # back, wipe the store, then print whether the host is still present.
    store = RedisDB("test")
    proxy = Proxy.genFromStr("127.0.0.1 1080 SOCKS5")
    proxy.level = "High"
    proxy.region = "China"

    store.put(proxy)
    # The fetched copy is not inspected; the call only exercises the read path.
    fetched = store.get(proxy.host)
    store.clear()
    print(store.exists(proxy.host))