def genProxy(self, rawls):
    """Yield one Proxy per record found in the flat sequence ``rawls``.

    Records are read with a stride of 8, starting at index 1. For a record
    starting at ``start``, the values at ``start``, ``start + 1`` and
    ``start + 4`` are passed to ``Proxy`` in that order. A trailing record
    that is too short to supply all three values is skipped.
    """
    for start in range(1, len(rawls), 8):
        if not rawls[start:start + 7]:
            continue
        try:
            yield Proxy(rawls[start], rawls[start + 1], rawls[start + 4])
        except IndexError:
            # Incomplete record: ignore it and move on.
            continue
def _pageParse(self, html):
    """Build a Proxy from each row of the ``fl-table`` table in ``html``.

    The first cell of a row is split on ':'; its first two pieces, followed
    by the text of cells 1, 2 and 3, are passed to ``Proxy``. Each proxy is
    handed to ``self.db.put`` when ``self.db`` is truthy, otherwise appended
    to ``self.proxyLs``. Rows that raise IndexError are skipped.
    """
    rows = XpathParser(html, ".//table[@class='fl-table']//tr").rawResultls
    for row in rows:
        try:
            address = row[0].text.split(':')
            proxy = Proxy(address[0], address[1],
                          row[1].text, row[2].text, row[3].text)
            if self.db:
                self.db.put(proxy)
            else:
                self.proxyLs.append(proxy)
        except IndexError:
            # Row has too few cells, or no ':' in the first cell.
            continue
def verifyProxy(proxy: "Proxy", url: str = "http://www.baidu.com/", timeout=1):
    """Check whether ``proxy`` can fetch ``url`` and record the outcome on it.

    Args:
        proxy: Object exposing ``protocol``, ``host`` and ``port``. Its
            ``status`` is set to "available" or "unavailable", its
            ``fail_count`` is incremented on failure, and its
            ``last_verified_time`` is set whenever the request returns a
            response (it is left untouched when the request itself raises).
        url: Page requested through the proxy. A falsy value falls back to
            "http://www.baidu.com/".
        timeout: Forwarded to ``requests.get`` as its ``timeout`` argument.

    Returns:
        True when the response has status code 200 and a non-empty body,
        False for any other response or for any ``Exception`` raised while
        building or sending the request.

    KeyboardInterrupt and SystemExit are deliberately not caught, so the
    caller can still interrupt or shut down a verification run.
    """
    if not url:
        url = "http://www.baidu.com/"
    try:
        # Use the same proxy endpoint for both schemes. str.format also
        # accepts a non-string port, which str.join would reject.
        proxy_url = "{}://{}:{}".format(proxy.protocol, proxy.host, proxy.port)
        resp = requests.get(url,
                            proxies={"http": proxy_url, "https": proxy_url},
                            timeout=timeout)
        proxy.last_verified_time = time.asctime()
        usable = resp.status_code == 200 and bool(resp.text)
    except Exception:
        # Any request/connection/configuration error means "not usable".
        usable = False

    if usable:
        proxy.status = "available"
        return True
    proxy.status = "unavailable"
    proxy.fail_count += 1
    return False
def _pageParse(self, html): try: xpParser = XpathParser(html, ".//table[@id='ip_list']/tr") except: pass for trElem in xpParser.rawResultls[1:]: try: if trElem[1].text: proxy = Proxy(trElem[1].text, trElem[2].text, trElem[5].text.replace("QQ", "socks"), trElem[4].text) # 如果存在数据库则储存在数据库里 self.db.put(proxy) if self.db else self.proxyLs.append( proxy) except IndexError: pass
def getAll(self):
    """Return every value stored under ``self.__name`` as a list of Proxy.

    All fields are fetched with ``hgetall`` and each stored value is
    converted with ``Proxy.genFromJson``.
    """
    stored = self.__db.hgetall(self.__name)
    return [Proxy.genFromJson(raw) for raw in stored.values()]
def get(self, proxy_host: str):
    """Fetch the entry for ``proxy_host`` and return it as a Proxy.

    The value is read with ``hget`` from the collection named
    ``self.__name`` and converted with ``Proxy.genFromJson``.
    """
    return Proxy.genFromJson(self.__db.hget(self.__name, proxy_host))
# -*- coding: utf-8 -*-
import sys

# Make the parent directory importable before pulling in project modules.
sys.path.append("../")
from db.spcDB import RedisDB
from utils.proxyModel import Proxy

# Manual smoke run: store one proxy, read it back, clear the store and
# print whether the proxy's host is still reported as present.
if __name__ == "__main__":
    store = RedisDB("test")

    sample = Proxy.genFromStr("127.0.0.1 1080 SOCKS5")
    sample.level = "High"
    sample.region = "China"

    store.put(sample)
    fetched = store.get(sample.host)  # read back; the result is not inspected
    store.clear()
    print(store.exists(sample.host))