def __init__(self, dbname="test", collection="test", host=None, port=None):
    """Create a MongoClient and keep handles to one database and collection.

    ``dbname`` and ``collection`` are the names looked up on the client;
    ``host`` and ``port`` are handed to ``MongoClient`` unchanged.
    """
    # Keep the plain names next to the handles they were resolved from.
    self.dbn, self.coln = dbname, collection

    mongo = MongoClient(host, port)
    database = mongo[dbname]

    self.client = mongo
    self.db = database
    self.collection = database[collection]

    # Logger is named after the class this method is defined in.
    self.log = Log(__class__.__name__)
class Kuai(object):
    """Scraper that yields "host:port" proxy strings from the pages in ``URL``."""

    def __init__(self):
        """Attach a logger named after this class."""
        self.log = Log(__class__.__name__)

    @staticmethod
    @check
    def get_response(url):
        """GET *url* with the shared ``HEADER`` and normalise the text encoding.

        The HTTP status is not inspected here; that is left to the ``check``
        decorator (defined elsewhere -- presumably ``Proxy.error.check``).
        ``ips`` treats a ``None`` result as a failed fetch.
        """
        resp = requests.get(url=url, headers=HEADER)
        resp.encoding = resp.apparent_encoding
        # NOTE: the original had a bare ``resp.raise_for_status`` here, which
        # only looked the method up and never called it. It is dropped rather
        # than called so that status handling stays with ``check``.
        return resp

    @property
    def ips(self):
        """Yield "host:port" strings scraped from every page listed in ``URL``.

        A page whose response has no ``text`` (e.g. ``None``) is logged and
        skipped. Table rows that do not provide at least two ``<td>`` text
        cells are skipped, so no empty or port-less entries are produced.
        """
        for url in URL:
            r = self.get_response(url)
            try:
                html = etree.HTML(r.text)
            except AttributeError as e:
                self.log.error("Response is None,ErrorType:%s" % e)
                continue
            if html is None:
                # Defensive: nothing was parsed, so there is nothing to query.
                continue
            for row in html.xpath(XPATH):
                cells = row.xpath("./td/text()")
                if len(cells) < 2:
                    # No host/port pair in this row; joining it would yield
                    # "" or a bare host.
                    continue
                proxy = ':'.join(cells[:2])
                self.log.info("Find:%s" % proxy)
                yield proxy
class You(object):
    """Scraper that yields "host:port" proxy strings from a paginated article."""

    # Dotted quad followed by ":port". TCP ports go up to 65535, so five
    # digits are allowed (the previous four-digit limit truncated ":10808"
    # to ":1080").
    _PROXY_RE = re.compile(r"(?:\d{1,3}\.){3}\d{1,3}:\d{1,5}")

    def __init__(self):
        """Attach a logger named after this class."""
        self.log = Log(__class__.__name__)

    @staticmethod
    def get_response(url):
        """GET *url* with the shared ``HEADER`` and return the response.

        Raises:
            requests.HTTPError: for 4xx/5xx answers (``raise_for_status`` is
                now actually called; the original only referenced it).
        """
        resp = requests.get(url=url, headers=HEADER)
        resp.encoding = resp.apparent_encoding
        resp.raise_for_status()
        return resp

    @classmethod
    def _find_proxies(cls, text):
        """Return every "a.b.c.d:port" substring of *text*, in order."""
        return cls._PROXY_RE.findall(text)

    def _page_text(self, url):
        """Return the body text of *url*, or ``None`` if it could not be used.

        HTTP error statuses and responses without a ``text`` attribute are
        logged; other exceptions (e.g. connection failures) propagate as
        they did before.
        """
        try:
            return self.get_response(url).text
        except requests.HTTPError as e:
            self.log.error("Get Response Error,url:%s,ErrorType:%s" % (url, e))
        except AttributeError as e:
            self.log.error("Response is None,ErrorType:%s" % e)
        return None

    @property
    def ips(self):
        """Yield proxies found on the article linked from ``start_url``.

        The first match of ``XPATH`` on the start page is taken as the
        article URL. Pages 2..``PAGE`` are derived from it by appending
        ``_<index>`` to the number in that URL. Pages that fail to load are
        logged and skipped; if the start page or the first article page
        fails, nothing is yielded.
        """
        start_text = self._page_text(start_url)
        if start_text is None:
            return
        html = etree.HTML(start_text)
        links = html.xpath(XPATH) if html is not None else []
        if not links:
            # Previously an IndexError on ``[0]``.
            self.log.error("No article link found on start page")
            return
        parse_url = links[0]

        num = re.search(r"\d+", parse_url)
        if num is None:
            # Previously an AttributeError on ``num.group``.
            self.log.error("No page number in url:%s" % parse_url)
            return
        # As before, every digit run in parse_url is replaced by
        # "<first number>_{index}" to build the pagination template.
        link = re.sub(r"\d+", num.group(0) + "_{index}", parse_url)

        first_text = self._page_text(parse_url)
        if first_text is None:
            return
        for proxy in self._find_proxies(first_text):
            self.log.info("Find:%s" % proxy)
            yield proxy

        for ind in range(2, PAGE + 1):
            text = self._page_text(link.format(index=ind))
            if text is None:
                continue
            for proxy in self._find_proxies(text):
                self.log.info("Find:%s" % proxy)
                yield proxy
class Mongo(object):
    """Small convenience wrapper around a single MongoDB collection.

    NOTE: ``insert`` / ``remove`` are PyMongo's legacy write methods
    (deprecated in PyMongo 3, removed in 4). They are kept because the whole
    class already relies on them.
    TODO: migrate to ``insert_one`` / ``delete_many`` in one step.
    """

    def __init__(self, dbname="test", collection="test", host=None, port=None):
        """Create the client and keep handles to *dbname* and *collection*."""
        self.dbn = dbname
        self.coln = collection
        self.client = MongoClient(host, port)
        self.db = self.client[dbname]
        self.collection = self.db[collection]
        self.log = Log(__class__.__name__)

    def insert(self, dict_data):
        """Insert *dict_data* unless an equal document is already stored.

        Raises:
            TypeError: if *dict_data* is not a ``dict``.
        """
        if not isinstance(dict_data, dict):
            # repr() instead of the original eval(): eval() executed string
            # arguments as code and only raised TypeError by accident for
            # other types.
            self.log.info("Error: %s,dict is :%s" % (TypeError, repr(dict_data)))
            raise TypeError(
                "dict_data must be a dict, got %s" % type(dict_data).__name__)
        # Check existence before inserting to avoid duplicates.
        if not self.is_exist(dict_data):
            self.collection.insert(dict_data)
            self.log.info("Insert dict is :%s" % (str(dict_data)))

    def delete(self, key, value):
        """Remove every document whose field *key* equals *value*.

        Raises:
            AttributeError: if both *key* and *value* are falsy.
        """
        if not (value or key):
            self.log.error("MongoDB delete Error: %s,key is :%s,value is :%s" %
                           (AttributeError, key, value))
            raise AttributeError
        # The original called ``Collection.delete``, which is not a PyMongo
        # method (attribute access yields a sub-collection, and calling that
        # fails). ``remove`` matches what ``delete_all`` already uses.
        result = self.collection.remove({key: value})
        # With acknowledged writes the legacy call returns a dict whose "n"
        # is the number of removed documents; report only a real miss.
        if isinstance(result, dict) and not result.get("n"):
            self.log.info(
                "MongoDB delete Error: %s,key is :%s,value is :%s" %
                ("Not exist", key, value))

    def find_all(self):
        """Yield every document in the collection."""
        yield from self.collection.find()

    def find_one(self):
        """Return one randomly chosen document.

        Raises:
            IndexError: if the collection is empty (``random.choice`` on an
                empty list).
        """
        return random.choice(list(self.find_all()))

    def is_exist(self, dict_data):
        """Return True (and log a warning) if a matching document exists."""
        if self.collection.find_one(dict_data):
            self.log.warning("Dict is exist :%s" % (str(dict_data)))
            return True
        return False

    def delete_all(self):
        """Remove every document from the collection."""
        self.collection.remove()
        self.log.warning("MongoDB delete all remarks.")

    def drop_db(self):
        """Drop the whole database.

        ``drop_database`` lives on the client object, unlike in the Mongo
        shell.
        """
        self.client.drop_database(self.dbn)
        self.log.warning("MongoDB delete db %s." % self.dbn)

    def drop_collection(self):
        """Drop the wrapped collection."""
        self.collection.drop()
        self.log.warning("MongoDB delete collection %s." % self.coln)
# encoding: utf-8 from Proxy import kuai from Proxy import xici from Proxy import you import requests from Proxy.Test import test_link from Proxy.log import Log from requests.packages.urllib3.exceptions import InsecureRequestWarning from Proxy.DB import db requests.packages.urllib3.disable_warnings(InsecureRequestWarning) log = Log("Control") lis = [] def get_ips(): for ip in kuai.ips: #isinstance = generator if test_link(ip): log.info(ip+" is checkde be useful!") # must insert dict type db.insert({"proxy": ip}) for ip in xici.ips: if test_link(ip): log.info(ip+" is checkde be useful!") # must insert dict type db.insert({"proxy": ip}) for ip in you.ips: if test_link(ip):
def __init__(self):
    """Attach a logger named after the class this method is defined in."""
    logger_name = __class__.__name__
    self.log = Log(logger_name)
import requests
from Proxy.error import Linkexception
from Proxy.log import Log

log = Log("Test_link")
url = "https://www.baidu.com"


def test_link(proxy):
    """Return True if *url* can be fetched through *proxy*, else False.

    *proxy* is a "host:port" string. Any failure -- connection problem,
    timeout, HTTP error status, or unexpected exception -- is logged and
    reported as ``False``; nothing is raised to the caller.
    """
    proxies = {"https": "https://{proxy}".format(proxy=proxy)}
    try:
        # verify=False: certificate checks are deliberately skipped for the
        # probe request.
        resp = requests.get(url=url, proxies=proxies, timeout=10, verify=False)
        # The original referenced this method without calling it, so error
        # statuses were treated as success.
        resp.raise_for_status()
    except (Linkexception, IOError) as e:
        # requests' exceptions derive from IOError, so this covers proxy,
        # timeout, connection and HTTP-status errors. The old per-type
        # clauses named attributes that do not exist on ``requests``.
        log.error("%s,proxy :%s" % (e, proxy))
    except Exception as e:
        # Best-effort probe: keep going on anything unexpected, but leave a
        # trace instead of silently passing.
        log.error("%s,proxy :%s" % (e, proxy))
    else:
        return True
    return False
import functools

from Proxy.log import Log

log = Log("Error")


def check(func):
    """Decorator: pass through only results whose ``status_code`` is 200.

    The wrapped callable is invoked with the arguments unchanged. If its
    result has ``status_code == 200`` the result is returned; otherwise an
    error naming the function is logged and ``None`` is returned.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # *args rather than an explicit ``self``: the decorator is also used
        # on static methods, where the first argument is not an instance.
        result = func(*args, **kwargs)
        # getattr guard: a ``None`` (or otherwise status-less) result is
        # reported like any other failure instead of raising AttributeError.
        if getattr(result, "status_code", None) == 200:
            return result
        log.error("Get Response Error in function :%s." % func.__name__)
        return None
    return wrapper


class Linkexception(Exception):
    """Raised to signal that a link test failed."""

    def __init__(self, err="Link test error"):
        super().__init__(err)