def __init__(self):
    """Create the storage models, the IP lookup and the quality-proxy cache."""
    # Document models this manager delegates to.
    self.useful_proxy = UsefulProxyDocsModel()
    self.raw_proxy = RawProxyDocsModel()
    self.domain_counter = DomainCounterDocsModel()

    # IP database opened from IP_DATA_PATH.
    self.datx = datx.City(IP_DATA_PATH)

    # Cached quality-proxy list and the per-domain index into it, both
    # starting out empty.
    self.quality_useful_proxy_list, self.quality_domain_index = [], {}
class ProxyManager(object):
    """Facade over the proxy, domain-counter and fetcher document models.

    Most methods forward straight to the matching model method; the few
    that add logic of their own are documented individually.
    """

    def __init__(self):
        """Create the document models, the IP lookup and the quality cache."""
        self.useful_proxy = UsefulProxyDocsModel()
        self.raw_proxy = RawProxyDocsModel()
        self.domain_counter = DomainCounterDocsModel()
        self.fetchers = FetchersDocsModel()
        self.datx = datx.City(IP_DATA_PATH)

        # Cache used by getQualityUsefulProxy: the last fetched list and the
        # per-domain position of the next proxy to hand out.
        self.quality_useful_proxy_list = []
        self.quality_domain_index = {}

    def cleanUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.cleanUsefulProxy`` and return its result."""
        return self.useful_proxy.cleanUsefulProxy(**kwargs)

    def cleanRawProxy(self, **kwargs):
        """Forward to ``raw_proxy.cleanRawProxy`` and return its result."""
        return self.raw_proxy.cleanRawProxy(**kwargs)

    def getAllValidUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getAllValidUsefulProxy``."""
        return self.useful_proxy.getAllValidUsefulProxy(**kwargs)

    def getAllUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getAllUsefulProxy``."""
        return self.useful_proxy.getAllUsefulProxy(**kwargs)

    def getVerifyUsefulProxy(self):
        """Query ``useful_proxy.getVerifyUsefulProxy`` with the current time.

        The current Unix time (whole seconds) is passed as the only argument.
        """
        return self.useful_proxy.getVerifyUsefulProxy(int(time.time()))

    def getLowQualityUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getLowQualityUsefulProxy``."""
        return self.useful_proxy.getLowQualityUsefulProxy(**kwargs)

    def getHighQualityUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getHighQualityUsefulProxy``."""
        return self.useful_proxy.getHighQualityUsefulProxy(**kwargs)

    def getAllRawProxy(self):
        """Forward to ``raw_proxy.getAllRawProxy``."""
        return self.raw_proxy.getAllRawProxy()

    def checkRawProxyExists(self, proxy):
        """Forward to ``raw_proxy.checkRawProxyExists``."""
        return self.raw_proxy.checkRawProxyExists(proxy)

    def checkUsefulProxyExists(self, proxy):
        """Forward to ``useful_proxy.checkUsefulProxyExists``."""
        return self.useful_proxy.checkUsefulProxyExists(proxy)

    def getSampleUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getSampleUsefulProxy``."""
        return self.useful_proxy.getSampleUsefulProxy(**kwargs)

    def getQualityUsefulProxy(self, **kwargs):
        """Hand out one proxy from the cached quality list, round-robin.

        Recognised keyword arguments (all are also forwarded to the model):
            count:  seeds the starting position after a refresh as
                    ``(count - 1) % len(list)``; defaults to 1.
            domain: key of the per-domain cursor; defaults to None.

        The cached list is re-fetched whenever the domain's cursor is 0,
        i.e. on first use and every time the cursor wraps around.

        Returns:
            One element of the cached list, or None when the model returned
            no proxies (this used to raise ZeroDivisionError).

        NOTE(review): the cursor now wraps with a modulo so it can never run
        past the cached list; confirm round-robin is the intended policy.
        NOTE(review): the cached list is shared by all domains while the
        cursor is per-domain - verify that this is deliberate.
        """
        count = kwargs.get("count", 1)
        domain = kwargs.get("domain", None)

        cursor = self.quality_domain_index.get(domain, 0)
        refreshed = cursor == 0
        if refreshed:
            self.quality_useful_proxy_list = (
                self.useful_proxy.getQualityUsefulProxy(**kwargs))

        candidates = self.quality_useful_proxy_list
        if not candidates:
            # Nothing to serve; keep the cursor at 0 so the next call retries
            # the fetch instead of indexing into an empty list.
            self.quality_domain_index[domain] = 0
            return None

        size = len(candidates)
        index = ((count - 1) if refreshed else cursor) % size
        # Wrapping back to 0 is what triggers the next refresh.
        self.quality_domain_index[domain] = (index + 1) % size
        return candidates[index]

    def deleteRawProxy(self, proxy):
        """Forward to ``raw_proxy.deleteRawProxy``."""
        self.raw_proxy.deleteRawProxy(proxy)

    def saveRawProxy(self, proxy):
        """Store a raw proxy with the configured initial health value."""
        data = {
            "proxy": proxy,
            "health": ConfigManager.setting_config.setting.get(
                "init_raw_proxy_health"),
        }
        self.raw_proxy.saveRawProxy(data)

    def getProxyRegion(self, ip):
        """Return the distinct, non-empty leading fields of the IP lookup.

        Only the first three fields returned by ``datx.find`` are considered
        (presumably country / province / city - confirm against the datx
        data). Order is preserved; empty values and repeats are dropped.
        """
        regions = []
        for field in self.datx.find(ip)[:3]:
            if field and field not in regions:
                regions.append(field)
        return regions

    def saveUsefulProxy(self, proxy):
        """Store a new useful-proxy document with zeroed statistics.

        ``proxy`` is expected in ``host:port`` form; the part before the
        first colon is used for the region lookup.
        """
        ip = proxy.split(":")[0]
        data = {
            "proxy": proxy,
            "succ": 0,
            "keep_succ": 0,
            "fail": 0,
            "total": 0,
            "quality": 0,
            "https": PROXY_HTTPS["UNKNOWN"],
            "type": PROXY_TYPE["UNKNOWN"],
            "region_list": self.getProxyRegion(ip),
            "last_status": PROXY_LAST_STATUS["UNKNOWN"],
            "last_succ_time": 0,
            "next_verify_time": 0,
        }
        self.useful_proxy.saveUsefulProxy(data)

    def updateUsefulProxy(self, item, info):
        """Fill in ``type`` / ``https`` on a stored proxy while still UNKNOWN.

        Args:
            item: stored proxy document (mapping with "proxy", "type",
                "https").
            info: mapping holding the freshly detected "type" and "https".

        Nothing is written when neither field is UNKNOWN.
        """
        changes = {}
        if item.get("type") == PROXY_TYPE["UNKNOWN"]:
            changes["type"] = info["type"]
        if item.get("https") == PROXY_HTTPS["UNKNOWN"]:
            changes["https"] = info["https"]

        if changes:
            self.useful_proxy.updateUsefulProxy(item["proxy"], {"$set": changes})

    def deleteUsefulProxy(self, proxy):
        """Forward to ``useful_proxy.deleteUsefulProxy``."""
        self.useful_proxy.deleteUsefulProxy(proxy)

    def tickUsefulProxyVaildSucc(self, proxy):
        """Forward to ``useful_proxy.tickUsefulProxyVaildSucc``."""
        self.useful_proxy.tickUsefulProxyVaildSucc(proxy)

    def tickUsefulProxyVaildFail(self, proxy):
        """Forward to ``useful_proxy.tickUsefulProxyVaildFail``."""
        self.useful_proxy.tickUsefulProxyVaildFail(proxy)

    def tickUsefulProxyVaildTotal(self, proxy):
        """Forward to ``useful_proxy.tickUsefulProxyVaildTotal``."""
        self.useful_proxy.tickUsefulProxyVaildTotal(proxy)

    def updateUsefulProxyNextVerifyTime(self, proxy, start_time=None):
        """Set ``next_verify_time`` for a proxy based on its quality.

        A proxy with negative quality is postponed by
        ``abs(quality) * interval * 60`` from ``start_time``; any other proxy
        is due at ``start_time`` itself. ``interval`` is read from the
        ``verify_useful_proxy_interval`` setting (the ``* 60`` suggests it is
        in minutes - confirm against the configuration).

        Args:
            proxy: proxy address used as the document key.
            start_time: base timestamp; the current time is used when this
                is falsy (None or 0).
        """
        item = self.getProxy(proxy)
        if not item:
            # The proxy is no longer stored, so there is nothing to schedule.
            return

        # Documents may lack the field or hold None; treat both as neutral.
        quality = item.get("quality") or 0
        multiple = -quality if quality < 0 else 0

        base = start_time if start_time else int(time.time())
        interval = ConfigManager.setting_config.setting.get(
            "verify_useful_proxy_interval")
        next_verify_time = base + (multiple * interval * 60)

        self.useful_proxy.updateProxy(
            {"proxy": proxy},
            {"$set": {"next_verify_time": next_verify_time}})

    def tickRawProxyVaildFail(self, proxy):
        """Forward to ``raw_proxy.tickRawProxyVaildFail``."""
        self.raw_proxy.tickRawProxyVaildFail(proxy)

    def getProxy(self, proxy):
        """Forward to ``useful_proxy.getProxy`` and return its result."""
        return self.useful_proxy.getProxy(proxy)

    def getProxyNumber(self):
        """Return the raw and useful proxy counts as a dict."""
        return {
            'raw_proxy': self.getRawProxyNumber(),
            'useful_proxy': self.getUsefulProxyNumber(),
        }

    def getRawProxyNumber(self):
        """Forward to ``raw_proxy.getProxyNum``."""
        return self.raw_proxy.getProxyNum()

    def getUsefulProxyNumber(self):
        """Forward to ``useful_proxy.getProxyNum``."""
        return self.useful_proxy.getProxyNum()

    def tickDomainRequestState(self, domain, code):
        """Forward to ``domain_counter.tickDomainRequestState``."""
        self.domain_counter.tickDomainRequestState(domain, code)

    def getDomainCounter(self, domain):
        """Forward to ``domain_counter.getDomainCounter``."""
        return self.domain_counter.getDomainCounter(domain)

    def getAllFetcher(self):
        """Forward to ``fetchers.getAllFetcher``."""
        return self.fetchers.getAllFetcher()

    def getExecFetcher(self):
        """Query ``fetchers.getExecFetcher`` with the current Unix time."""
        return self.fetchers.getExecFetcher(int(time.time()))

    def getFetcher(self, name):
        """Forward to ``fetchers.getFetcher``."""
        return self.fetchers.getFetcher(name)

    def updateFetcher(self, name, data):
        """Forward to ``fetchers.updateFetcher``."""
        self.fetchers.updateFetcher(name, data)
class ProxyManager(object):
    """Facade over the useful-proxy, raw-proxy and domain-counter models.

    Most methods forward straight to the matching model method; the few
    that add logic of their own are documented individually.
    """

    def __init__(self):
        """Create the document models, the IP lookup and the quality cache."""
        self.useful_proxy = UsefulProxyDocsModel()
        self.raw_proxy = RawProxyDocsModel()
        self.domain_counter = DomainCounterDocsModel()
        self.datx = datx.City(IP_DATA_PATH)

        # Cache used by getQualityUsefulProxy: the last fetched list and the
        # per-domain position of the next proxy to hand out.
        self.quality_useful_proxy_list = []
        self.quality_domain_index = {}

    def cleanUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.cleanUsefulProxy`` and return its result."""
        return self.useful_proxy.cleanUsefulProxy(**kwargs)

    def cleanRawProxy(self, **kwargs):
        """Forward to ``raw_proxy.cleanRawProxy`` and return its result."""
        return self.raw_proxy.cleanRawProxy(**kwargs)

    def getAllValidUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getAllValidUsefulProxy``."""
        return self.useful_proxy.getAllValidUsefulProxy(**kwargs)

    def getAllUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getAllUsefulProxy``."""
        return self.useful_proxy.getAllUsefulProxy(**kwargs)

    def getAllRawProxy(self):
        """Forward to ``raw_proxy.getAllRawProxy``."""
        return self.raw_proxy.getAllRawProxy()

    def checkRawProxyExists(self, proxy):
        """Forward to ``raw_proxy.checkRawProxyExists``."""
        return self.raw_proxy.checkRawProxyExists(proxy)

    def checkUsefulProxyExists(self, proxy):
        """Forward to ``useful_proxy.checkUsefulProxyExists``."""
        return self.useful_proxy.checkUsefulProxyExists(proxy)

    def getSampleUsefulProxy(self, **kwargs):
        """Forward to ``useful_proxy.getSampleUsefulProxy``."""
        return self.useful_proxy.getSampleUsefulProxy(**kwargs)

    def getQualityUsefulProxy(self, **kwargs):
        """Hand out one proxy from the cached quality list, round-robin.

        Recognised keyword arguments (all are also forwarded to the model):
            count:  seeds the starting position after a refresh as
                    ``(count - 1) % len(list)``; defaults to 1.
            domain: key of the per-domain cursor; defaults to None.

        The cached list is re-fetched whenever the domain's cursor is 0,
        i.e. on first use and every time the cursor wraps around.

        Returns:
            One element of the cached list, or None when the model returned
            no proxies (this used to raise ZeroDivisionError).

        NOTE(review): the cursor now wraps with a modulo so it can never run
        past the cached list; confirm round-robin is the intended policy.
        NOTE(review): the cached list is shared by all domains while the
        cursor is per-domain - verify that this is deliberate.
        """
        count = kwargs.get("count", 1)
        domain = kwargs.get("domain", None)

        cursor = self.quality_domain_index.get(domain, 0)
        refreshed = cursor == 0
        if refreshed:
            self.quality_useful_proxy_list = (
                self.useful_proxy.getQualityUsefulProxy(**kwargs))

        candidates = self.quality_useful_proxy_list
        if not candidates:
            # Nothing to serve; keep the cursor at 0 so the next call retries
            # the fetch instead of indexing into an empty list.
            self.quality_domain_index[domain] = 0
            return None

        size = len(candidates)
        index = ((count - 1) if refreshed else cursor) % size
        # Wrapping back to 0 is what triggers the next refresh.
        self.quality_domain_index[domain] = (index + 1) % size
        return candidates[index]

    def deleteRawProxy(self, proxy):
        """Forward to ``raw_proxy.deleteRawProxy``."""
        self.raw_proxy.deleteRawProxy(proxy)

    def saveRawProxy(self, proxy):
        """Store a raw proxy with the configured initial health value."""
        data = {
            "proxy": proxy,
            "health": ConfigManager.setting_config.setting.get(
                "init_raw_proxy_health"),
        }
        self.raw_proxy.saveRawProxy(data)

    def getProxyRegion(self, ip):
        """Return the distinct, non-empty leading fields of the IP lookup.

        Only the first three fields returned by ``datx.find`` are considered
        (presumably country / province / city - confirm against the datx
        data). Order is preserved; empty values and repeats are dropped.
        """
        regions = []
        for field in self.datx.find(ip)[:3]:
            if field and field not in regions:
                regions.append(field)
        return regions

    def saveUsefulProxy(self, proxy):
        """Store a new useful-proxy document with zeroed statistics.

        ``proxy`` is expected in ``host:port`` form; the part before the
        first colon is used for the region lookup.
        """
        ip = proxy.split(":")[0]
        data = {
            "proxy": proxy,
            "succ": 0,
            "keep_succ": 0,
            "fail": 0,
            "total": 0,
            "https": PROXY_HTTPS["UNKNOWN"],
            "type": PROXY_TYPE["UNKNOWN"],
            "region_list": self.getProxyRegion(ip),
            "last_status": PROXY_LAST_STATUS["UNKNOWN"],
            "last_succ_time": 0,
        }
        self.useful_proxy.saveUsefulProxy(data)

    def updateUsefulProxy(self, item, info):
        """Fill in ``type`` / ``https`` on a stored proxy while still UNKNOWN.

        Args:
            item: stored proxy document (mapping with "type" and "https").
            info: object exposing the detected ``type`` and ``https`` plus
                the proxy ``address`` used as the update key.

        Nothing is written when neither field is UNKNOWN.
        """
        changes = {}
        if item.get("type") == PROXY_TYPE["UNKNOWN"]:
            # This must be an assignment: the earlier
            # ``data["$set"]["type"]: info.type`` was only an annotation and
            # silently dropped the detected type.
            changes["type"] = info.type
        if item.get("https") == PROXY_HTTPS["UNKNOWN"]:
            changes["https"] = info.https

        if changes:
            self.useful_proxy.updateUsefulProxy(info.address, {"$set": changes})

    def deleteUsefulProxy(self, proxy):
        """Forward to ``useful_proxy.deleteUsefulProxy``."""
        self.useful_proxy.deleteUsefulProxy(proxy)

    def tickUsefulProxyVaildSucc(self, proxy):
        """Forward to ``useful_proxy.tickUsefulProxyVaildSucc``."""
        self.useful_proxy.tickUsefulProxyVaildSucc(proxy)

    def tickUsefulProxyVaildFail(self, proxy):
        """Forward to ``useful_proxy.tickUsefulProxyVaildFail``."""
        self.useful_proxy.tickUsefulProxyVaildFail(proxy)

    def tickUsefulProxyVaildTotal(self, proxy):
        """Forward to ``useful_proxy.tickUsefulProxyVaildTotal``."""
        self.useful_proxy.tickUsefulProxyVaildTotal(proxy)

    def tickRawProxyVaildFail(self, proxy):
        """Forward to ``raw_proxy.tickRawProxyVaildFail``."""
        self.raw_proxy.tickRawProxyVaildFail(proxy)

    def getProxyNumber(self):
        """Return the raw and useful proxy counts as a dict."""
        return {
            'raw_proxy': self.getRawProxyNumber(),
            'useful_proxy': self.getUsefulProxyNumber(),
        }

    def getRawProxyNumber(self):
        """Forward to ``raw_proxy.getProxyNum``."""
        return self.raw_proxy.getProxyNum()

    def getUsefulProxyNumber(self):
        """Forward to ``useful_proxy.getProxyNum``."""
        return self.useful_proxy.getProxyNum()

    def tickDomainRequestState(self, domain, code):
        """Forward to ``domain_counter.tickDomainRequestState``."""
        self.domain_counter.tickDomainRequestState(domain, code)

    def getDomainCounter(self, domain):
        """Forward to ``domain_counter.getDomainCounter``."""
        return self.domain_counter.getDomainCounter(domain)
def __init__(self):
    """Create the two proxy document models and open the IP database."""
    # Document models for verified ("useful") and unverified ("raw") proxies.
    self.useful_proxy = UsefulProxyDocsModel()
    self.raw_proxy = RawProxyDocsModel()

    # IP database opened from IP_DATA_PATH.
    self.datx = datx.City(IP_DATA_PATH)