示例#1
0
    def __init__(self):
        """Set up the empty quality-proxy state and build the collaborators.

        The collaborators are the useful-proxy, raw-proxy and domain-counter
        document models plus a ``datx.City`` object constructed with
        ``IP_DATA_PATH``.
        """
        # Plain in-memory state; both containers start out empty.
        self.quality_domain_index = {}
        self.quality_useful_proxy_list = []

        # Collaborators, constructed in their original relative order.
        self.useful_proxy = UsefulProxyDocsModel()
        self.raw_proxy = RawProxyDocsModel()
        self.domain_counter = DomainCounterDocsModel()
        # NOTE(review): presumably an IP-to-region lookup -- confirm.
        self.datx = datx.City(IP_DATA_PATH)
示例#2
0
class ProxyManager(object):
    """Facade over the proxy-pool document models.

    Most methods forward straight to the useful-proxy, raw-proxy,
    domain-counter or fetcher model. A few of them build the document or
    update payload that the model is asked to store.
    """

    def __init__(self):
        """Create the document models, the ``datx.City`` object and the cache."""
        self.useful_proxy = UsefulProxyDocsModel()
        self.raw_proxy = RawProxyDocsModel()
        self.domain_counter = DomainCounterDocsModel()
        self.fetchers = FetchersDocsModel()
        self.datx = datx.City(IP_DATA_PATH)

        # State used by getQualityUsefulProxy(): the last fetched list and
        # the per-domain index recorded after each successful call.
        self.quality_useful_proxy_list = []
        self.quality_domain_index = {}

    def cleanUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``cleanUsefulProxy``."""
        return self.useful_proxy.cleanUsefulProxy(**kwargs)

    def cleanRawProxy(self, **kwargs):
        """Forward to the raw-proxy model's ``cleanRawProxy``."""
        return self.raw_proxy.cleanRawProxy(**kwargs)

    def getAllValidUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getAllValidUsefulProxy``."""
        return self.useful_proxy.getAllValidUsefulProxy(**kwargs)

    def getAllUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getAllUsefulProxy``."""
        return self.useful_proxy.getAllUsefulProxy(**kwargs)

    def getVerifyUsefulProxy(self):
        """Ask the useful-proxy model for proxies to verify as of now.

        The current Unix time, truncated to whole seconds, is passed to the
        model's ``getVerifyUsefulProxy``.
        """
        return self.useful_proxy.getVerifyUsefulProxy(int(time.time()))

    def getLowQualityUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getLowQualityUsefulProxy``."""
        return self.useful_proxy.getLowQualityUsefulProxy(**kwargs)

    def getHighQualityUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getHighQualityUsefulProxy``."""
        return self.useful_proxy.getHighQualityUsefulProxy(**kwargs)

    def getAllRawProxy(self):
        """Forward to the raw-proxy model's ``getAllRawProxy``."""
        return self.raw_proxy.getAllRawProxy()

    def checkRawProxyExists(self, proxy):
        """Forward to the raw-proxy model's ``checkRawProxyExists``."""
        return self.raw_proxy.checkRawProxyExists(proxy)

    def checkUsefulProxyExists(self, proxy):
        """Forward to the useful-proxy model's ``checkUsefulProxyExists``."""
        return self.useful_proxy.checkUsefulProxyExists(proxy)

    def getSampleUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getSampleUsefulProxy``."""
        return self.useful_proxy.getSampleUsefulProxy(**kwargs)

    def getQualityUsefulProxy(self, **kwargs):
        """Return one entry of the cached quality-proxy list.

        Recognised keyword arguments are ``count`` (default 1) and
        ``domain`` (default ``None``); all keyword arguments are passed on
        to the model when the cache is refreshed.

        The cache is refreshed whenever ``domain`` has no recorded index
        (or a recorded index of 0). The entry at
        ``(count - 1) % len(cache)`` is returned and that index plus one is
        recorded for the domain.

        Returns ``None`` when the list is empty; previously this case
        raised ``ZeroDivisionError`` from the modulo.

        NOTE(review): the recorded index is always >= 1, so a domain only
        triggers a refresh on its first successful call, and the recorded
        value is never used to pick the entry. This looks unintended but is
        left as-is -- confirm the desired rotation behaviour.
        """
        count = kwargs.get("count", 1)
        domain = kwargs.get("domain", None)

        if self.quality_domain_index.get(domain, 0) == 0:
            self.quality_useful_proxy_list = (
                self.useful_proxy.getQualityUsefulProxy(**kwargs))

        candidates = self.quality_useful_proxy_list
        if not candidates:
            # Nothing to choose from; avoid `% 0` below.
            return None

        index = (count - 1) % len(candidates)
        self.quality_domain_index[domain] = index + 1
        return candidates[index]

    def deleteRawProxy(self, proxy):
        """Forward to the raw-proxy model's ``deleteRawProxy``."""
        self.raw_proxy.deleteRawProxy(proxy)

    def saveRawProxy(self, proxy):
        """Store ``proxy`` as a raw proxy with the configured initial health.

        The health value is read from the ``init_raw_proxy_health`` setting.
        """
        settings = ConfigManager.setting_config.setting
        self.raw_proxy.saveRawProxy({
            "proxy": proxy,
            "health": settings.get("init_raw_proxy_health"),
        })

    def getProxyRegion(self, ip):
        """Return the distinct, non-empty values among the first three
        fields of ``self.datx.find(ip)``, in their original order.
        """
        regions = []
        for field in self.datx.find(ip)[:3]:
            # Skip empty fields and repeats (e.g. the same name twice).
            if field and field not in regions:
                regions.append(field)
        return regions

    def saveUsefulProxy(self, proxy):
        """Store ``proxy`` as a new useful proxy with zeroed statistics.

        ``proxy`` is expected to look like ``"host:port"``; the part before
        the first colon is used to look up the region list.
        """
        host = proxy.split(":", 1)[0]

        self.useful_proxy.saveUsefulProxy({
            "proxy": proxy,
            "succ": 0,
            "keep_succ": 0,
            "fail": 0,
            "total": 0,
            "quality": 0,
            "https": PROXY_HTTPS["UNKNOWN"],
            "type": PROXY_TYPE["UNKNOWN"],
            "region_list": self.getProxyRegion(host),
            "last_status": PROXY_LAST_STATUS["UNKNOWN"],
            "last_succ_time": 0,
            "next_verify_time": 0,
        })

    def updateUsefulProxy(self, item, info):
        """Fill in the ``type`` / ``https`` fields of a stored proxy.

        A field is only written when the stored ``item`` still holds the
        ``UNKNOWN`` value for it; the new value is taken from ``info``.
        The model is not called when there is nothing to write.
        """
        changes = {}

        if item.get("type") == PROXY_TYPE["UNKNOWN"]:
            changes["type"] = info["type"]

        if item.get("https") == PROXY_HTTPS["UNKNOWN"]:
            changes["https"] = info["https"]

        if changes:
            self.useful_proxy.updateUsefulProxy(item["proxy"],
                                                {"$set": changes})

    def deleteUsefulProxy(self, proxy):
        """Forward to the useful-proxy model's ``deleteUsefulProxy``."""
        self.useful_proxy.deleteUsefulProxy(proxy)

    def tickUsefulProxyVaildSucc(self, proxy):
        """Forward to the useful-proxy model's ``tickUsefulProxyVaildSucc``."""
        self.useful_proxy.tickUsefulProxyVaildSucc(proxy)

    def tickUsefulProxyVaildFail(self, proxy):
        """Forward to the useful-proxy model's ``tickUsefulProxyVaildFail``."""
        self.useful_proxy.tickUsefulProxyVaildFail(proxy)

    def tickUsefulProxyVaildTotal(self, proxy):
        """Forward to the useful-proxy model's ``tickUsefulProxyVaildTotal``."""
        self.useful_proxy.tickUsefulProxyVaildTotal(proxy)

    def updateUsefulProxyNextVerifyTime(self, proxy, start_time=None):
        """Store the next verification time for ``proxy``.

        The delay is ``abs(quality)`` times the
        ``verify_useful_proxy_interval`` setting times 60 when the stored
        quality is zero or negative, and no delay when it is positive.
        ``start_time`` falls back to the current time when it is falsy.

        NOTE(review): the ``* 60`` suggests the interval is in minutes and
        the timestamps are in seconds -- confirm against the settings.
        """
        quality = self.getProxy(proxy)["quality"]
        # Positive quality: due again immediately. Otherwise back off in
        # proportion to how negative the quality is.
        multiple = 0 if quality > 0 else abs(quality)

        start_time = start_time or int(time.time())
        interval = ConfigManager.setting_config.setting.get(
            "verify_useful_proxy_interval")
        next_verify_time = start_time + (multiple * interval * 60)

        self.useful_proxy.updateProxy(
            {"proxy": proxy},
            {"$set": {"next_verify_time": next_verify_time}})

    def tickRawProxyVaildFail(self, proxy):
        """Forward to the raw-proxy model's ``tickRawProxyVaildFail``."""
        self.raw_proxy.tickRawProxyVaildFail(proxy)

    def getProxy(self, proxy):
        """Forward to the useful-proxy model's ``getProxy``."""
        return self.useful_proxy.getProxy(proxy)

    def getProxyNumber(self):
        """Return both proxy counts as
        ``{'raw_proxy': ..., 'useful_proxy': ...}``.
        """
        return {
            'raw_proxy': self.getRawProxyNumber(),
            'useful_proxy': self.getUsefulProxyNumber(),
        }

    def getRawProxyNumber(self):
        """Return the raw-proxy model's ``getProxyNum()``."""
        return self.raw_proxy.getProxyNum()

    def getUsefulProxyNumber(self):
        """Return the useful-proxy model's ``getProxyNum()``."""
        return self.useful_proxy.getProxyNum()

    def tickDomainRequestState(self, domain, code):
        """Forward to the domain-counter model's ``tickDomainRequestState``."""
        self.domain_counter.tickDomainRequestState(domain, code)

    def getDomainCounter(self, domain):
        """Forward to the domain-counter model's ``getDomainCounter``."""
        return self.domain_counter.getDomainCounter(domain)

    def getAllFetcher(self):
        """Forward to the fetcher model's ``getAllFetcher``."""
        return self.fetchers.getAllFetcher()

    def getExecFetcher(self):
        """Ask the fetcher model for fetchers to execute as of now.

        The current Unix time, truncated to whole seconds, is passed to the
        model's ``getExecFetcher``.
        """
        return self.fetchers.getExecFetcher(int(time.time()))

    def getFetcher(self, name):
        """Forward to the fetcher model's ``getFetcher``."""
        return self.fetchers.getFetcher(name)

    def updateFetcher(self, name, data):
        """Forward to the fetcher model's ``updateFetcher``."""
        self.fetchers.updateFetcher(name, data)
示例#3
0
class ProxyManager(object):
    """Facade over the proxy-pool document models.

    Most methods forward straight to the useful-proxy, raw-proxy or
    domain-counter model. A few of them build the document or update
    payload that the model is asked to store.
    """

    def __init__(self):
        """Create the document models, the ``datx.City`` object and the cache."""
        self.useful_proxy = UsefulProxyDocsModel()
        self.raw_proxy = RawProxyDocsModel()
        self.domain_counter = DomainCounterDocsModel()
        self.datx = datx.City(IP_DATA_PATH)

        # State used by getQualityUsefulProxy(): the last fetched list and
        # the per-domain index recorded after each successful call.
        self.quality_useful_proxy_list = []
        self.quality_domain_index = {}

    def cleanUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``cleanUsefulProxy``."""
        return self.useful_proxy.cleanUsefulProxy(**kwargs)

    def cleanRawProxy(self, **kwargs):
        """Forward to the raw-proxy model's ``cleanRawProxy``."""
        return self.raw_proxy.cleanRawProxy(**kwargs)

    def getAllValidUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getAllValidUsefulProxy``."""
        return self.useful_proxy.getAllValidUsefulProxy(**kwargs)

    def getAllUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getAllUsefulProxy``."""
        return self.useful_proxy.getAllUsefulProxy(**kwargs)

    def getAllRawProxy(self):
        """Forward to the raw-proxy model's ``getAllRawProxy``."""
        return self.raw_proxy.getAllRawProxy()

    def checkRawProxyExists(self, proxy):
        """Forward to the raw-proxy model's ``checkRawProxyExists``."""
        return self.raw_proxy.checkRawProxyExists(proxy)

    def checkUsefulProxyExists(self, proxy):
        """Forward to the useful-proxy model's ``checkUsefulProxyExists``."""
        return self.useful_proxy.checkUsefulProxyExists(proxy)

    def getSampleUsefulProxy(self, **kwargs):
        """Forward to the useful-proxy model's ``getSampleUsefulProxy``."""
        return self.useful_proxy.getSampleUsefulProxy(**kwargs)

    def getQualityUsefulProxy(self, **kwargs):
        """Return one entry of the cached quality-proxy list.

        Recognised keyword arguments are ``count`` (default 1) and
        ``domain`` (default ``None``); all keyword arguments are passed on
        to the model when the cache is refreshed.

        The cache is refreshed whenever ``domain`` has no recorded index
        (or a recorded index of 0). The entry at
        ``(count - 1) % len(cache)`` is returned and that index plus one is
        recorded for the domain.

        Returns ``None`` when the list is empty; previously this case
        raised ``ZeroDivisionError`` from the modulo.

        NOTE(review): the recorded index is always >= 1, so a domain only
        triggers a refresh on its first successful call, and the recorded
        value is never used to pick the entry. This looks unintended but is
        left as-is -- confirm the desired rotation behaviour.
        """
        count = kwargs.get("count", 1)
        domain = kwargs.get("domain", None)

        if self.quality_domain_index.get(domain, 0) == 0:
            self.quality_useful_proxy_list = (
                self.useful_proxy.getQualityUsefulProxy(**kwargs))

        candidates = self.quality_useful_proxy_list
        if not candidates:
            # Nothing to choose from; avoid `% 0` below.
            return None

        index = (count - 1) % len(candidates)
        self.quality_domain_index[domain] = index + 1
        return candidates[index]

    def deleteRawProxy(self, proxy):
        """Forward to the raw-proxy model's ``deleteRawProxy``."""
        self.raw_proxy.deleteRawProxy(proxy)

    def saveRawProxy(self, proxy):
        """Store ``proxy`` as a raw proxy with the configured initial health.

        The health value is read from the ``init_raw_proxy_health`` setting.
        """
        settings = ConfigManager.setting_config.setting
        self.raw_proxy.saveRawProxy({
            "proxy": proxy,
            "health": settings.get("init_raw_proxy_health"),
        })

    def getProxyRegion(self, ip):
        """Return the distinct, non-empty values among the first three
        fields of ``self.datx.find(ip)``, in their original order.
        """
        regions = []
        for field in self.datx.find(ip)[:3]:
            # Skip empty fields and repeats (e.g. the same name twice).
            if field and field not in regions:
                regions.append(field)
        return regions

    def saveUsefulProxy(self, proxy):
        """Store ``proxy`` as a new useful proxy with zeroed statistics.

        ``proxy`` is expected to look like ``"host:port"``; the part before
        the first colon is used to look up the region list.
        """
        host = proxy.split(":", 1)[0]

        self.useful_proxy.saveUsefulProxy({
            "proxy": proxy,
            "succ": 0,
            "keep_succ": 0,
            "fail": 0,
            "total": 0,
            "https": PROXY_HTTPS["UNKNOWN"],
            "type": PROXY_TYPE["UNKNOWN"],
            "region_list": self.getProxyRegion(host),
            "last_status": PROXY_LAST_STATUS["UNKNOWN"],
            "last_succ_time": 0,
        })

    def updateUsefulProxy(self, item, info):
        """Fill in the ``type`` / ``https`` fields of a stored proxy.

        A field is only written when the stored ``item`` still holds the
        ``UNKNOWN`` value for it; the new value is read from the matching
        attribute of ``info``. The update is keyed on ``info.address`` and
        the model is not called when there is nothing to write.
        """
        changes = {}

        if item.get("type") == PROXY_TYPE["UNKNOWN"]:
            # This used to be written `data["$set"]["type"]: info.type`,
            # an annotation rather than an assignment, so the type was
            # never stored.
            changes["type"] = info.type

        if item.get("https") == PROXY_HTTPS["UNKNOWN"]:
            changes["https"] = info.https

        if changes:
            self.useful_proxy.updateUsefulProxy(info.address,
                                                {"$set": changes})

    def deleteUsefulProxy(self, proxy):
        """Forward to the useful-proxy model's ``deleteUsefulProxy``."""
        self.useful_proxy.deleteUsefulProxy(proxy)

    def tickUsefulProxyVaildSucc(self, proxy):
        """Forward to the useful-proxy model's ``tickUsefulProxyVaildSucc``."""
        self.useful_proxy.tickUsefulProxyVaildSucc(proxy)

    def tickUsefulProxyVaildFail(self, proxy):
        """Forward to the useful-proxy model's ``tickUsefulProxyVaildFail``."""
        self.useful_proxy.tickUsefulProxyVaildFail(proxy)

    def tickUsefulProxyVaildTotal(self, proxy):
        """Forward to the useful-proxy model's ``tickUsefulProxyVaildTotal``."""
        self.useful_proxy.tickUsefulProxyVaildTotal(proxy)

    def tickRawProxyVaildFail(self, proxy):
        """Forward to the raw-proxy model's ``tickRawProxyVaildFail``."""
        self.raw_proxy.tickRawProxyVaildFail(proxy)

    def getProxyNumber(self):
        """Return both proxy counts as
        ``{'raw_proxy': ..., 'useful_proxy': ...}``.
        """
        return {
            'raw_proxy': self.getRawProxyNumber(),
            'useful_proxy': self.getUsefulProxyNumber(),
        }

    def getRawProxyNumber(self):
        """Return the raw-proxy model's ``getProxyNum()``."""
        return self.raw_proxy.getProxyNum()

    def getUsefulProxyNumber(self):
        """Return the useful-proxy model's ``getProxyNum()``."""
        return self.useful_proxy.getProxyNum()

    def tickDomainRequestState(self, domain, code):
        """Forward to the domain-counter model's ``tickDomainRequestState``."""
        self.domain_counter.tickDomainRequestState(domain, code)

    def getDomainCounter(self, domain):
        """Forward to the domain-counter model's ``getDomainCounter``."""
        return self.domain_counter.getDomainCounter(domain)
示例#4
0
 def __init__(self):
     """Create the useful-proxy and raw-proxy document models and the
     ``datx.City`` object built from ``IP_DATA_PATH``.
     """
     self.useful_proxy = UsefulProxyDocsModel()
     self.raw_proxy = RawProxyDocsModel()
     # NOTE(review): presumably an IP-to-region lookup -- confirm.
     self.datx = datx.City(IP_DATA_PATH)