Exemplo n.º 1
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")

        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))

                        continue
                    elif proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))

                        continue
                    else:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} success'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        self.db.put(Proxy(proxy, source=proxyGetter))
                        proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            random_choice = random.choice(item_list)

            return Proxy.newProxyFromJson(random_choice)

        return None

    def get_http(self):
        """
        return a http proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            for _ in item_list:
                random_choice = random.choice(item_list)
                proxy_type = json.loads(random_choice)['proxy'].split("://")[0]

                if proxy_type == 'http':
                    return Proxy.newProxyFromJson(random_choice)

        return None

    def get_socks(self):
        """
        return a useful socks proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        if item_list:
            for _ in item_list:
                random_choice = random.choice(item_list)
                proxy_type = json.loads(random_choice)['proxy'].split("://")[0]

                if proxy_type == 'socks4':
                    return Proxy.newProxyFromJson(random_choice)

        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()

        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 2
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            # proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                # proxy_iter = [_ for _ in getattr(GetFreeProxy, proxyGetter.strip())()]
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 3
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 4
0
class ProxyManager(object):

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        抓取代理地址存入DB中
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()

            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                proxy_iter = [_ for _ in getattr(GetFreeProxy, proxyGetter.strip())()]
            except Exception as e:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(func=proxyGetter, proxy=proxy))

            # 存储到DB
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        返回一个有用的代理
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None

    def delete(self, proxy):
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        if EnvUtil.PY3:
            return list(items.keys()) if items else list()
        return items.key() if items else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
Exemplo n.º 5
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'
        self.adsl_queue = 'adsl'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def initProxyPool(self):
        """
        第一次启动时调用这个方法
        :return:
        """
        self.deleteAll()
        self.db.changeTable(self.adsl_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.values()) if item_dict else list()
        return item_dict.values() if item_dict else list()

    def deleteAll(self):
        """
        清空代理池
        :param proxy:
        :return:
        """
        # 删除所有
        proxies = self.getAll()
        for proxy in proxies:
            self.delete(proxy)

    def refreshADSL(self, proxy):
        """
        重新拨号
        :param proxy:
        :return:
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        ip = proxy.split(':')[0]
        try:
            # 调用接口重新拨号
            refreshApi = "http://{ip}:8000/refresh".format(ip=ip)
            r = requests.get(refreshApi, timeout=5, verify=False)
            if r.status_code == 200:
                print('{proxy} refres done')
        except Exception as e:
            print(str(e))
Exemplo n.º 6
0
class ProxyManager(object):
    """
    ProxyManager
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy.strip())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
Exemplo n.º 7
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                proxy_iter = [
                    _ for _ in getattr(GetFreeProxy, proxyGetter.strip())()
                ]
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.error('{func}: fetch proxy {proxy} error'.format(
                        func=proxyGetter, proxy=proxy))

            # store
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def validateProxy(self, proxy):

        return validUsefulProxy(proxy)

    def referProxy(self, proxy, city):

        print(proxy, city)
        if proxy != None:
            if self.validateProxy(proxy):
                return proxy

            old_proxy_info = GetProxyLocInfo(proxy)
            old_proxy_loc = old_proxy_info.get_proxy_loc_info()
            city = old_proxy_loc[1]

        count = 1
        while count < 15:
            tmp_proxy = self.get()
            print(tmp_proxy)
            if self.validateProxy(tmp_proxy):
                try:
                    tmp_proxy_info = GetProxyLocInfo(tmp_proxy)
                    tmp_proxy_loc = tmp_proxy_info.get_proxy_loc_info()
                    if tmp_proxy_loc[1] == city:
                        return tmp_proxy
                except:
                    pass
            count += 1

        return None

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 8
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    @staticmethod
    def __dynamic_import__(name):
        components = name.split('.')
        mod = __import__(components[0])
        for comp in components[1:]:
            mod = getattr(mod, comp)
        return mod

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter user defined proxy getter class
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        try:
            proxy_getter_class = self.__dynamic_import__(
                config.proxy_getter_lib)
        except Exception as e:
            raise Exception('%s not found in ProxyGetter' %
                            config.proxy_getter_lib)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(proxy_getter_class,
                                     proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 9
0
class ProxyManager(object):
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = Loghandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy is not False:
                        if proxy and verifyProxyFormat(proxy):
                            self.log.info("{func}: fetch proxy {proxy}".format(
                                func=proxyGetter, proxy=proxy))
                            self.db.put(proxy)
                        else:
                            self.log.error(
                                "{func}: fetch proxy {proxy} error".format(
                                    func=proxyGetter, proxy=proxy))
            except Exception as s:
                self.log.error("refresh: {}".format(s))
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None

    def get_new(self):
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            return random.choice(item_dict)

    def delete(self, proxy):
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()
        # if EnvUtil.PY3:
        #     return list(item_dict.keys()) if item_dict else list()
        # return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }

    def getNumber_new(self):
        # self.db.changeTable(self.raw_proxy_queue)
        raw, useful = self.db.getNumber(self.raw_proxy_queue,
                                        self.useful_proxy_queue)
        # self.db.changeTable(self.useful_proxy_queue)
        # total_useful_proxy = self.db.getNumber(self.useful_proxy_queue)
        print('{}---,{}'.format(raw, useful))
        return raw, useful
Exemplo n.º 10
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self, mode):
        self.mode = mode
        self.db = DbClient(mode)
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info(
                            'Mode:{mode} {func}: fetch proxy {proxy}'.format(
                                mode=self.mode, func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            'Mode:{mode} {func}: fetch proxy {proxy} error'.
                            format(mode=self.mode,
                                   func=proxyGetter,
                                   proxy=proxy))
            except Exception as e:
                self.log.error("Mode:{mode} {func}: fetch proxy fail".format(
                    mode=self.mode, func=proxyGetter))
                continue

    def get_http(self):
        """
        return a useful proxy (http)
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def get_https(self):
        """
        return a useful proxy (https)
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 11
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter
        :return:
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy['ip']))
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy['ip']):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    proxy['country'] = self.get_ip_country(proxy['ip'])
                    self.db.put(proxy)

    def get_ip_country(self, ip):
        match = geolite2.lookup(ip)
        return match.country if match else None

    def get(self, filters):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.random_one(filters)

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()

    def clean(self):
        self.db.clean()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
Exemplo n.º 12
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue
                    elif proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue
                    else:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} success'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        self.db.put(Proxy(proxy, source=proxyGetter))
                        proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def getAllByName(self, name):
        all_proxies = self.getAll()

        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        fail_list = self.db.getAll()
        fail_proxies = [Proxy.newProxyFromJson(_) for _ in fail_list]

        # todo: 优化
        filter_proxies = []
        for proxy in all_proxies:
            isFailed = False
            for failed in fail_proxies:
                if failed.proxy == proxy.proxy:
                    failed_date = datetime.strptime(failed.last_time,
                                                    "%Y-%m-%d %H:%M:%S")
                    if failed_date + timedelta(hours=24) > datetime.now():
                        isFailed = True
                    break
            if not isFailed:
                filter_proxies.append(proxy)

        return filter_proxies

    def deleteByName(self, name, proxy):
        failed_proxy = Proxy(
            proxy=proxy,
            last_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        self.db.put(failed_proxy)

    def getByName(self, name):
        proxies = self.getAllByName(name)
        if proxies:
            return random.choice(proxies)
        return None
Exemplo n.º 13
0
class ProxyManager(object):
    """
    ProxyManager
    """
    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy_test'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy_test'

    def refresh(self):
        """从已有站点上抓取proxy,并存放到redis raw_proxy
        fetch proxy into Db by ProxyGetter/getFreeProxy.py
        :return:
        """
        max_conn = 100
        meta: dict = {}
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # 直接存储代理, 不用在代码中排重, hash 结构本身具有排重功能
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        host, port = proxy.split(":")
                        meta["host"] = host
                        meta["port"] = port
                        meta["max_conn"] = max_conn
                        self.db.put(proxy, json.dumps(meta))
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(e)
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        return a useful proxy
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            if EnvUtil.PY3:
                return random.choice(list(item_dict.keys()))
            else:
                return random.choice(item_dict.keys())
        return None
        # return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if EnvUtil.PY3:
            return list(item_dict.keys()) if item_dict else list()
        return item_dict.keys() if item_dict else list()

    def getNumber(self):
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def add_proxy(proxy, meta):
        # 向proxy-center中增加proxy节点,同时更新redis
        host, port = proxy.split(":")
        url = f'http://10.143.55.90:9381/api/proxies/{host}%3A{port}/'
        jmeta = json.dumps(meta)
        r = requests.post(url, data=jmeta)
        # print(r.status_code)
        print(r.text)

    def delete_proxy(proxy):
        # 从proxy-center中删除proxy节点,同时更新redis
        host, port = proxy.split(":")
        url = f'http://10.143.55.90:9381/api/proxies/{host}%3A{port}/'
        r = requests.delete(url)