Пример #1
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random/``, ``/proxies/`` and
        ``/disabled_domain/``.
        """
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random/')
        def random():
            """Return the proxy from ``random_proxy`` as ``[protocol://]ip:port``."""
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_DEFAULT_COUNT)
            host_port = f'{picked.ip}:{picked.port}'
            if not protocol:
                return host_port
            return f'{protocol}://{host_port}'

        @self.app.route('/proxies/')
        def proxies():
            """Return the ip/port of each proxy from ``get_proxies`` as JSON."""
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_DEFAULT_COUNT)
            return json.dumps(
                [dict(ip=entry.ip, port=entry.port) for entry in found])

        @self.app.route('/disabled_domain/')
        def disable_domain():
            """Call ``disabled_domain(ip, domain)`` once both parameters are given."""
            ip = request.args.get('ip', None)
            if ip is None:
                return '填写ip'
            domain = request.args.get('domain', None)
            if domain is None:
                return '填写域名'
            self.mongo_pool.disabled_domain(ip, domain)
            return f'成功设置{ip}禁止访问{domain}'
Пример #2
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/all`` and ``/disable_domain``.
        """
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """Return the proxy from ``random_proxy`` as ``[protocol://]ip:port``."""
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=API_COUNT)
            print(picked)
            host_port = f'{picked.ip}:{picked.port}'
            if not protocol:
                return host_port
            return f'{protocol}://{host_port}'

        @self.app.route('/all')
        def all():
            """Return the ``__dict__`` of each proxy from ``get_proxies`` as JSON."""
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'), count=API_COUNT)
            return json.dumps([entry.__dict__ for entry in found])

        @self.app.route('/disable_domain')
        def disable_domain():
            """Call ``disable_domain(ip, domain)`` once both parameters are given."""
            ip = request.args.get('ip')
            if ip is None:
                return "请提供ip参数"
            domain = request.args.get('domain')
            if domain is None:
                return "请传入域名"
            self.mongo_pool.disable_domain(ip, domain)
            return f"{ip} 禁用域名{domain} 成功"
Пример #3
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # 2.1 Initialise the Flask web service.
        self.app = Flask(__name__)
        # MongoPool object used for the database operations.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """
            2.2 Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol type of the caller's request
                domain: domain of the caller's request
            :return: ``protocol://ip:port`` when protocol is given,
                otherwise ``ip:port``
            """
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_MAX_COUNT)

            host_port = '{}:{}'.format(picked.ip, picked.port)
            if not protocol:
                return host_port
            return '{}://{}'.format(protocol, host_port)

        @self.app.route('/proxies')
        def proxies():
            """
            2.3 Serve several proxy IPs for the given protocol and domain.

            :return: ``{"proxies": [...]}`` where each item is a proxy's
                ``__dict__``
            """
            query = request.args
            # protocol: http/https; domain: e.g. jd.com
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT)
            # The pool's objects are converted to plain dicts for the response.
            return {"proxies": [entry.__dict__ for entry in found]}

        @self.app.route('/disable_domain')
        def disable_domain():
            # 2.4 Record that a domain must not be served through the given
            # IP, so that IP is no longer returned for that domain.
            ip = request.args.get('ip')
            if ip is None:
                return '请提供ip参数'
            domain = request.args.get('domain')
            if domain is None:
                return '请提供域名domain参数'
            self.mongo_pool.disable_domain(ip, domain)
            return "{} 禁用域名 {} 成功".format(ip, domain)
Пример #4
0
    def __init__(self):
        """Create the MongoPool object and the coroutine pool."""
        # Create the MongoPool object.
        self.mongopool = MongoPool()
        # 3. Each spider task is meant to be executed asynchronously.

        # 3.1 Create the coroutine pool object in __init__.
        self.coroutine_pool = Pool()
Пример #5
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # Initialise the Flask web service.
        self.app = Flask(__name__)
        # MongoPool object used for the database operations.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            '''
            Serve one proxy IP chosen for the given protocol and domain.
            :protocol: protocol type of the caller's request
            :domain: domain of the caller's request
            '''
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_MAX_COUNT, nick_type=2)

            host_port = f"{picked.ip}:{picked.port}"
            if not protocol:
                return host_port
            return f"{protocol}://{host_port}"

        @self.app.route('/proxies')
        def proxies():
            '''
            Serve several proxy IPs for the given protocol and domain.
            :return: JSON array of each proxy's ``__dict__``
            '''
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT, nick_type=0)
            # Convert the proxy objects to dicts and return them as JSON text.
            return json.dumps([entry.__dict__ for entry in found])

        @self.app.route('/disable_domain')
        def disable_domain():
            '''
            Record that a domain must not be served through the given IP,
            so that IP is no longer returned for that domain.
            :return: a status message
            '''
            ip = request.args.get('ip')
            if ip is None:
                return "请提供ip参数"
            domain = request.args.get('domain')
            if domain is None:
                return "请提供域名domain参数"

            self.mongo_pool.disable_domain(ip, domain)
            return "{}禁用域名{}成功".format(ip, domain)
Пример #6
0
 def __init__(self, module_name='', spider_list=None):
     """Store the optional spider configuration and create the pools.

     Args:
         module_name: stored on the instance only when non-empty.
         spider_list: stored on the instance only when non-empty. The
             default is ``None`` rather than a shared mutable ``[]``;
             both are falsy, so callers see no difference.

     NOTE(review): when an argument is empty the attribute is not set
     here - presumably the class defines defaults; confirm.
     """
     if module_name:
         self.module_name = module_name
     if spider_list:
         self.spider_list = spider_list
     self.mongo_pool = MongoPool()
     # Create the coroutine pool.
     self.coroutine_pool = Pool()
Пример #7
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # Initialise the Flask web service.
        self.app = Flask(__name__)
        # MongoPool object used for the database operations.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """
            2.2 Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol of the caller's request
                domain: domain of the caller's request
            :return: ``protocol://ip:port`` when protocol is given,
                otherwise ``ip:port``
            """
            protocol = request.args.get('protocol')
            domain = request.args.get('domain')
            proxy = self.mongo_pool.random_proxy(protocol,
                                                 domain,
                                                 count=PROXIES_MAX_COUNT)

            if protocol:
                return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
            else:
                return '{}:{}'.format(proxy.ip, proxy.port)

        @self.app.route('/proxies')
        def proxies():
            """
            2.3 Serve several proxy IPs for the given protocol and domain.

            :return: JSON array of each proxy's ``__dict__``
            """
            protocol = request.args.get('protocol')
            domain = request.args.get('domain')
            proxies = self.mongo_pool.get_proxies(protocol,
                                                  domain,
                                                  count=PROXIES_MAX_COUNT)
            # The pool's objects are converted to plain dicts before being
            # dumped to JSON text.
            proxies = [proxy.__dict__ for proxy in proxies]
            return json.dumps(proxies)

        @self.app.route('/disable_domain')
        def disable_domain():
            # 2.4 Record that a domain must not be served through the given
            # IP, so that IP is no longer returned for that domain.
            ip = request.args.get('ip')
            domain = request.args.get('domain')

            # The messages previously read '情提供…', a typo for '请提供…'
            # ("please provide").
            if ip is None:
                return '请提供ip参数'
            if domain is None:
                return '请提供域名domain参数'

            self.mongo_pool.disable_domain(ip, domain)
            return '{}禁用域名{}成功'.format(ip, domain)
Пример #8
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # Initialise the Flask web service.
        self.app = Flask(__name__)
        # Create the MongoPool object.
        self.mongo_pool = MongoPool()

        def _requested_protocol():
            """Return the protocol filter from the query string, if any."""
            # The views used to read only the misspelled key 'protocal', so
            # the documented 'protocol' parameter was silently ignored. The
            # old spelling is still honoured for existing callers.
            return (request.args.get('protocol')
                    or request.args.get('protocal'))

        @self.app.route('/random')
        def random():
            """
            Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol type of the caller's request
                domain: domain of the caller's request
            :return: ``protocol://ip:port`` when protocol is given,
                otherwise ``ip:port``
            """
            protocol = _requested_protocol()
            domain = request.args.get('domain')
            proxy = self.mongo_pool.random_proxy(protocol,
                                                 domain,
                                                 count=PROXIES_MAX_COUNT)

            if protocol:
                return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
            return '{}:{}'.format(proxy.ip, proxy.port)

        @self.app.route('/proxies')
        def proxies():
            """Serve several proxy IPs for the given protocol and domain."""
            protocol = _requested_protocol()
            domain = request.args.get('domain')

            proxies = self.mongo_pool.get_proxies(protocol,
                                                  domain,
                                                  count=PROXIES_MAX_COUNT)
            # The pool's objects are converted to plain dicts before being
            # dumped to JSON text.
            proxies = [proxy.__dict__ for proxy in proxies]
            return json.dumps(proxies)

        @self.app.route('/disable_domain')
        def disable_domain():
            """Record that a domain must not be served through the given IP."""
            ip = request.args.get('ip')
            domain = request.args.get('domain')

            if ip is None:
                return '请求提供ip参数'
            if domain is None:
                return '请提供域名domain参数'

            self.mongo_pool.disable_domain(ip, domain)

            return "{} 禁用域名{} 成功".format(ip, domain)
Пример #9
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable``.
        """
        # 2.1 Initialise the Flask web server.
        self.app = Flask(__name__)
        # MongoPool object used for the database operations.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """
            2.2 Serve one proxy IP chosen for the given protocol and domain.
              - protocol: protocol type of the caller's request
              - domain: domain of the caller's request
            """
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxies(
                protocol, domain, count=PROXIES_MAX_COUNT)

            host_port = "{}:{}".format(picked.ip, picked.port)
            if not protocol:
                return host_port
            return "{}://{}".format(protocol, host_port)

        @self.app.route('/proxies')
        def proxies():
            """
            2.3 Serve several proxy IPs for the given protocol and domain.
            :return: JSON array of each proxy's ``__dict__``
            """
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT)
            # Convert the proxy objects to dicts and return them as JSON text.
            return json.dumps([entry.__dict__ for entry in found])

        # 2.4 Service that appends an unusable domain to the given IP.
        @self.app.route('/disable')
        def disable_domain():
            ip = request.args.get('ip')
            if ip is None:
                return '请提供ip参数'
            domain = request.args.get('domain')
            if domain is None:
                return '请提供domain参数'

            self.mongo_pool.disable_domain(ip, domain)
            return "{}禁用域名{}成功".format(ip, domain)
Пример #10
0
    def __init__(self):
        """Create the Flask app and the MongoPool and register ``/random``."""
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """Return the proxy from ``usable_proxy`` as ``[protocol://]ip:port``."""
            protocol = request.args.get('protocol')
            picked = self.mongo_pool.usable_proxy()
            host_port = '{}:{}'.format(picked.ip, picked.port)
            if not protocol:
                return host_port
            return '{}://{}'.format(protocol, host_port)
Пример #11
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # Initialise the Flask web service.
        self.app = Flask(__name__)
        # MongoPool object used for the database operations.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """
            Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol type of the caller's request
                domain: domain of the caller's request
            :return: ``protocol://ip:port`` when protocol is given,
                otherwise ``ip:port``
            """
            protocol = request.args.get('protocol')
            domain = request.args.get('domain')
            print(protocol)
            print(domain)
            proxy = self.mongo_pool.random_proxy(protocol, domain,
                                                 count=PROXIES_MAX_COUNT)
            if protocol:
                return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
            return '{}:{}'.format(proxy.ip, proxy.port)

        @self.app.route('/proxies')
        def proxies():
            """Serve several proxy IPs for the given protocol and domain."""
            # Protocol: http/https. This view used to read the query key
            # 'proxies', so a 'protocol' parameter was ignored. The old key
            # is still honoured for existing callers.
            protocol = (request.args.get('protocol')
                        or request.args.get('proxies'))
            # Domain, e.g. jd.com
            domain = request.args.get('domain')
            proxies = self.mongo_pool.get_proxies(protocol, domain,
                                                  count=PROXIES_MAX_COUNT)
            # The pool's objects are converted to plain dicts before being
            # dumped to JSON text.
            proxies = [proxy.__dict__ for proxy in proxies]
            return json.dumps(proxies)

        @self.app.route('/disable_domain')
        def disable_domain():
            # Record that a domain must not be served through the given IP,
            # so that IP is no longer returned for that domain.
            ip = request.args.get('ip')
            domain = request.args.get('domain')
            if ip is None:
                return "请提供ip参数"
            if domain is None:
                return '请提供域名domain参数'
            self.mongo_pool.disable_domain(ip, domain)
            return "{} 禁用域名 {} 成功".format(ip, domain)
Пример #12
0
    def __init__(self, count):
        """Create the Flask app and the MongoPool, then register the routes.

        Args:
            count: stored as ``self.count`` and passed as ``count=`` to the
                pool queries made by ``/random`` and ``/proxies``.

        Routes registered: ``/``, ``/index``, ``/random``, ``/proxies`` and
        ``/disable_domain``.
        """
        self.count = count
        self.app = Flask(__name__, template_folder="../assets/templates")
        self.mongo_pool = MongoPool()

        @self.app.route('/')
        @self.app.route('/index')
        def index():
            """Render ``index.html``."""
            return render_template("index.html")

        @self.app.route('/random')
        def random():
            """Return the proxy from ``random_proxy`` as ``[protocol://]ip:port``."""
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=self.count)

            host_port = "{}:{}".format(picked.ip, picked.port)
            if not protocol:
                return host_port
            return "{}://{}".format(protocol, host_port)

        @self.app.route('/proxies')
        def proxies():
            """Return the ``__dict__`` of each proxy from ``get_proxies`` as JSON."""
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'), count=self.count)
            # Proxy objects -> dicts -> JSON text.
            return json.dumps([entry.__dict__ for entry in found])

        @self.app.route('/disable_domain')
        def disable_domain():
            """Call ``disable_domain(ip, domain)`` once both parameters are given."""
            ip = request.args.get('ip')
            if ip is None:
                return '请提供IP参数\n'
            domain = request.args.get('domain')
            if domain is None:
                return '请提供domain参数\n'

            self.mongo_pool.disable_domain(ip, domain)
            return "{} 禁用域名 {} 成功".format(ip, domain)
Пример #13
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        self.app = Flask(__name__)
        # Database access object.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            '''
            Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol type of the caller's request
                domain: domain of the caller's request
            :return: ``protocol://ip:port`` when protocol is given,
                otherwise ``ip:port``
            '''
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_MAX_COUNT)
            host_port = f'{picked.ip}:{picked.port}'
            if not protocol:
                return host_port
            return f'{protocol}://{host_port}'

        @self.app.route('/proxies')
        def proxies():
            """Return the ``__dict__`` of each proxy from ``get_proxies`` as JSON."""
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT)
            # Convert the proxy objects to dicts and return them as JSON text.
            return json.dumps([entry.__dict__ for entry in found])

        @self.app.route('/disable_domain')
        def disable_domain():
            """Call ``disable_domain(ip, domain)`` once both parameters are given."""
            ip = request.args.get('ip')
            if ip is None:
                return "请提供ip参数"
            domain = request.args.get('domain')
            if domain is None:
                return "请提供domain参数"
            self.mongo_pool.disable_domain(ip, domain)
            return f'{ip} 禁用域名 {domain} 成功'
Пример #14
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random`` and ``/proxies``.
        """
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random_proxy():
            """Return the ``__dict__`` of one randomly chosen stored proxy.

            Responds with a 404 and an error object when the pool is empty.
            """
            # The old code indexed with random.randint(1, 100): index 0 was
            # never chosen and any pool with fewer than 101 entries could
            # raise IndexError. random.choice covers exactly the items
            # that exist.
            candidates = list(self.mongo_pool.find_all())
            if not candidates:
                return {'error': 'no proxies available'}, 404
            return random.choice(candidates).__dict__

        @self.app.route('/proxies')
        def proxies_list():
            """Return the ``__dict__`` of every stored proxy as a JSON array."""
            proxies = self.mongo_pool.find_all()
            dict_list = [proxy.__dict__ for proxy in proxies]
            return json.dumps(dict_list)
Пример #15
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """Return the proxy from ``random_proxy`` as ``[protocol://]ip:port``."""
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_MAX_COUNT)

            host_port = f'{picked.ip}:{picked.port}'
            if not protocol:
                return host_port
            return f'{protocol}://{host_port}'

        @self.app.route('/proxies')
        def proxies():
            """Return the ``__dict__`` of each proxy from ``get_proxies`` as JSON."""
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT)
            # Convert the proxy objects to dicts and return them as JSON text.
            return json.dumps([entry.__dict__ for entry in found])

        @self.app.route('/disable_domain')
        def disable_domain():
            """Call ``disable_domain(ip, domain)`` once both parameters are given."""
            ip = request.args.get('ip')
            if ip is None:
                return '请求提供Ip参数'
            domain = request.args.get('domain')
            if domain is None:
                return '请提供域名domain参数'
            self.mongo_pool.disable_domain(ip, domain)
            return f"{ip} 禁用域名 {domain} 成功"
Пример #16
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes."""
        # 2. Implement the initialiser.
        # 2.1 Initialise the Flask web service.
        # NOTE(review): the index() view below appears truncated in this copy
        # (its HTML string runs into the next route decorator) - restore it
        # from the original file.
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/')
        def index():
            html = '''
                   <h2 align="center">Welcome to my proxies!</h2>
                   <div align="center"><a href="http://*****:*****@self.app.route('/random')
        def random():
            protocol = request.args.get('protocol')
            domain = request.args.get('domain')
            proxy = self.mongo_pool.random_proxy(protocol,
                                                 domain,
                                                 count=PROXIES_MAX_COUNT)
            if protocol:
                return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
            else:
                return '{}:{}'.format(proxy.ip, proxy.port)

        @self.app.route('/proxies')
        def proxies():
            protocol = request.args.get('protocol')
            domain = request.args.get('domain')

            proxies = self.mongo_pool.get_proxies(protocol,
                                                  domain,
                                                  count=PROXIES_MAX_COUNT)
            proxies = [proxy.__dict__ for proxy in proxies]
            return json.dumps(proxies, ensure_ascii=False)
Пример #17
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # Initialise the Flask service.
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """Return one proxy as ``[protocol://]ip:port``.

            Returns the placeholder text ``"test"`` when the pool yields no
            proxy.
            """
            protocol = request.args.get("protocol")
            domain = request.args.get("domain")
            proxy = self.mongo_pool.get_random_proxy(protocl=protocol,
                                                     domain=domain)
            if not proxy:
                return "test"
            address = "{}:{}".format(proxy.ip, proxy.port)
            # Without a protocol parameter the old code produced
            # "None://ip:port"; only add the scheme when one was requested.
            if protocol:
                return "{}://{}".format(protocol, address)
            return address

        @self.app.route('/proxies')
        def proxies():
            """Return the ``__dict__`` of each proxy from ``get_proxies`` as JSON."""
            # Requested protocol.
            protocol = request.args.get("protocol")
            # Requested domain.
            domain = request.args.get("domain")
            proxies = self.mongo_pool.get_proxies(protocl=protocol,
                                                  domain=domain,
                                                  count=PROXIES_MAX_COUNT)
            proxies_list = [proxy.__dict__ for proxy in proxies]
            return json.dumps(proxies_list)

        @self.app.route('/disable_domain')
        def disable_domain():
            """Call ``disable_domain(ip, domain)`` once both parameters are given."""
            ip = request.args.get('ip')
            domain = request.args.get("domain")
            if ip is None:
                return "ip不能为空"
            if domain is None:
                return "domain不能为空"
            self.mongo_pool.disable_domain(ip, domain)
            return "{} 禁用{} 成功".format(ip, domain)
Пример #18
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random`` and ``/proxies``.
        """
        # 2. Implement the initialiser.
        # 2.1 Initialise the Flask web service.
        self.app = Flask(__name__)
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """Return the proxy from ``random_proxy`` as ``[protocol://]ip:port``."""
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_MAX_COUNT)
            host_port = f'{picked.ip}:{picked.port}'
            if not protocol:
                return host_port
            return f'{protocol}://{host_port}'

        @self.app.route('/proxies')
        def proxies():
            """Return the ``__dict__`` of each proxy from ``get_proxies`` as JSON."""
            query = request.args
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT)
            return json.dumps([entry.__dict__ for entry in found])
Пример #19
0
 def __init__(self):
     """Create the MongoPool, the queue and the coroutine pool."""
     self.mongo_pool = MongoPool()
     # Create the Queue and the coroutine pool.
     self.queue = Queue()
     self.coroutine_pool = Pool()
Пример #20
0
 def __init__(self):
     """Create the MongoPool, the queue and the coroutine pool."""
     # Create the MongoPool object used for database operations.
     self.mongo_pool = MongoPool()
     # Create the queue and the coroutine pool in __init__.
     self.queue = Queue()
     self.coroutine_pool = Pool()
Пример #21
0
    def __init__(self):
        """Create the MongoPool and the coroutine pool."""
        # Create the MongoPool proxy-pool object.
        self.mongo_pool = MongoPool()

        # 3.1 Create the coroutine pool object.
        self.coroutine_pool = Pool()
Пример #22
0
 def __init__(self):
     """Create the MongoPool and the coroutine pool."""
     # Create the MongoPool object.
     self.mongo_pool = MongoPool()
     # Create the coroutine pool in __init__.
     self.coroutine_pool = Pool()
Пример #23
0
 def __init__(self):
     """Create the MongoPool, the queue and the coroutine pool."""
     # Create the MongoPool object used for database operations.
     self.mongo_pool = MongoPool()
     # 3.1 Create the queue and the coroutine pool in __init__.
     self.queue = Queue()
     self.coroutine_pool = Pool()
Пример #24
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # 2. Implement the initialiser.
        # 2.1 Initialise the Flask web service.
        self.app = Flask(__name__)
        # MongoPool object used for the database operations.
        self.mongo_pool = MongoPool()

        @self.app.route('/random')
        def random():
            """
            2.2 Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol type of the caller's request
                domain: domain of the caller's request
            """
            query = request.args
            protocol = query.get('protocol')
            domain = query.get('domain')
            picked = self.mongo_pool.random_proxy(
                protocol, domain, count=PROXIES_MAX_COUNT)

            host_port = f'{picked.ip}:{picked.port}'
            if not protocol:
                return host_port
            return f'{protocol}://{host_port}'

        @self.app.route('/proxies')
        def proxies():
            """
            2.3 Serve several proxy IPs for the given protocol and domain.
            """
            query = request.args
            # protocol: http/https; domain: e.g. jd.com
            found = self.mongo_pool.get_proxies(
                query.get('protocol'), query.get('domain'),
                count=PROXIES_MAX_COUNT)
            # Convert the proxy objects to dicts and return them as JSON text.
            return json.dumps([entry.__dict__ for entry in found])

        @self.app.route('/disable_domain')
        def disable_domain():
            """
            2.4 Record that a domain must not be served through the given IP,
            so that IP is no longer returned for that domain.
            """
            ip = request.args.get('ip')
            if ip is None:
                return '请提供ip参数'
            domain = request.args.get('domain')
            if domain is None:
                return '请提供域名domain参数'

            self.mongo_pool.disable_domain(ip, domain)
            return f"{ip} 禁用域名 {domain} 成功"
Пример #25
0
 def __init__(self):
     """Create the MongoPool and the coroutine pool."""
     # Create the MongoPool object (per the original note: set up the data
     # connection and get the collection to operate on).
     self.mongo_pool=MongoPool()
     # Create the coroutine pool object in __init__.
     self.coroutine_pool=Pool()
Пример #26
0
 def __init__(self):
     """Create the MongoPool, the queue and the coroutine pool."""
     # Create the MongoPool object.
     self.mongo_pool = MongoPool()
     # 3.1 Create the queue and the coroutine pool in __init__.
     self.queue = Queue()
     self.coroutine_pool = Pool()
Пример #27
0
 def __init__(self):
     """Create the MongoPool and the Pool used by this object."""
     self.mongo_pool = MongoPool()
     self.coroutine_pool = Pool()
Пример #28
0
    def __init__(self):
        """Create the Flask app and the MongoPool, then register the routes.

        Routes registered: ``/random``, ``/proxies`` and ``/disable_domain``.
        """
        # 2.1 Initialise the Flask web service.
        self.app = Flask(__name__)
        # MongoDB connection object.
        self.mongo_pool = MongoPool()

        def _nick_type_kwargs():
            """Return ``{'nick_type': int}`` if the query supplies it, else ``{}``.

            Raises ValueError when the supplied value is not an integer.
            The old code called ``int(None)`` whenever the parameter was
            omitted, so every such request failed.
            NOTE(review): assumes the pool methods have a default for
            ``nick_type`` - confirm. If they do not, the resulting TypeError
            is reported the same way as before.
            """
            raw = request.args.get('nick_type')
            return {} if raw is None else {'nick_type': int(raw)}

        @self.app.route('/random')
        def random():
            """
            2.2 Serve one proxy IP chosen for the given protocol and domain.

            Query parameters:
                protocol: protocol type of the caller's request
                domain: domain of the caller's request
                nick_type: optional integer passed through to the pool
            """
            protocol = request.args.get('protocol')
            domain = request.args.get('domain')
            try:
                proxy = self.mongo_pool.random_proxy(protocol=protocol,
                                                     domain=domain,
                                                     count=PROXIES_MAX_COUNT,
                                                     **_nick_type_kwargs())
            except Exception as e:
                logger.exception(e)
                return '数据库无法获取对应的Ip,请更改参数后重试'
            if proxy is None:
                # Nothing to format; report it like any other lookup failure.
                return '数据库无法获取对应的Ip,请更改参数后重试'
            if protocol:
                return '{}://{}:{}'.format(protocol, proxy.ip, proxy.port)
            return '{}:{}'.format(proxy.ip, proxy.port)

        @self.app.route('/proxies')
        def proxies():
            """
            2.3 Serve several proxy IPs for the given protocol and domain.
            """
            # Protocol: http / https
            protocol = request.args.get('protocol')
            # Domain, e.g. jd.com
            domain = request.args.get('domain')
            try:
                proxies = self.mongo_pool.get_proxies(protocol=protocol,
                                                      domain=domain,
                                                      count=PROXIES_MAX_COUNT,
                                                      **_nick_type_kwargs())
            except Exception as e:
                logger.exception(e)
                return '数据库无法获取对应的Ip,请更改参数后重试'
            proxies = [proxy.__dict__ for proxy in proxies]
            return json.dumps(proxies)

        @self.app.route('/disable_domain')
        def disable_domain():
            """
            2.4 Record that a domain must not be served through the given IP,
            so that IP is no longer returned for that domain.
            :return: a status message
            """
            ip = request.args.get('ip')
            domain = request.args.get('domain')
            # The old guard `if 1 or ip and domain is None` was always true,
            # so this endpoint never reached the database.
            if ip is None or domain is None:
                return '参数IP 与 domain缺一不可'
            try:
                self.mongo_pool.disable_domain(ip, domain)
            except Exception as e:
                logger.exception(e)
                return 'IP不存在或格式有误,请核实后再试'

            return 'IP:{} 禁用域名 {} 成功'.format(ip, domain)
Пример #29
0
            area = self.get_first_ele(tr.xpath(self.detail_xpath['area']))
            proxy = Proxy(ip, port, area=area)
            yield proxy

    def get_first_ele(self, lis):
        """Return the first item of ``lis``, or ``''`` when it is empty."""
        if len(lis) == 0:
            return ''
        return lis[0]

    def get_proxies(self):
        """Yield every proxy parsed from each URL in ``self.urls``, in order.

        Each URL is fetched with ``get_page_from_url`` and the result is
        handed to ``get_proxies_from_page``.
        """
        for page_url in self.urls:
            yield from self.get_proxies_from_page(
                self.get_page_from_url(page_url))


if __name__ == '__main__':
    # Manual run: crawl the first three listing pages and store every
    # parsed proxy in MongoDB.
    config = {
        'urls':
        ['https://www.xicidaili.com/nt/{}'.format(i) for i in range(1, 4)],
        # Check the page source for whether a <tbody> really exists.
        'group_xpath': '//*[@id="ip_list"]/tr',
        'detail_xpath': {
            'ip': './td[2]/text()',
            'port': './td[3]/text()',
            'area': './td[4]/a/text()',
        }
    }
    c_spider = BaseSpider(**config)
    # One pool for the whole run, instead of a new MongoPool per proxy.
    mongo = MongoPool()
    for index, proxy in enumerate(c_spider.get_proxies()):
        mongo.insert_one(proxy)
        print(index, proxy)
Пример #30
0
 def __init__(self):
     """Create the MongoPool, the queue and the coroutine pool."""
     # Create the MongoPool object used for database operations.
     self.mongo_pool = MongoPool()
     self.queue = Queue()
     self.coroutine_pool = Pool()