示例#1
0
 def __init__(self, user_agent=''):
     """Populate the static proxy list and create the proxy pool.

     :param user_agent: accepted by the signature but not used here.
     """
     # Entries that carry only a proxy address.
     address_only = [
         dict(ip_port='http://111.8.60.9:8123'),
         dict(ip_port='http://101.71.27.120:80'),
     ]
     # Entries that additionally carry a 'user_passwd' string.
     with_credentials = [
         dict(
             ip_port='http://122.96.59.104:80',
             user_passwd='user3:pass3',
         ),
         dict(
             ip_port='http://122.224.249.122:8088',
             user_passwd='user4:pass4',
         ),
     ]
     self.ip_ls = address_only + with_credentials
     self.pool = ProxyIpPool(crawl=False)
示例#2
0
class ProxyIpMiddleware(object):
    """Scrapy middleware that assigns a proxy to every outgoing request.

    The proxy address is normally taken from ``self.pool``; the static
    ``self.ip_ls`` list is used only when the pool returns nothing.
    """

    def __init__(self, user_agent=''):
        """Set up the static proxy list and the proxy pool.

        :param user_agent: accepted by the signature but not used here.
        """
        # Static proxies. Entries with a 'user_passwd' key are sent with a
        # Proxy-Authorization header; entries without it are used as-is.
        self.ip_ls = [
            {
                'ip_port': 'http://111.8.60.9:8123'
            },
            {
                'ip_port': 'http://101.71.27.120:80'
            },
            {
                'ip_port': 'http://122.96.59.104:80',
                'user_passwd': 'user3:pass3'
            },
            {
                'ip_port': 'http://122.224.249.122:8088',
                'user_passwd': 'user4:pass4'
            },
        ]
        # The proxy IP pool is used here.
        self.pool = ProxyIpPool(crawl=False)

    def _select_proxy(self):
        """Return a proxy dict from the pool, or a static entry as fallback."""
        entry = self.pool.get_proxy()
        if entry:
            # Index 1 of the pool entry is used as the address
            # (presumably "host:port" -- confirm against ProxyIpPool).
            return {'ip_port': 'http://' + entry[1]}
        # The pool returned nothing: pick one of the static proxies instead
        # of failing on ``None[1]``.
        return random.choice(self.ip_ls)

    def process_request(self, request, spider):
        """Set the proxy (and proxy credentials, if any) on *request*.

        :param request: request object exposing ``meta`` and ``headers``
            mappings; both are modified in place.
        :param spider: unused.
        :return: None
        """
        print('===ProxyIpMiddleware process_request==')
        # Show the User-Agent currently set on the request (None if absent).
        print("********Current UserAgent:%s************"
              % request.headers.get('User-Agent'))
        proxy = self._select_proxy()
        print(proxy)
        request.meta['proxy'] = proxy['ip_port']

        # Pool-provided proxies have no 'user_passwd' key, so use .get()
        # rather than indexing, which raised KeyError on every request.
        user_passwd = proxy.get('user_passwd')
        if user_passwd:
            # b64encode needs bytes and returns bytes; convert on both sides
            # so the header value is text that can be joined to 'Basic '.
            if not isinstance(user_passwd, bytes):
                user_passwd = user_passwd.encode('utf-8')
            token = base64.b64encode(user_passwd).decode('ascii')
            # Header format expected by the proxy server.
            request.headers['Proxy-Authorization'] = 'Basic ' + token