示例#1
0
def forRequest():
    """Store the current proxy list in the module-level ``proxyExistsAll``.

    Calls ``myProxy.proxyExistsAll()`` and binds whatever it returns to the
    global name ``proxyExistsAll``. Returns nothing.
    """
    global proxyExistsAll
    fetched = myProxy.proxyExistsAll()
    proxyExistsAll = fetched
示例#2
0
def requestByProxy(url):
    """Fetch ``url`` through a randomly chosen HTTP proxy and return the body.

    A proxy entry is picked at random from ``myProxy.proxyExistsAll()``; each
    entry is read as ``entry[0]`` (host) and ``entry[1]`` (port), joined as
    ``host:port``. One browser User-Agent is picked at random per call and is
    sent with every attempt. When an attempt fails, another random proxy is
    selected and the request is retried; after five failed attempts the URL
    is dropped.

    Args:
        url: Address handed to the opener's ``open`` method.

    Returns:
        The response body as returned by ``read()``, or ``""`` when no proxy
        is available or all attempts failed.
    """
    max_attempts = 5

    # (label, User-Agent header value); only the header value is sent.
    user_agents = [
        [
            "chrome",
            "Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30",
        ],
        ["Firefox", "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0"],
        [
            "IE8",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)",
        ],
        [
            "IE9",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
        ],
        ["Opera", "Opera/9.80 (Windows NT 5.1; U; zh-cn) Presto/2.9.168 Version/11.50"],
        [
            "360 safe Browser in IE8",
            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C)",
        ],
        [
            "Safari",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1",
        ],
        [
            "Maxthon",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12",
        ],
    ]

    proxies = myProxy.proxyExistsAll()
    if not proxies:
        # Without any proxy there is nothing to try; report it the same way
        # as an exhausted retry budget instead of failing in the random pick.
        print("drop:" + url)
        return ""

    user_agent = random.choice(user_agents)[1]

    def _opener_for(entry):
        # Build an opener bound to one proxy. The User-Agent is applied here
        # so that openers created for retries carry it as well.
        address = entry[0] + ":" + entry[1]
        handler = urllib2.ProxyHandler({"http": r"http://%s" % address})
        built = urllib2.build_opener(urllib2.HTTPCookieProcessor, handler)
        built.addheaders = [("User-agent", user_agent)]
        return built

    opener = _opener_for(random.choice(proxies))
    for attempt in range(1, max_attempts + 1):
        try:
            response = opener.open(url)
            try:
                body = response.read()
            finally:
                # Release the connection even when reading fails midway.
                response.close()
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still
            # interrupt the retry loop.
            if attempt == max_attempts:
                # Give up after five failed attempts.
                print("drop:" + url)
                break
            replacement = random.choice(proxies)
            print(u"更换代理:%s:%s" % (replacement[0], replacement[1]))
            opener = _opener_for(replacement)
        else:
            return body
    return ""