Exemplo n.º 1
0
def chinahpo(hpo):
    """Fetch the chinahpo.org search page for one HPO term and save its HTML.

    A headless Chromium-based Edge session is started behind the proxy
    returned by ``randomIP()`` with the user agent returned by ``randomUA()``.
    The page source is appended to ``html2/hp_<id>.html`` and the term is then
    appended to ``finish.txt``.

    Args:
        hpo: Term string whose second colon-separated field is used as the id
            in the URL and in the output file name (presumably of the form
            ``"HP:0000001"`` - confirm with callers).

    Raises:
        IndexError: If ``hpo`` contains no colon.
    """
    # Parse the id first so a malformed term fails before a proxy is taken
    # and a browser process is launched.
    hpid = hpo.split(":")[1]
    url = "http://www.chinahpo.org/#/searchList?trigger=1&tabType=1&searchContent=HP%3A{hpid}".format(
        hpid=hpid)

    # No random delay here: it is skipped when an IP pool is used.
    ip = randomIP()
    print("使用IP " + ip)

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("headless")
    options.add_argument("--proxy-server={ip}".format(ip=ip))
    # Flags and switches below reduce the usual browser-automation hints.
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"

    driver = Edge(options=options, executable_path=msedge)
    try:
        script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        driver.execute_script(script)
        UA = randomUA()
        driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": UA})
        print(driver.execute_script("return navigator.userAgent;"))

        # Best effort: a navigation error is only reported; whatever the
        # browser currently holds is still saved below.
        try:
            driver.get(url)
            print("网址:", url)
        except Exception:
            print("get page error", hpo)

        # Give the page a moment to render before reading its source.
        time.sleep(2)
        with open("html2/hp_" + hpid + ".html", "a+", encoding="utf-8") as f:
            f.write(str(driver.page_source))
    finally:
        # quit() (not close()) also terminates the driver process, and the
        # finally clause guarantees that happens even when a step above raises.
        driver.quit()

    with open("finish.txt", "a") as fin:
        fin.write(hpo + "\n")
    def test_chromium_options(self):
        """Check that ``use_chromium`` launches Edge Chromium with CDP support.

        Starts an Edge session with ``use_chromium = True``, asserts that the
        reported ``browserName`` capability is ``'msedge'``, and asserts that
        the result of the ``Browser.getVersion`` CDP command contains a
        ``'userAgent'`` entry.
        """
        options = EdgeOptions()
        options.use_chromium = True
        driver = Edge(options=options)
        # try/finally instead of a bare except: the browser is shut down even
        # when an assertion fails, and the real failure message reaches the
        # test runner instead of being replaced by a generic one.
        try:
            cap = driver.capabilities
            self.assertEqual('msedge', cap['browserName'],
                             'Driver launches Edge Chromium.')

            result = driver.execute_cdp_cmd('Browser.getVersion', {})
            self.assertIn('userAgent', result,
                          'Driver can send Chromium-specific commands.')
        finally:
            driver.quit()
Exemplo n.º 3
0
def chinahpo(hpo_queue):
    """Drain a queue of HPO terms, saving the chinahpo.org page for each.

    For every term taken from ``hpo_queue`` the function waits 5-10 seconds,
    starts a headless Chromium-based Edge session behind the proxy returned by
    ``randomIP()``, overrides the user agent, geolocation and timezone, loads
    the search page for the term and appends the page source to
    ``html2/hp_<id>.html``.  The term is then appended to ``finish.txt``; when
    the saved file's size as reported by ``getDocSize`` is between 9000 and
    15000 inclusive, the term and proxy are also appended to
    ``ip_check_better.txt``.

    Args:
        hpo_queue: Queue-like object providing ``get_nowait()`` that raises
            ``queue.Empty`` when drained (``queue.Queue`` and
            ``multiprocessing.Queue`` both do).  Items are strings whose
            second colon-separated field is the term id (presumably
            ``"HP:0000001"``-style - confirm with the producer).

    Raises:
        IndexError: If a queued term contains no colon.
    """
    import queue  # local import: only needed for the Empty exception

    # Loop-invariant values, built once instead of on every iteration.
    msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"
    script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"

    while True:
        # A non-blocking get avoids the empty()/get() race: with several
        # workers, another one may take the last item between the two calls
        # and a blocking get() would then wait forever.
        try:
            hpo = hpo_queue.get_nowait()
        except queue.Empty:
            break

        # Parse the id first so a malformed term fails before the wait and
        # before a browser process is launched.
        hpid = hpo.split(":")[1]
        url = "http://www.chinahpo.org/#/searchList?trigger=1&tabType=1&searchContent=HP%3A{hpid}".format(
            hpid=hpid)
        html_path = "html2/hp_" + hpid + ".html"

        # Random pause between requests.
        s = random.randint(5, 10)
        print(hpo, "等待 " + str(s) + "秒")
        time.sleep(s)

        ip = randomIP()
        hpo_ip = hpo + "\t" + ip
        print(hpo_ip)

        options = EdgeOptions()
        options.use_chromium = True
        options.add_argument("headless")
        options.add_argument("--proxy-server=http://{ip}".format(ip=ip))
        # Flags and switches below reduce the usual browser-automation hints.
        options.add_argument("--disable-blink-features")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_argument("start-maximized")
        options.add_experimental_option("excludeSwitches",
                                        ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        # geo is indexed as (timezone id, latitude, longitude) below.
        geo = get_timezone_geolocation(ip)
        print(geo)
        geo_json = {"latitude": geo[1], "longitude": geo[2], "accuracy": 1}
        timezone = {"timezoneId": geo[0]}

        # WebRTC preferences restricting traffic that would bypass the proxy.
        preferences = {
            "webrtc.ip_handling_policy": "disable_non_proxied_udp",
            "webrtc.multiple_routes_enabled": False,
            "webrtc.nonproxied_udp_enabled": False
        }
        options.add_experimental_option("prefs", preferences)

        driver = Edge(options=options, executable_path=msedge)
        try:
            driver.execute_script(script)
            UA = UserAgent().random
            driver.execute_cdp_cmd("Network.setUserAgentOverride",
                                   {"userAgent": UA})
            driver.execute_cdp_cmd("Emulation.setGeolocationOverride",
                                   geo_json)
            driver.execute_cdp_cmd("Emulation.setTimezoneOverride", timezone)

            print(driver.execute_script("return navigator.userAgent;"))

            # Best effort: a navigation error is only reported; whatever the
            # browser currently holds is still saved below.
            try:
                driver.get(url)
                print("网址:", url)
            except Exception:
                print("get page error", hpo)

            # Give the page a moment to render before reading its source.
            time.sleep(2)
            with open(html_path, "a+", encoding="utf-8") as f:
                f.write(str(driver.page_source))
        finally:
            # quit() (not close()) also terminates the driver process, and
            # the finally clause guarantees it runs when a step above raises.
            driver.quit()

        with open("finish.txt", "a") as fin:
            fin.write(hpo + "\n")

        # Record term/proxy pairs whose saved page falls in the 9000-15000
        # size window.
        size = getDocSize(html_path)
        if 9000 <= size <= 15000:
            with open("ip_check_better.txt", "a") as checkIP:
                checkIP.write(hpo_ip + "\n")