def chinahpo(hpo):
    """Fetch the chinahpo.org search page for one HPO ID and save its HTML.

    A headless Edge (Chromium) session is started behind a proxy obtained
    from ``randomIP()`` with a user agent from ``randomUA()``. The rendered
    page source is appended to ``html2/hp_<id>.html`` and the HPO ID is
    appended to ``finish.txt``.

    Args:
        hpo: HPO identifier containing a colon; the part after the first
            colon is used as the numeric ID in the URL and file name.

    Raises:
        IndexError: if ``hpo`` contains no ``":"``.
    """
    # Parse the ID first so a malformed identifier fails before a browser
    # process is launched.
    hpid = hpo.split(":")[1]

    # No random wait here: the original note says waiting is unnecessary
    # when an IP pool is used.
    ip = randomIP()
    print("使用IP " + ip)

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("headless")
    options.add_argument("--proxy-server={ip}".format(ip=ip))
    # Flags/options below reduce the automation fingerprint of the browser.
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"
    driver = Edge(options=options, executable_path=msedge)
    try:
        # Hide navigator.webdriver from page scripts.
        script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        driver.execute_script(script)

        UA = randomUA()
        driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": UA})
        print(driver.execute_script("return navigator.userAgent;"))

        url = "http://www.chinahpo.org/#/searchList?trigger=1&tabType=1&searchContent=HP%3A{hpid}".format(
            hpid=hpid)
        try:
            driver.get(url)
            print("网址:", url)
        except Exception:
            # Best effort: whatever the browser holds is still saved below.
            print("get page error", hpo)

        # Short pause before reading the DOM (presumably to let the
        # client-side page finish rendering -- TODO confirm).
        time.sleep(2)
        with open("html2/hp_" + hpid + ".html", "a+", encoding="utf-8") as f:
            f.write(str(driver.page_source))
    finally:
        # quit() ends the whole session and the msedgedriver process;
        # close() would only close the current window.
        driver.quit()

    with open("finish.txt", "a") as fin:
        fin.write(hpo + "\n")
def test_chromium_options(self):
    """Verify that Edge launches in Chromium mode and accepts CDP commands.

    The test fails if the driver cannot be created, if the reported
    browser name is not ``msedge``, or if ``Browser.getVersion`` does not
    return a ``userAgent`` entry. The driver is always shut down, even
    when an assertion fails.
    """
    driver = None
    try:
        options = EdgeOptions()
        options.use_chromium = True
        driver = Edge(options=options)
        cap = driver.capabilities
        self.assertEqual('msedge', cap['browserName'], 'Driver launches Edge Chromium.')

        result = driver.execute_cdp_cmd('Browser.getVersion', {})
        self.assertIn('userAgent', result, 'Driver can send Chromium-specific commands.')
    except self.failureException:
        # Let assertion failures through with their specific messages.
        raise
    except Exception as exc:
        # Any other error is reported as a test failure, keeping the cause.
        self.fail('Test chromium options failed: {!r}'.format(exc))
    finally:
        # Release the browser on every path, not only on success.
        if driver is not None:
            driver.quit()
def chinahpo(hpo_queue):
    """Drain *hpo_queue*, saving the chinahpo.org search page for each HPO ID.

    For every ID taken from the queue the function waits 5-10 seconds,
    picks a proxy via ``randomIP()``, looks up its timezone and coordinates
    via ``get_timezone_geolocation()``, and drives a headless Edge
    (Chromium) session with matching user-agent, geolocation and timezone
    overrides. The page source is appended to ``html2/hp_<id>.html`` and
    the ID is appended to ``finish.txt``. When the saved file size is
    between 9000 and 15000 (as reported by ``getDocSize``), the
    ``"<hpo>\\t<ip>"`` pair is appended to ``ip_check_better.txt``.

    Args:
        hpo_queue: queue-like object supporting ``get(block=False)`` that
            raises ``queue.Empty`` when exhausted; items are HPO
            identifiers containing a colon.

    Raises:
        IndexError: if a queued identifier contains no ``":"``.
    """
    import queue  # local import: only the Empty exception is needed

    msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"

    while True:
        # A non-blocking get avoids the empty()/get() race: another
        # consumer could take the last item between the two calls and
        # leave this worker blocked forever.
        try:
            hpo = hpo_queue.get(block=False)
        except queue.Empty:
            break

        # Parse the ID before launching a browser so malformed input
        # fails early.
        hpid = hpo.split(":")[1]
        html_path = "html2/hp_" + hpid + ".html"

        # Random pause between items (the original note says this can be
        # skipped when an IP pool is in use).
        s = random.randint(5, 10)
        print(hpo, "等待 " + str(s) + "秒")
        time.sleep(s)

        ip = randomIP()
        hpo_ip = hpo + "\t" + ip
        print(hpo_ip)

        options = EdgeOptions()
        options.use_chromium = True
        options.add_argument("headless")
        options.add_argument("--proxy-server=http://{ip}".format(ip=ip))
        # Flags/options below reduce the automation fingerprint.
        options.add_argument("--disable-blink-features")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_argument("start-maximized")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        # geo is indexed as (timezone id, latitude, longitude).
        geo = get_timezone_geolocation(ip)
        print(geo)
        geo_json = {"latitude": geo[1], "longitude": geo[2], "accuracy": 1}
        timezone = {"timezoneId": geo[0]}

        # WebRTC preferences restricting non-proxied UDP traffic.
        preferences = {
            "webrtc.ip_handling_policy": "disable_non_proxied_udp",
            "webrtc.multiple_routes_enabled": False,
            "webrtc.nonproxied_udp_enabled": False
        }
        options.add_experimental_option("prefs", preferences)

        driver = Edge(options=options, executable_path=msedge)
        try:
            # Hide navigator.webdriver from page scripts.
            script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            driver.execute_script(script)

            UA = UserAgent().random
            driver.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": UA})
            driver.execute_cdp_cmd("Emulation.setGeolocationOverride", geo_json)
            driver.execute_cdp_cmd("Emulation.setTimezoneOverride", timezone)
            print(driver.execute_script("return navigator.userAgent;"))

            url = "http://www.chinahpo.org/#/searchList?trigger=1&tabType=1&searchContent=HP%3A{hpid}".format(
                hpid=hpid)
            try:
                driver.get(url)
                print("网址:", url)
            except Exception:
                # Best effort: whatever the browser holds is still saved.
                print("get page error", hpo)

            # Short pause before reading the DOM (presumably to let the
            # client-side page finish rendering -- TODO confirm).
            time.sleep(2)
            with open(html_path, "a+", encoding="utf-8") as f:
                f.write(str(driver.page_source))
        finally:
            # quit() ends the session and the msedgedriver process;
            # close() would only close the current window.
            driver.quit()

        with open("finish.txt", "a") as fin:
            fin.write(hpo + "\n")

        # Record proxies whose saved page falls in the expected size range.
        size = getDocSize(html_path)
        if 9000 <= size <= 15000:
            with open("ip_check_better.txt", "a") as check_ip:
                check_ip.write(hpo_ip + "\n")