def init_browser():
    """Create the global Selenium ``browser`` described by ``config``.

    Keys read from the module-level ``config`` mapping:

    * ``chrome_type`` -- ``"msedge"`` starts Edge (Chromium), ``"chromium"``
      starts Chromium; any other value, or a missing key, starts Chrome.
    * ``headless`` -- when truthy, ``--headless`` is added.
    * ``mute_audio`` -- when truthy, ``--mute-audio`` is added.

    The driver binary is obtained through the driver-manager classes and
    the resulting driver is stored in the global ``browser``.  Afterwards
    the window is grown to at least 1200x850.
    """
    global browser

    chrome_type = config.get('chrome_type')

    if chrome_type == "msedge":
        chrome_options = EdgeOptions()
        chrome_options.use_chromium = True
    else:
        chrome_options = webdriver.ChromeOptions()

    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--use-fake-ui-for-media-stream')
    chrome_options.add_experimental_option(
        'prefs',
        {
            'credentials_enable_service': False,
            'profile.default_content_setting_values.media_stream_mic': 1,
            'profile.default_content_setting_values.media_stream_camera': 1,
            'profile.default_content_setting_values.geolocation': 1,
            'profile.default_content_setting_values.notifications': 1,
            'profile': {
                'password_manager_enabled': False
            }
        })
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_experimental_option('excludeSwitches',
                                           ['enable-automation'])

    if config.get('headless'):
        chrome_options.add_argument('--headless')
        print("Enabled headless mode")

    if config.get('mute_audio'):
        chrome_options.add_argument("--mute-audio")

    if chrome_type == "chromium":
        browser = webdriver.Chrome(
            ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(),
            options=chrome_options)
    elif chrome_type == "msedge":
        browser = Edge(EdgeChromiumDriverManager().install(),
                       options=chrome_options)
    else:
        browser = webdriver.Chrome(ChromeDriverManager().install(),
                                   options=chrome_options)

    # Make the window a minimum size to show the meetings menu.  Both
    # dimensions are decided first and applied in a single call: resizing
    # them one after the other with the originally measured values would
    # undo the width change whenever the height also had to grow.
    window_size = browser.get_window_size()
    width = window_size['width']
    height = window_size['height']

    if width < 1200:
        print("Resized window width")
        width = 1200

    if height < 850:
        print("Resized window height")
        height = 850

    if (width, height) != (window_size['width'], window_size['height']):
        browser.set_window_size(width, height)
def driver_init():
    """Return a Chromium-based Edge driver that uses a fixed user profile.

    The browser is pointed at the Edge "User Data" directory of the
    Windows account ``Chan`` and the ``enable-logging`` switch is excluded.
    """
    edge_opts = EdgeOptions()
    edge_opts.use_chromium = True
    edge_opts.add_argument(
        r"--user-data-dir=C:\Users\Chan\AppData\Local\Microsoft\Edge\User Data"
    )
    edge_opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    return Edge(options=edge_opts)
def __init__(self):
    """Start a Chromium-based Edge session and keep it on ``self.bro``."""
    edge_opts = EdgeOptions()
    edge_opts.use_chromium = True
    # To run without a visible window, add the "headless" and
    # "disable-gpu" arguments here.

    # Excluding "enable-logging" prevents useless output from being
    # printed; excluding "enable-automation" is meant to evade detection.
    excluded_switches = ['enable-automation', 'enable-logging']
    edge_opts.add_experimental_option("excludeSwitches", excluded_switches)

    self.bro = Edge(options=edge_opts)
def create(downloadPath):
    """Return an Edge driver that downloads into ``downloadPath``.

    ``downloadPath`` is appended to the current working directory with a
    backslash separator, and ``msedgedriver.exe`` is expected to sit in
    the same directory as this source file.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    download_dir = os.getcwd() + "\\" + downloadPath

    # NOTE(review): use_chromium is never set on these options -- confirm
    # that this is intended for the msedgedriver.exe binary used here.
    edge_opts = EdgeOptions()
    edge_opts.add_experimental_option(
        "prefs", {"download.default_directory": download_dir})

    return Edge(executable_path=script_dir + "\\msedgedriver.exe",
                options=edge_opts)
def createDriver(self, browser, driverPath, headless=None):
    """
    Start selenium web driver

    Args:
        browser (str): Browser type, either "Edg" or "Chrome"
        driverPath (Path): Path to driver
        headless (bool): Headless bool

    Returns:
        driver: selenium driver (also stored on ``self.driver``)

    Raises:
        ValueError: If ``browser`` is not one of the supported types.
    """
    self.headless = headless

    if browser == "Edg":
        edge_options = EdgeOptions()
        if self.headless:
            # make Edge headless
            # NOTE(review): use_chromium is only switched on for headless
            # runs -- confirm that non-headless runs are meant to keep the
            # default mode.
            edge_options.use_chromium = True
            edge_options.add_argument("headless")
            edge_options.add_argument("disable-gpu")
            edge_options.add_argument("--log-level=3")
            edge_options.add_experimental_option(
                'excludeSwitches',
                ['enable-logging']
            )
        self.driver = Edge(
            executable_path=str(driverPath),
            options=edge_options
        )
    elif browser == "Chrome":
        chrome_options = Options()
        if self.headless:
            chrome_options.add_argument("--headless")
            chrome_options.add_argument("--log-level=3")
            chrome_options.add_experimental_option(
                'excludeSwitches',
                ['enable-logging']
            )
        self.driver = webdriver.Chrome(
            executable_path=str(driverPath),
            options=chrome_options
        )
    else:
        # Fail explicitly: continuing would either crash on a missing
        # self.driver or silently reconfigure a driver from an earlier call.
        raise ValueError(
            "Browser not supported yet: {!r}".format(browser))

    self.driver.set_window_size(1800, 1080)
    self.driver.set_page_load_timeout(100000)

    return self.driver
def edge(self, sign_test: bool):
    """Return a Chromium-based Edge driver for a signing or plain run.

    Args:
        sign_test: When true, the extension returned by
            ``self.extensions_path()`` is loaded and the browser window is
            shown; when false, the browser runs headless without it.

    Returns:
        An ``Edge`` driver started from ``self.msedge_path()`` whose
        default download directory is ``self.save_folder()``.
    """
    driver_path = self.msedge_path()
    download_dir = self.save_folder()
    extension_file = self.extensions_path()

    options = EdgeOptions()
    if sign_test:
        options.add_extension(extension=extension_file)
    else:
        options.headless = True
    options.use_chromium = True
    options.add_experimental_option(
        "prefs", {"download.default_directory": download_dir})

    return Edge(executable_path=driver_path, options=options)
def chinahpo(hpo):
    """Save the chinahpo.org search page for one HPO term.

    A headless Edge session is started through a proxy from ``randomIP()``
    with a user agent from ``randomUA()``.  The rendered page source is
    appended to ``html2/hp_<id>.html`` and ``hpo`` is appended to
    ``finish.txt``.

    Args:
        hpo: Term identifier containing a colon (e.g. ``"HP:0000001"``);
            the text after the first colon is used as the numeric id.

    Raises:
        IndexError: If ``hpo`` contains no colon.
    """
    # When an IP pool is used, no random wait is performed between calls.
    ip = randomIP()
    print("使用IP " + ip)

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("headless")
    options.add_argument("--proxy-server={ip}".format(ip=ip))
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"
    driver = Edge(options=options, executable_path=msedge)

    # From here on the browser must be shut down whatever happens, so the
    # remaining work runs under try/finally.
    try:
        script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        driver.execute_script(script)

        UA = randomUA()
        driver.execute_cdp_cmd("Network.setUserAgentOverride",
                               {"userAgent": UA})
        print(driver.execute_script("return navigator.userAgent;"))

        hpid = hpo.split(":")[1]
        url = "http://www.chinahpo.org/#/searchList?trigger=1&tabType=1&searchContent=HP%3A{hpid}".format(
            hpid=hpid)

        try:
            driver.get(url)
            print("网址:", url)
        except Exception:
            # Best effort: whatever the browser currently shows is still
            # saved below.
            print("get page error", hpo)

        time.sleep(2)
        with open("html2/hp_" + hpid + ".html", "a+", encoding="utf-8") as f:
            f.write(str(driver.page_source))
    finally:
        # quit() ends the whole session, not only the current window.
        driver.quit()

    with open("finish.txt", "a") as fin:
        fin.write(hpo + "\n")
def StartEdgeDriver():
    """Open a headless Microsoft Edge (Chromium) browser and return it.

    The options are chosen to run headless while still rendering the
    required elements; downloads go to the module-level ``DownloadFolder``.
    """
    edge_opts = EdgeOptions()
    edge_opts.use_chromium = True

    # Set the download path.
    edge_opts.add_experimental_option(
        "prefs", {"download.default_directory": DownloadFolder})

    switches = (
        "--window-size=1920,1080",
        "--disable-extensions",
        "--proxy-server='direct://'",
        "--proxy-bypass-list=*",
        "--start-maximized",
        "--headless",
        "--disable-gpu",
        "--disable-dev-shm-usage",
        "--no-sandbox",
        "--ignore-certificate-errors",
    )
    for switch in switches:
        edge_opts.add_argument(switch)

    return Edge(options=edge_opts)
def edge():
    """Return a Chromium-based Edge driver that downloads into ``./files``.

    The driver binary is taken from ``./drivers/msedgedriver.exe`` (both
    paths are relative to the current working directory) and its path is
    also exported in the ``webdriver.msedge.driver`` environment variable.
    """
    download_prefs = {
        "download.default_directory": os.getcwd() + '/files',
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True,
    }

    edge_opts = EdgeOptions()
    edge_opts.add_experimental_option("prefs", download_prefs)
    edge_opts.use_chromium = True

    driver_path = os.getcwd() + '/drivers/msedgedriver.exe'
    os.environ['webdriver.msedge.driver'] = driver_path

    return Edge(executable_path=driver_path, options=edge_opts)
def wa_login(isHeadless=True):
    '''
    Use to login to Whatsapp Web
    Can omit usage if already logged in once by scanning QR

    Opens https://web.whatsapp.com/ in the global ``driver``.  If the
    marker file ``Cache/wa.exists`` is missing, a screenshot of the
    landing element is displayed and the marker file is then created.

    Parameters
    ----------
    isHeadless : bool
        Value assigned to the ``headless`` option of the browser.

    Returns
    -------
    None
    '''
    global driver

    edge_opts = EdgeOptions()
    # Uses chromium-based edgium, remove to use legacy edge
    edge_opts.use_chromium = True
    edge_opts.add_argument("user-data-dir=" + os.getcwd() + "\\Cache")
    edge_opts.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.49")
    edge_opts.add_experimental_option('excludeSwitches', ['enable-logging'])
    edge_opts.headless = isHeadless  # Headless mode

    driver = Edge(EdgeChromiumDriverManager().install(), options=edge_opts)
    driver.get('https://web.whatsapp.com/')

    # The marker file means a login was already completed once.
    if os.path.isfile('./Cache/wa.exists'):
        return

    wait_for_load('_1PTz1')

    # Remove two page elements, if present, before taking the screenshot.
    driver.execute_script(
        """ var element1 = document.querySelector("._3DgtU");"""
        """ var element2 = document.querySelector("._1iKcN");"""
        """ if (element1) element1.parentNode.removeChild(element1);"""
        """ if (element2) element2.parentNode.removeChild(element2); """
    )

    landing = driver.find_element_by_class_name('landing-main')
    Image.open(BytesIO(landing.screenshot_as_png)).show()

    # Create (or truncate) the empty marker file.
    with open('Cache/wa.exists', 'w'):
        pass
def Webpage_login(self, scanner_url, username, password):
    """Log in to the scanner web page using the Edge browser.

    Opens ``scanner_url``, fills the ``username`` and ``password`` fields,
    submits the form and finally clicks the element with id ``two01``.
    """
    # Path of the local edgedriver executable.
    driver_path = "./msedgedriver.exe"

    opts = EdgeOptions()
    opts.use_chromium = True  # use the Chromium engine
    startup_flags = (
        "disable-gpu",                   # disable GPU acceleration to avoid bugs
        'start-maximized',               # start maximized
        '--ignore-certificate-errors',   # bypass "Your connection is not private"
    )
    for flag in startup_flags:
        opts.add_argument(flag)
    # Turn off the "Microsoft Edge is being controlled by automated test
    # software" banner.
    opts.add_experimental_option('useAutomationExtension', False)
    opts.add_experimental_option('excludeSwitches', ['enable-automation'])

    driver = Edge(executable_path=driver_path, options=opts)
    driver.get(scanner_url)
    time.sleep(1)

    # Each field is clicked first and then looked up again for typing.
    for field_id, text in (('username', username), ('password', password)):
        driver.find_element_by_id(field_id).click()
        driver.find_element_by_id(field_id).send_keys(text)

    driver.find_element_by_class_name('submit').click()
    time.sleep(1)
    driver.find_element_by_id('two01').click()
def webDriverConfig(agent):
    """Return a headless Chromium-based Edge driver using ``agent``.

    ``agent`` is sent as the browser's user-agent string.  Every listed
    content setting is set to 2, which the original author describes as
    options to make selenium faster.
    """
    content_settings = (
        'images', 'plugins', 'popups', 'geolocation', 'notifications',
        'auto_select_certificate', 'fullscreen', 'mouselock',
        'mixed_script', 'media_stream', 'media_stream_mic',
        'media_stream_camera', 'protocol_handlers', 'ppapi_broker',
        'automatic_downloads', 'midi_sysex', 'push_messaging',
        'ssl_cert_decisions', 'metro_switch_to_desktop',
        'protected_media_identifier', 'app_banner', 'site_engagement',
        'durable_storage',
    )
    speed_prefs = {
        'profile.default_content_setting_values':
            dict.fromkeys(content_settings, 2)
    }

    edge_opts = EdgeOptions()
    edge_opts.use_chromium = True
    edge_opts.add_argument("--start-maximized")
    edge_opts.add_argument(f"user-agent={agent}")
    # Set this to False to watch the pages being visited.
    edge_opts.headless = True
    edge_opts.add_experimental_option("prefs", speed_prefs)

    return Edge('msedgedriver.exe', options=edge_opts)
def setup_driver(self, main_page):
    """Setup Selenium WebDriver which drives a browser natively, as a user
    would, either locally or on a remote machine using the Selenium server.

    The driver executable is looked up under ``EXTERN_PATH`` with a
    platform-specific suffix (``_win.exe`` or ``_linux``), the driver is
    stored on ``self.driver`` and ``main_page`` is opened.

    Args:
        main_page(str): main page address.

    Raises:
        ValueError: If ``self.driver_type`` is not "Edge", "Chrome" or
            "Firefox".
        RuntimeError: If the current platform is neither Windows nor Linux.
    """
    # Validate the driver type first so an unknown type is always reported
    # as ValueError, regardless of the platform.
    if self.driver_type not in ("Edge", "Chrome", "Firefox"):
        raise ValueError(
            f"Not known type of the provided driver: {self.driver_type}")

    if sys.platform in ["win32", "win64"]:
        driver_suffix = "_win.exe"
    elif sys.platform.startswith("linux"):
        driver_suffix = "_linux"
    else:
        # Without this branch driver_suffix would be unbound below.
        raise RuntimeError(
            f"No bundled driver for platform: {sys.platform}")

    if self.driver_type == "Edge":
        options = EdgeOptions()
        options.use_chromium = True
        options.add_experimental_option("excludeSwitches",
                                        ["enable-logging"])
        self.driver = Edge(
            executable_path=f"{EXTERN_PATH}/msedgedriver{driver_suffix}",
            options=options)
    elif self.driver_type == "Chrome":
        options = ChromeOptions()
        # NOTE(review): use_chromium looks like an Edge-only setting --
        # confirm whether it has any effect on ChromeOptions.
        options.use_chromium = True
        options.add_experimental_option('excludeSwitches',
                                        ['enable-logging'])
        self.driver = Chrome(
            executable_path=f"{EXTERN_PATH}/chromedriver{driver_suffix}",
            options=options)
    else:  # "Firefox"
        options = FirefoxOptions()
        self.driver = Firefox(
            executable_path=f"{EXTERN_PATH}/geckodriver{driver_suffix}",
            options=options)

    self.driver.get(main_page)
def get_local_driver(
        browser_name, headless, locale_code, servername,
        proxy_string, proxy_auth, proxy_user, proxy_pass, user_agent,
        disable_csp, enable_ws, enable_sync, use_auto_ext, no_sandbox,
        disable_gpu, incognito, guest_mode, devtools, swiftshader,
        block_images, user_data_dir, extension_zip, extension_dir,
        mobile_emulator, device_width, device_height, device_pixel_ratio):
    '''
    Spins up a new web browser and returns the driver.
    Can also be used to spin up additional browsers for the same test.

    The downloads folder is looked up and reset first.  The branch taken
    depends on ``browser_name`` (compared against ``constants.Browser``):
    Firefox, Internet Explorer, Edge, Safari, Opera, PhantomJS or Chrome.
    Any other value raises ``Exception``.

    Parameters used directly in this function: ``browser_name``,
    ``headless``, ``locale_code``, ``proxy_string``, ``user_agent``,
    ``disable_csp``, ``enable_ws``, ``guest_mode``, ``devtools``,
    ``swiftshader``, ``block_images``, ``mobile_emulator``,
    ``device_width``, ``device_height`` and ``device_pixel_ratio``.
    The remaining parameters are only forwarded to
    ``_set_chrome_options``.

    Several branches fall back to a second way of starting the browser
    when the first attempt raises; for Firefox and Chrome the final
    fallback is skipped (and the error re-raised) when ``headless`` is set.
    '''
    downloads_path = download_helper.get_downloads_folder()
    download_helper.reset_downloads_folder()

    if browser_name == constants.Browser.FIREFOX:
        try:
            try:
                # Use Geckodriver for Firefox if it's on the PATH
                profile = _create_firefox_profile(
                    downloads_path, locale_code,
                    proxy_string, user_agent, disable_csp)
                firefox_capabilities = DesiredCapabilities.FIREFOX.copy()
                firefox_capabilities['marionette'] = True
                options = webdriver.FirefoxOptions()
                if headless:
                    options.add_argument('-headless')
                    firefox_capabilities['moz:firefoxOptions'] = (
                        {'args': ['-headless']})
                if LOCAL_GECKODRIVER and os.path.exists(LOCAL_GECKODRIVER):
                    try:
                        make_driver_executable_if_not(LOCAL_GECKODRIVER)
                    except Exception as e:
                        logging.debug("\nWarning: Could not make geckodriver"
                                      " executable: %s" % e)
                elif not is_geckodriver_on_path():
                    args = " ".join(sys.argv)
                    if not ("-n" in sys.argv or "-n=" in args or args == "-c"):
                        # (Not multithreaded)
                        from seleniumbase.console_scripts import sb_install
                        sys_args = sys.argv  # Save a copy of current sys args
                        print("\nWarning: geckodriver not found!"
                              " Installing now:")
                        try:
                            sb_install.main(override="geckodriver")
                        except Exception as e:
                            print("\nWarning: Could not install geckodriver: "
                                  "%s" % e)
                        sys.argv = sys_args  # Put back the original sys args
                if "linux" in PLATFORM or not headless:
                    firefox_driver = webdriver.Firefox(
                        firefox_profile=profile,
                        capabilities=firefox_capabilities)
                else:
                    firefox_driver = webdriver.Firefox(
                        firefox_profile=profile,
                        capabilities=firefox_capabilities,
                        options=options)
            except Exception:
                # Second attempt: fresh profile and default capabilities
                # (no marionette flag, no headless argument).
                profile = _create_firefox_profile(
                    downloads_path, locale_code,
                    proxy_string, user_agent, disable_csp)
                firefox_capabilities = DesiredCapabilities.FIREFOX.copy()
                firefox_driver = webdriver.Firefox(
                    firefox_profile=profile,
                    capabilities=firefox_capabilities)
            return firefox_driver
        except Exception as e:
            # Last resort is a plain Firefox, but only for headed runs.
            if headless:
                raise Exception(e)
            return webdriver.Firefox()
    elif browser_name == constants.Browser.INTERNET_EXPLORER:
        if not IS_WINDOWS:
            raise Exception(
                "IE Browser is for Windows-based operating systems only!")
        from selenium.webdriver.ie.options import Options
        ie_options = Options()
        ie_options.ignore_protected_mode_settings = False
        ie_options.ignore_zoom_level = True
        ie_options.require_window_focus = False
        ie_options.native_events = True
        ie_options.full_page_screenshot = True
        ie_options.persistent_hover = True
        ie_capabilities = ie_options.to_capabilities()
        if LOCAL_IEDRIVER and os.path.exists(LOCAL_IEDRIVER):
            try:
                make_driver_executable_if_not(LOCAL_IEDRIVER)
            except Exception as e:
                logging.debug("\nWarning: Could not make iedriver"
                              " executable: %s" % e)
        return webdriver.Ie(capabilities=ie_capabilities)
    elif browser_name == constants.Browser.EDGE:
        try:
            chrome_options = _set_chrome_options(
                browser_name, downloads_path, headless, locale_code,
                proxy_string, proxy_auth, proxy_user, proxy_pass, user_agent,
                disable_csp, enable_ws, enable_sync, use_auto_ext,
                no_sandbox, disable_gpu, incognito, guest_mode, devtools,
                swiftshader, block_images, user_data_dir,
                extension_zip, extension_dir, servername,
                mobile_emulator, device_width, device_height,
                device_pixel_ratio)
            if LOCAL_EDGEDRIVER and os.path.exists(LOCAL_EDGEDRIVER):
                try:
                    make_driver_executable_if_not(LOCAL_EDGEDRIVER)
                except Exception as e:
                    logging.debug("\nWarning: Could not make edgedriver"
                                  " executable: %s" % e)
            elif not is_edgedriver_on_path():
                args = " ".join(sys.argv)
                if not ("-n" in sys.argv or "-n=" in args or args == "-c"):
                    # (Not multithreaded)
                    from seleniumbase.console_scripts import sb_install
                    sys_args = sys.argv  # Save a copy of current sys args
                    print("\nWarning: msedgedriver not found. Installing now:")
                    sb_install.main(override="edgedriver")
                    sys.argv = sys_args  # Put back the original sys args
            # For Microsoft Edge (Chromium) version 79 or lower
            return webdriver.Chrome(executable_path=LOCAL_EDGEDRIVER,
                                    options=chrome_options)
        except Exception:
            # For Microsoft Edge (Chromium) version 80 or higher
            # The options are rebuilt from scratch with EdgeOptions here;
            # chrome_options from the attempt above is not reused.
            from msedge.selenium_tools import Edge, EdgeOptions
            if LOCAL_EDGEDRIVER and os.path.exists(LOCAL_EDGEDRIVER):
                try:
                    make_driver_executable_if_not(LOCAL_EDGEDRIVER)
                except Exception as e:
                    logging.debug("\nWarning: Could not make edgedriver"
                                  " executable: %s" % e)
            edge_options = EdgeOptions()
            edge_options.use_chromium = True
            prefs = {
                "download.default_directory": downloads_path,
                "local_discovery.notifications_enabled": False,
                "credentials_enable_service": False,
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "safebrowsing.enabled": False,
                "safebrowsing.disable_download_protection": True,
                "profile": {
                    "password_manager_enabled": False,
                    "default_content_setting_values.automatic_downloads": 1,
                    "managed_default_content_settings.automatic_downloads": 1,
                    "default_content_settings.popups": 0,
                    "managed_default_content_settings.popups": 0
                }
            }
            if locale_code:
                prefs["intl.accept_languages"] = locale_code
            if block_images:
                prefs["profile.managed_default_content_settings.images"] = 2
            edge_options.add_experimental_option("prefs", prefs)
            edge_options.add_experimental_option("w3c", True)
            edge_options.add_experimental_option(
                "useAutomationExtension", False)
            edge_options.add_experimental_option(
                "excludeSwitches", ["enable-automation", "enable-logging"])
            if guest_mode:
                edge_options.add_argument("--guest")
            if headless:
                edge_options.add_argument("--headless")
            if mobile_emulator:
                emulator_settings = {}
                device_metrics = {}
                # Use the caller's metrics only when all three are ints;
                # otherwise fall back to 411 x 731 at pixel ratio 3.
                if type(device_width) is int and (
                        type(device_height) is int and (
                        type(device_pixel_ratio) is int)):
                    device_metrics["width"] = device_width
                    device_metrics["height"] = device_height
                    device_metrics["pixelRatio"] = device_pixel_ratio
                else:
                    device_metrics["width"] = 411
                    device_metrics["height"] = 731
                    device_metrics["pixelRatio"] = 3
                emulator_settings["deviceMetrics"] = device_metrics
                if user_agent:
                    emulator_settings["userAgent"] = user_agent
                edge_options.add_experimental_option(
                    "mobileEmulation", emulator_settings)
                edge_options.add_argument("--enable-sync")
            edge_options.add_argument("--disable-infobars")
            edge_options.add_argument("--disable-save-password-bubble")
            edge_options.add_argument("--disable-single-click-autofill")
            edge_options.add_argument(
                "--disable-autofill-keyboard-accessory-view[8]")
            edge_options.add_argument("--disable-translate")
            if not enable_ws:
                edge_options.add_argument("--disable-web-security")
            edge_options.add_argument("--homepage=about:blank")
            edge_options.add_argument("--dns-prefetch-disable")
            edge_options.add_argument("--dom-automation")
            edge_options.add_argument("--disable-hang-monitor")
            edge_options.add_argument("--disable-prompt-on-repost")
            if proxy_string:
                edge_options.add_argument('--proxy-server=%s' % proxy_string)
            edge_options.add_argument("--test-type")
            edge_options.add_argument("--log-level=3")
            edge_options.add_argument("--no-first-run")
            edge_options.add_argument("--ignore-certificate-errors")
            if devtools and not headless:
                edge_options.add_argument("--auto-open-devtools-for-tabs")
            edge_options.add_argument("--allow-file-access-from-files")
            edge_options.add_argument("--allow-insecure-localhost")
            edge_options.add_argument("--allow-running-insecure-content")
            if user_agent:
                edge_options.add_argument("--user-agent=%s" % user_agent)
            edge_options.add_argument("--no-sandbox")
            if swiftshader:
                edge_options.add_argument("--use-gl=swiftshader")
            else:
                edge_options.add_argument("--disable-gpu")
            if "linux" in PLATFORM:
                edge_options.add_argument("--disable-dev-shm-usage")
            capabilities = edge_options.to_capabilities()
            capabilities["platform"] = ''
            return Edge(
                executable_path=LOCAL_EDGEDRIVER, capabilities=capabilities)
    elif browser_name == constants.Browser.SAFARI:
        arg_join = " ".join(sys.argv)
        if ("-n" in sys.argv) or ("-n=" in arg_join) or (arg_join == "-c"):
            # Skip if multithreaded
            raise Exception("Can't run Safari tests in multi-threaded mode!")
        safari_capabilities = _set_safari_capabilities()
        return webdriver.Safari(desired_capabilities=safari_capabilities)
    elif browser_name == constants.Browser.OPERA:
        try:
            if LOCAL_OPERADRIVER and os.path.exists(LOCAL_OPERADRIVER):
                try:
                    make_driver_executable_if_not(LOCAL_OPERADRIVER)
                except Exception as e:
                    logging.debug("\nWarning: Could not make operadriver"
                                  " executable: %s" % e)
            opera_options = _set_chrome_options(
                browser_name, downloads_path, headless, locale_code,
                proxy_string, proxy_auth, proxy_user, proxy_pass, user_agent,
                disable_csp, enable_ws, enable_sync, use_auto_ext,
                no_sandbox, disable_gpu, incognito, guest_mode, devtools,
                swiftshader, block_images, user_data_dir,
                extension_zip, extension_dir, servername,
                mobile_emulator, device_width, device_height,
                device_pixel_ratio)
            opera_options.headless = False  # No support for headless Opera
            return webdriver.Opera(options=opera_options)
        except Exception:
            # Any failure above falls back to Opera with default settings.
            return webdriver.Opera()
    elif browser_name == constants.Browser.PHANTOM_JS:
        with warnings.catch_warnings():
            # Ignore "PhantomJS has been deprecated" UserWarning
            warnings.simplefilter("ignore", category=UserWarning)
            return webdriver.PhantomJS()
    elif browser_name == constants.Browser.GOOGLE_CHROME:
        try:
            chrome_options = _set_chrome_options(
                browser_name, downloads_path, headless, locale_code,
                proxy_string, proxy_auth, proxy_user, proxy_pass, user_agent,
                disable_csp, enable_ws, enable_sync, use_auto_ext,
                no_sandbox, disable_gpu, incognito, guest_mode, devtools,
                swiftshader, block_images, user_data_dir,
                extension_zip, extension_dir, servername,
                mobile_emulator, device_width, device_height,
                device_pixel_ratio)
            if LOCAL_CHROMEDRIVER and os.path.exists(LOCAL_CHROMEDRIVER):
                try:
                    make_driver_executable_if_not(LOCAL_CHROMEDRIVER)
                except Exception as e:
                    logging.debug("\nWarning: Could not make chromedriver"
                                  " executable: %s" % e)
            elif not is_chromedriver_on_path():
                args = " ".join(sys.argv)
                if not ("-n" in sys.argv or "-n=" in args or args == "-c"):
                    # (Not multithreaded)
                    from seleniumbase.console_scripts import sb_install
                    sys_args = sys.argv  # Save a copy of current sys args
                    print("\nWarning: chromedriver not found. Installing now:")
                    sb_install.main(override="chromedriver")
                    sys.argv = sys_args  # Put back the original sys args
            if not headless or "linux" not in PLATFORM:
                return webdriver.Chrome(options=chrome_options)
            else:  # Running headless on Linux
                try:
                    return webdriver.Chrome(options=chrome_options)
                except Exception:
                    # Use the virtual display on Linux during headless errors
                    logging.debug("\nWarning: Chrome failed to launch in"
                                  " headless mode. Attempting to use the"
                                  " SeleniumBase virtual display on Linux...")
                    chrome_options.headless = False
                    return webdriver.Chrome(options=chrome_options)
        except Exception as e:
            # Last resort is a default Chrome, but only for headed runs.
            if headless:
                raise Exception(e)
            if LOCAL_CHROMEDRIVER and os.path.exists(LOCAL_CHROMEDRIVER):
                try:
                    make_driver_executable_if_not(LOCAL_CHROMEDRIVER)
                except Exception as e:
                    logging.debug("\nWarning: Could not make chromedriver"
                                  " executable: %s" % e)
            return webdriver.Chrome()
    else:
        raise Exception(
            "%s is not a valid browser option for this system!" % browser_name)
def initialize():
    """Set up Edge, check for the driver and run the scrap.tf raffle loop.

    Steps, in order:

    1. Build Edge options (headless unless the outer name ``headless``
       equals the string ``'false'``).
    2. ``get_engine``: if ``msedgedriver.exe`` is not in
       ``C:\\Windows\\System32``, download the matching driver archive to
       the Downloads folder, print installation instructions and exit.
    3. ``get_profile``: attach a dedicated user-data directory, creating
       it first when missing.
    4. Start the driver and call ``enter_raffle``, which collects raffle
       links and joins them.

    The names ``headless``, ``chk_frq`` and ``delay`` are not defined in
    this function and are read from an enclosing scope.
    """
    # Initialize the program
    options = EdgeOptions()
    options.use_chromium = True
    prefs = {"profile.managed_default_content_settings.images": 2}
    options.add_experimental_option("prefs", prefs)
    # Anything other than the exact string 'false' enables headless mode.
    if headless == 'false':
        pass
    else:
        options.add_argument(argument='--headless')

    def tutorial():
        # Print the manual installation steps for the driver, then end the
        # program via exit().
        print('\n[!] What to do after you downloaded the browser engine? [!]')
        time.sleep(1)
        print(
            '1. The WebDriver is downloaded in consideration of your browser version and is located at your "Downloads" folder'
        )
        time.sleep(1)
        print(
            '2. To enable the program, the engine "msedgedriver.exe" needs to be placed in PATH. If you"re not sure which directory in your system is PATH, put "msedgedriver.exe" to "System32"'
        )
        time.sleep(1)
        print(
            'I understand your worry about doing something with System32, or you could add PATH yourself if you feel uncomfortable'
        )
        time.sleep(0.5)
        print(
            'Documentation regarding adding folder to path: "https://docs.alfresco.com/4.2/tasks/fot-addpath.html" '
        )
        print()
        print(
            'If you"re wondering why I do not automate adding PATH in this program, I do not want to do system wide changes in your system. Everything that happens here is within your consent.'
        )
        exit()

    def get_engine():
        # Look up the Downloads folder and the installed Edge version in
        # the registry, then download the driver archive when
        # msedgedriver.exe is not found in System32.
        # NOTE(review): this registry path is a non-raw string containing
        # backslashes, unlike keyPath1 below -- confirm it should not be
        # a raw string.
        with OpenKey(
                HKEY_CURRENT_USER,
                'SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders'
        ) as key:
            Downloads = QueryValueEx(
                key, '{374DE290-123F-4565-9164-39C4925E467B}')[0]
        keyPath1 = r"Software\Microsoft\Edge\BLBeacon"
        key1 = OpenKey(HKEY_CURRENT_USER, keyPath1, 0, KEY_READ)
        edgeVersion1 = QueryValueEx(key1, "version")[0]
        edgeVersion1 = str(edgeVersion1)
        # localappdata = os.getenv('LOCALAPPDATA') <- Nick's installation folder
        install_dir = str('C:\\Windows\\System32')
        files_list = os.listdir(install_dir)
        if 'msedgedriver.exe' in files_list:
            pass
        else:
            print('[!] Browser engine not found, downloading ...')
            ms_site = f'https://msedgedriver.azureedge.net/{edgeVersion1}/edgedriver_win32.zip'
            destination = str(Downloads + '\\edgedriver_win32.zip')
            urlretrieve(ms_site, destination)
            # This progress display runs after urlretrieve has returned
            # and advances by random steps; it is not tied to the
            # download itself.
            y = 0
            while y < 100:
                time.sleep(2)
                y += 10 + random.randint(1, 6)
                if y > 100:
                    print("\r Download progress : 100 %")
                    break
                else:
                    print("\r Download progress : {}".format(y), '%', end="")
            tutorial()

    get_engine()

    def get_profile():
        # Attach the 'Auto-raffler' user-data directory to the options.
        # When it does not exist yet, create it and call get_profile
        # again so the arguments are added on the second pass.
        if 'Auto-raffler' in os.listdir(
                'C:\\Users\\nicho\\AppData\\Local\\Microsoft\\Edge'):
            options.add_argument(
                'user-data-dir=C:\\Users\\nicho\\AppData\\Local\\Microsoft\\Edge\\Auto-raffler'
            )
            options.add_argument('profile-directory=Profile 1')
        else:
            os.mkdir(
                'C:\\Users\\nicho\\AppData\\Local\\Microsoft\\Edge\\Auto-raffler'
            )
            get_profile()

    get_profile()
    driver = Edge(options=options,
                  executable_path='C:\\Windows\\System32\\msedgedriver.exe')

    def get_stat():
        # Get raffle info
        # The page's <h1> text is split on '/'; the first part is the
        # entered count and the last part the total count.  Returns
        # [entered, total, available].
        driver.get("https://scrap.tf/raffles")
        try:
            stat = driver.find_element_by_tag_name('h1').text
            stat = str(stat)
            stat = stat.split('/')
            entered_count = stat[0]
            entered_count = int(entered_count)
            total_count = stat[-1]
            total_count = int(total_count)
            available_count = total_count - entered_count
            return [entered_count, total_count, available_count]
        except:
            print('Unable to get website data. Are you logged in?')
            driver.find_element_by_class_name('sits-login').click()
            cfm = ''
            while cfm != 'y':
                print(
                    'Feel free to login or not to login. This program saves its data on your local laptop and is not connected to the internet'
                )
                cfm = str(input('Have you logged in? (Y/N) : '))
                cfm = cfm.casefold()
            # NOTE(review): this path ends without a return statement, so
            # the function returns None here although callers index the
            # result -- confirm whether a retry was intended.

    def get_links():
        # Collecting raffle links
        # Repeats until the number of join-able links equals the
        # available count reported by get_stat().
        stat = get_stat()
        joined = []
        available = []
        total = []
        while len(available) != stat[2]:
            z = 0
            # Scroll to the bottom until the end-of-list text appears;
            # the page is refreshed when the counter reaches 12.
            while driver.find_element_by_class_name(
                    'panel-body.raffle-pagination-done'
            ).text != "That's all, no more!":
                z += 1
                time.sleep(2)
                driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight);")
                if z == 12:
                    driver.refresh()
            joined_class = driver.find_elements_by_class_name(
                "panel-raffle.raffle-entered [href]")
            joined = [elem.get_attribute('href') for elem in joined_class]
            total_class = driver.find_elements_by_class_name(
                "panel-raffle [href]")
            total = [elem0.get_attribute('href') for elem0 in total_class]
            # Drop links that contain 'profile'.
            # NOTE(review): both loops remove items from the list they
            # are iterating over, which can skip the element following
            # each removal -- confirm no entries are missed.
            for x in joined:
                if 'profile' in x:
                    joined.remove(x)
            for y in total:
                if 'profile' in y:
                    total.remove(y)
            available = list(set(total) - set(joined))
            print('[+] Getting raffle links ...')
            print(f'[+] Collected {len(joined)} links to joined raffles')
            print(f'[+] Collected {len(total)} links to all raffles')
            print(f'[+] Collected {len(available)} links to join-able raffles')
        print('\n[+] All raffle links collected!')
        return joined, total, available

    def enter_raffle():
        joined, total, available = get_links()

        def overwatch():
            # Poll the raffle counts forever: while everything is already
            # entered, count down chk_frq seconds and refresh; otherwise
            # start joining.
            t = chk_frq
            while True:
                stat = get_stat()
                if stat[0] >= stat[1]:
                    while t != 0:
                        print(f'\r Refreshing in {t} seconds', end=" ")
                        time.sleep(1)
                        t -= 1
                    driver.refresh()
                    t = chk_frq
                else:
                    get_stat()
                    raffle_joiner()

        if joined == total and len(available) == 0:
            print(
                'No raffles are available to be joined. Entering overwatch mode.'
            )
            print('====================================================')
            # NOTE(review): raffle_joiner is defined only after this
            # call, so overwatch() started from here cannot reach it --
            # confirm the intended order.
            overwatch()

        def raffle_joiner():
            # Visit every available raffle and click its enter button.
            start = time.time()
            start = int(start)
            # NOTE(review): `available` is modified inside the loop that
            # iterates over it, which can skip raffles -- confirm.
            for raffle in available:
                joined.append(raffle)
                available.remove(raffle)
                print(f"\r{len(available)} Raffles left.", end=" ")
                start = int(start)
                driver.get(url=raffle)
                desc = ''
                try:
                    driver.find_element_by_css_selector(
                        '#pid-viewraffle > div.container > div > div.well.raffle-well > div.row.raffle-opts-row > div.col-xs-7.enter-raffle-btns > button:nth-child(3)'
                    ).click()
                except:
                    # The enter button was not usable; read the raffle's
                    # status text to find out whether it has ended.
                    try:
                        desc = driver.find_element_by_class_name(
                            'raffle-row-full-width').text
                    except:
                        print(
                            '\n[!]Uknown error occured, pleas contact the developer!'
                        )
                    if 'raffle ended' in desc:
                        print(
                            '\n[!] A raffle has ended and I failed to join it on time!'
                        )
                time.sleep(delay)
            stop = time.time()
            elapsed = stop - start
            print(
                f'\n[!] All raffles joined. Elapsed time : {round(elapsed):02d} seconds'
            )
            print('====================================================')
            # overwatch() and raffle_joiner() call each other; neither
            # call returns.
            overwatch()

        raffle_joiner()

    enter_raffle()
from msedge.selenium_tools import Edge, EdgeOptions
import time as t
import sys

# Microsoft Edge configuration: Chromium engine, headless, with logging
# silenced both through the log level and the excluded switch.
options = EdgeOptions()
options.use_chromium = True
options.add_argument('headless')
options.add_argument('--log-level=3')
options.add_experimental_option('excludeSwitches', ['enable-logging'])

# Location of the driver executable.
path = r'C:\Users\farma\Downloads\edgedriver\msedgedriver.exe'

# The driver is created when this module is loaded.
driver = Edge(options=options, executable_path=path)


def fetcher():
    """Open weather.com (en-IN) and type ``SearchString`` into the search box."""
    weather_url = 'https://weather.com/en-IN/'
    driver.get(weather_url)

    # Give the site time to load fully.
    t.sleep(3)

    # Focus the location search input and send the search text.
    search_box = driver.find_element_by_xpath(
        '//*[@id="LocationSearch_input"]')
    search_box.click()
    search_box.send_keys(SearchString)
class QCourse:
    """Drive Edge through ke.qq.com to log in and download course videos."""

    def __init__(self):
        """Build the Edge options and start the driver."""
        self.prefs = {"download.default_directory": os.getcwd()}
        self.options = EdgeOptions()
        self.options.use_chromium = True
        self.options.add_argument("log-level=3")
        self.options.add_experimental_option('excludeSwitches',
                                             ['enable-logging'])
        self.options.add_experimental_option('prefs', self.prefs)
        self.options.add_argument("--mute-audio")
        self.login_url = 'https://ke.qq.com/'
        # Passing the options raised an error on macOS, so they are left out
        # here.  On Windows use the msedgedriver.exe next to this file:
        #   Edge(executable_path='msedgedriver.exe', options=self.options)
        self.driver = Edge(executable_path=os.path.join(
            BASE_DIR, 'msedgedriver'),
                           capabilities={})

    def login(self):
        """Open the login dialog, wait for it to close and save the cookies."""
        self.driver.get('https://ke.qq.com/')
        self.driver.find_element_by_id('js_login').click()
        time.sleep(1)
        # Wait (up to 300 s) until the login mask is gone.
        WebDriverWait(self.driver, 300).until_not(
            EC.presence_of_element_located((By.CLASS_NAME, 'ptlogin-mask')))
        dictCookies = self.driver.get_cookies()
        jsonCookies = json.dumps(dictCookies)
        with open('cookies.json', 'w') as f:
            f.write(jsonCookies)
        print('登陆成功!')

    def close(self):
        """Close the current browser window."""
        self.driver.close()

    def _get_video(self, video_url=None, path=None, index=None):
        """Download the video (or the attached material) behind *video_url*.

        Args:
            video_url: Page URL of the lesson.  When empty, a message is
                printed and nothing else happens.
            path: Passed through to the download helpers.
            index: Optional number prefixed to the title as ``NN_title``.
        """
        if not video_url:
            print('请输入视频url!')
            # Previously execution fell through and called
            # driver.get(None); there is nothing to load without a URL.
            return
        # A single navigation does not always take effect (possibly a
        # preventDefault on the page), so the URL is loaded twice.
        self.driver.get(video_url)
        self.driver.get(video_url)
        try:
            # Wait for the player, then for the time display to leave 00:00.
            WebDriverWait(self.driver, 60).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'loki-time')))
            WebDriverWait(
                self.driver,
                60).until_not(lambda driver: driver.find_element_by_class_name(
                    'loki-time').get_attribute("innerHTML") == '00:00 / 00:00')
            title = self.driver.title
            if index is not None:
                title = "{:02}_{}".format(index, title)
            networks = self.driver.execute_script(
                'return window.performance.getEntries()')
            # Pick the segment URL and the key URL out of the performance
            # entries; the last match of each kind wins.
            ts_url = key_url = ''
            for network in networks:
                name = network.get('name')
                if '.ts?start' in name:
                    ts_url = name
                elif 'get_dk' in name:
                    key_url = name
            download_single(ts_url, key_url, title, path)
        except TimeoutException:
            # No player appeared: the page may offer downloadable material
            # instead, so look for a download button.
            title = self.driver.title
            try:
                down_btn = self.driver.find_element_by_class_name(
                    'download-btn')
                if down_btn.text == '下载资料':
                    url = down_btn.get_attribute('href')
                    download_zip_doc(url, title, path)
            except Exception:
                print('没有找到视频,也没有找到可下载的文件,可能是还未开课')

    def get_video(self, video_url=None, path=None, index=None):
        """Download one URL, or every non-empty URL of a list."""
        if isinstance(video_url, list):
            for url in video_url:
                if url:
                    self._get_video(url, path, index)
        else:
            self._get_video(video_url, path, index)

    def load_cookies(self):
        """Load cookies.json (logging in first if it is missing) into the driver."""
        if not os.path.exists('cookies.json'):
            self.login()
        with open('cookies.json', 'r') as f:
            listCookies = json.loads(f.read())
        self.driver.get(self.login_url)
        for cookie in listCookies:
            self.driver.add_cookie({
                'domain': '.ke.qq.com',
                'httpOnly': cookie['httpOnly'],
                'name': cookie['name'],
                'path': '/',
                'secure': cookie['secure'],
                'value': cookie['value']
            })
        # Extra (name, value) cookie pairs supplied by the utils module.
        for cookie in utils.get_cookies_dic_list():
            self.driver.add_cookie({
                'domain': '.ke.qq.com',
                'httpOnly': False,
                'name': cookie[0],
                'path': '/',
                'secure': False,
                'value': cookie[1]
            })
from msedge.selenium_tools import Edge, EdgeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import TimeoutException
import os, datetime, csv
import pickle

# Headless Chromium-based Edge with an explicit browser binary and the
# "enable-logging" switch excluded.
driverOptions = EdgeOptions()
driverOptions.use_chromium = True
driverOptions.binary_location = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"
driverOptions.add_argument("--headless")
driverOptions.add_experimental_option("excludeSwitches", ["enable-logging"])
browser = Edge(options=driverOptions)
# NOTE(review): this fragment is cut off after the inner ``except``; the
# nesting below is reconstructed from the statement order -- confirm.
try:
    try:
        browser.get('https://www.netztest.at/')
        # Restore the cookies saved next to this script.
        # NOTE(review): pickle.load can execute arbitrary code; cookies.pkl
        # must come from a trusted source.
        cookies = pickle.load(
            open(os.path.join(os.path.dirname(__file__), 'cookies.pkl'), "rb"))
        for cookie in cookies:
            if cookie['name'] == 'RMBTuuid':
                # New expiry: today's UTC midnight as seconds since 1970,
                # plus 1209600 s (14 days).
                cookie['expiry'] = int(
                    (datetime.datetime.utcnow().replace(
                        hour=0, minute=0, second=0, microsecond=0) -
                     datetime.datetime(1970, 1, 1)).total_seconds() + 1209600)
                browser.add_cookie(cookie)
                print("added cookie ", cookie)
    except Exception:
# In[35]:

import pyautogui, os, requests, re
from msedge.selenium_tools import Edge, EdgeOptions, webdriver

# The unpacked "markdown-clipper" extension and the download folder are both
# resolved against the current working directory.
unpacked_extension_path = os.path.join(os.getcwd(), "markdown-clipper")
download_path = os.path.join(os.getcwd(), "Markdown Output\\")

# Preferences handed to the browser: download folder and popup setting.
prefs = {
    "download.default_directory": download_path,
    "profile.default_content_settings.popups": 0,
    "directory_upgrade": True
}

# Chromium-based Edge with the extension loaded; "detach" is enabled.
options = EdgeOptions()
options.use_chromium = True
options.add_argument(f"--load-extension={unpacked_extension_path}")
options.add_experimental_option("prefs", prefs)
options.add_experimental_option("detach", True)

driver = Edge(options=options)
url = "https://networklessons.com/cisco/ccna-200-301"
driver.get(url)

# In[43]:

# Look up the cookie bar and the opt-in button; both lookups return lists
# that are empty when the element is absent.
agree = driver.find_elements_by_xpath('//*[@id="catapult-cookie-bar"]/div/div')
join = driver.find_elements_by_xpath(
    '//*[@id="om-mvhsujbebu4nqhlzcsgs-optin"]/div/button')
if len(agree) > 0:
    agree[0].click()
from time import sleep
from msedge.selenium_tools import Edge, EdgeOptions

# Headless Edge.  (PhantomJS would also work but is no longer maintained.)
options = EdgeOptions()
options.use_chromium = True
for _flag in ("headless", "disable-gpu"):
    options.add_argument(_flag)
del _flag
# "enable-logging" suppresses noisy console output; "enable-automation" was
# meant to avoid detection, which the original author notes no longer works
# on the newest browser versions.
options.add_experimental_option("excludeSwitches",
                                ['enable-automation', 'enable-logging'])

# A headless Chrome variant (reported to work with Chrome 88.0) was kept here
# as commented-out code:
# from selenium.webdriver import Chrome
# from selenium.webdriver import ChromeOptions
# options = ChromeOptions()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')
# options.add_experimental_option("excludeSwitches", ["enable-automation",'enable-logging'])
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_experimental_option('useAutomationExtension', False)
# wd = Chrome(options=options)

# Load the page, dump its source, wait two seconds and shut the browser down.
wd = Edge(options=options)
wd.get('https://www.baidu.com')
print(wd.page_source)
sleep(2)
wd.quit()
class QCourse:
    """Drive Edge through ke.qq.com to log in and download a course video."""

    def __init__(self):
        """Build the Edge options and start the driver with them."""
        self.prefs = {"download.default_directory": os.getcwd()}
        self.options = EdgeOptions()
        self.options.use_chromium = True
        self.options.add_argument("log-level=3")
        self.options.add_experimental_option('excludeSwitches',
                                             ['enable-logging'])
        self.options.add_experimental_option('prefs', self.prefs)
        self.options.add_argument("--mute-audio")
        self.login_url = 'https://ke.qq.com/'
        self.driver = Edge(executable_path='msedgedriver.exe',
                           options=self.options)

    def login(self):
        """Open the login dialog, wait for it to close and save the cookies."""
        self.driver.get('https://ke.qq.com/')
        self.driver.find_element_by_id('js_login').click()
        time.sleep(1)
        # Wait (up to 300 s) until the login mask is gone.
        WebDriverWait(self.driver, 300).until_not(
            EC.presence_of_element_located((By.CLASS_NAME, 'ptlogin-mask')))
        dictCookies = self.driver.get_cookies()
        jsonCookies = json.dumps(dictCookies)
        with open('cookies.json', 'w') as f:
            f.write(jsonCookies)
        print('登陆成功!')

    def close(self):
        """Close the current browser window."""
        self.driver.close()

    def get_video(self, video_url=None, path=None):
        """Load *video_url* with the saved cookies and download its stream.

        Args:
            video_url: Page URL of the lesson.  When empty, a message is
                printed and nothing else happens.
            path: Passed through to ``download_single``.
        """
        if not video_url:
            print('请输入视频url!')
            # Previously execution fell through and tried to load a missing
            # URL; there is nothing to do without one.
            return
        if not os.path.exists('cookies.json'):
            self.login()
        with open('cookies.json', 'r') as f:
            listCookies = json.loads(f.read())
        # The page is opened once before the cookies are added and once more
        # afterwards.
        self.driver.get(video_url)
        for cookie in listCookies:
            self.driver.add_cookie({
                'domain': '.ke.qq.com',
                'httpOnly': cookie['httpOnly'],
                'name': cookie['name'],
                'path': '/',
                'secure': cookie['secure'],
                'value': cookie['value']
            })
        self.driver.get(video_url)
        # Wait for the player, then for the time display to leave 00:00.
        WebDriverWait(self.driver, 300).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'loki-time')))
        WebDriverWait(
            self.driver,
            300).until_not(lambda driver: driver.find_element_by_class_name(
                'loki-time').get_attribute("innerHTML") == '00:00 / 00:00')
        networks = self.driver.execute_script(
            'return window.performance.getEntries()')
        # Pick the segment URL and the key URL out of the performance
        # entries; the last match of each kind wins.
        ts_url = key_url = ''
        for network in networks:
            name = network.get('name')
            if '.ts?start' in name:
                ts_url = name
            elif 'get_dk' in name:
                key_url = name
        title = self.driver.title
        download_single(ts_url, key_url, title, path)
def get_browser(_config_, path_prefix=""):
    """Create the Selenium browser described by the configuration.

    Reads ``selenium.browserType`` ('Chrome', 'Edge' or 'Firefox'),
    ``selenium.headless``, ``selenium.binary`` and the first ``user-agent``
    entry from *_config_*.  The new window is resized to 500x700.

    :param _config_: configuration mapping with the keys named above
    :param path_prefix: prepended to the Chrome/Edge driver path before it is
        passed to ``get_file``
    :return: the started browser object; on a ``WebDriverException`` an error
        message is printed and the process exits with status 1
    """
    browser_type = _config_['selenium']['browserType']
    headless = _config_['selenium']['headless']
    binary = _config_['selenium']['binary']
    user_agent = _config_['user-agent'][0]
    try:
        if browser_type not in ('Chrome', 'Edge', 'Firefox'):
            raise WebDriverException
        # The three supported platforms were handled by identical branches.
        # Any other platform used to leave the browser unset and crash with
        # AttributeError; it is now reported through the handler below.
        if sys.platform not in ('linux', 'darwin', 'win32'):
            raise WebDriverException(
                "unsupported platform: %s" % sys.platform)

        if browser_type == 'Chrome':
            chrome_options = webdriver.ChromeOptions()
            # These two switches avoid start-up errors in some environments.
            chrome_options.add_argument('--no-sandbox')
            chrome_options.add_argument('--disable-dev-shm-usage')
            chrome_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            chrome_options.add_argument(f'user-agent={user_agent}')
            if binary != "":
                # Needed when the browser cannot be located automatically.
                chrome_options.binary_location = binary
            if headless:
                chrome_options.add_argument('--headless')
                chrome_options.add_argument('--disable-gpu')
            _browser_ = webdriver.Chrome(
                executable_path=get_file(path_prefix +
                                         "./drivers/chromedriver"),
                desired_capabilities={},
                options=chrome_options)
        elif browser_type == 'Edge':
            from msedge.selenium_tools import Edge, EdgeOptions
            edge_options = EdgeOptions()
            edge_options.use_chromium = True
            edge_options.add_argument('--no-sandbox')
            edge_options.add_argument('--disable-dev-shm-usage')
            edge_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            if binary != "":
                edge_options.binary_location = binary
            if headless:
                edge_options.add_argument('--headless')
                edge_options.add_argument('--disable-gpu')
            _browser_ = Edge(
                executable_path=get_file(path_prefix +
                                         "./drivers/msedgedriver"),
                capabilities={},
                options=edge_options)
        else:
            # Firefox: start from an empty geckodriver log.
            if not os.path.exists(get_file("./logs")):
                os.mkdir(get_file("./logs/"))
            open(get_file("./logs/geckodriver.log"), "w").close()
            firefox_options = webdriver.FirefoxOptions()
            firefox_options.log.level = "fatal"
            if binary != "":
                firefox_options.binary_location = binary
            if headless:
                firefox_options.add_argument('--headless')
                firefox_options.add_argument('--disable-gpu')
            # NOTE(review): path_prefix is not applied to the geckodriver
            # path, unlike Chrome/Edge -- confirm whether that is intended.
            firefox_kwargs = {
                'executable_path': get_file('./drivers/geckodriver'),
                'options': firefox_options,
            }
            if sys.platform == 'linux':
                # Only the Linux branch passed a service log path.
                firefox_kwargs['service_log_path'] = get_file(
                    "./logs/geckodriver.log")
            _browser_ = webdriver.Firefox(**firefox_kwargs)

        _browser_.set_window_size(500, 700)
        return _browser_
    except WebDriverException as e:
        # Driver problems: pick the most specific hint for the user.
        details = str(e.args)
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        if "This version of ChromeDriver only supports Chrome version" in details:
            print("\r[%s] [ERROR] 浏览器错误(chromedriver版本错误),请比对前三位版本号" % now)
        elif "'chromedriver' executable needs to be in PATH" in details:
            print("\r[%s] [ERROR] 浏览器错误,请检查你下载并解压好的驱动是否放在drivers目录下" % now)
        elif "unknown error: cannot find Chrome binary" in details:
            print(
                "\r[%s] [ERROR] 浏览器错误(Chrome浏览器可执行文件路径未成功识别),请在配置文件中修改selenium.binary为浏览器可执行文件绝对路径"
                % now)
        else:
            print(
                "\r[%s] [ERROR] 浏览器错误, 请检查你下载并解压好的驱动是否放在drivers目录下,如需帮助请及时反馈; err: %s"
                % (now, details))
        sys.exit(1)
mydate = date_today[8:] # 主题 subject = date_today + your_name + "打卡截图" ###################################### ###################################### # 开始打卡 for t in range(5): try: print(your_name, '第' + str(t + 1) + '次打卡') # executable_path是chromedriver.exe(自行下载)的位置 # options 设置 edge_options = EdgeOptions() edge_options.use_chromium = True edge_options.add_experimental_option('excludeSwitches', ['enable-automation']) edge_options.add_argument( 'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64)\ AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36 Edg/86.0.622.63"' ) #edge_options.binary_location = executable_path driver = Edge(executable_path=executable_path, options=edge_options) driver.maximize_window() driver.get(url) #sleep(1) # 自动输入学号和密码 driver.find_elements_by_xpath(
def chinahpo(hpo_queue):
    """Fetch the chinahpo.org search page for every HPO id in *hpo_queue*.

    For each queued id (``"HP:<digits>"``) a fresh headless Edge instance is
    started behind a proxy returned by ``randomIP()``.  The page source is
    appended to ``html2/hp_<digits>.html``, the id is logged to
    ``finish.txt``, and when the saved file is between 9000 and 15000 (as
    reported by ``getDocSize``) the id/proxy pair is logged to
    ``ip_check_better.txt``.

    :param hpo_queue: queue of HPO id strings; consumed until empty
    """
    while hpo_queue.empty() is not True:
        hpo = hpo_queue.get()
        # Random pause between requests (not needed when an IP pool is used).
        s = random.randint(5, 10)
        print(hpo, "等待 " + str(s) + "秒")
        time.sleep(s)
        ip = randomIP()
        hpo_ip = hpo + "\t" + ip
        print(hpo_ip)

        options = EdgeOptions()
        options.use_chromium = True
        options.add_argument("headless")
        options.add_argument("--proxy-server=http://{ip}".format(ip=ip))
        options.add_argument("--disable-blink-features")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_argument("start-maximized")
        options.add_experimental_option("excludeSwitches",
                                        ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        # Geolocation/timezone overrides derived from the proxy address.
        geo = get_timezone_geolocation(ip)
        print(geo)
        geo_json = {"latitude": geo[1], "longitude": geo[2], "accuracy": 1}
        timezone = {"timezoneId": geo[0]}
        preferences = {
            "webrtc.ip_handling_policy": "disable_non_proxied_udp",
            "webrtc.multiple_routes_enabled": False,
            "webrtc.nonproxied_udp_enabled": False
        }
        options.add_experimental_option("prefs", preferences)

        hpid = hpo.split(":")[1]
        html_path = "html2/hp_" + hpid + ".html"
        msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"
        driver = Edge(options=options, executable_path=msedge)
        try:
            script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            driver.execute_script(script)
            UA = UserAgent().random
            driver.execute_cdp_cmd("Network.setUserAgentOverride",
                                   {"userAgent": UA})
            driver.execute_cdp_cmd("Emulation.setGeolocationOverride",
                                   geo_json)
            driver.execute_cdp_cmd("Emulation.setTimezoneOverride", timezone)
            print(driver.execute_script("return navigator.userAgent;"))
            url = "http://www.chinahpo.org/#/searchList?trigger=1&tabType=1&searchContent=HP%3A{hpid}".format(
                hpid=hpid)
            try:
                driver.get(url)
                strtemp = url
                print("网址:", strtemp)
            except Exception:
                # Best effort: whatever page source exists is still saved.
                print("get page error", hpo)
            time.sleep(2)
            with open(html_path, "a+", encoding="utf-8") as f:
                f.write(str(driver.page_source))
        finally:
            # quit() ends the whole driver session.  The old close() only
            # closed the window, and nothing was released when a step above
            # raised, so every queue item leaked a driver.
            driver.quit()

        with open("finish.txt", "a") as fin:
            fin.write(hpo + "\n")
        size = getDocSize(html_path)
        if 9000 <= size <= 15000:
            with open("ip_check_better.txt", "a") as checkIP:
                checkIP.write(hpo_ip + "\n")
# 20210413
# https://www.deciphergenomics.org/genes
from msedge.selenium_tools import Edge
from msedge.selenium_tools import EdgeOptions
import time
import random

# Location of the Edge driver and the page to open.
msedge = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe"
url = "https://www.deciphergenomics.org/genes"

# Headless Chromium-based Edge with the automation switches turned off.
options = EdgeOptions()
options.use_chromium = True
options.add_argument("headless")
options.add_argument("--disable-blink-features")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

driver = Edge(options=options, executable_path=msedge)

# Redefine navigator.webdriver as undefined in the current page.
script = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
driver.execute_script(script)

driver.get(url)
print("网址:", url)

# Wait for the page to load.
time.sleep(40)

# Next step: locate the drop-down box and select 100.
class Web_scraping:
    """Scrape World Surf League event links and heat scores with Edge."""

    # Number of "score" cells shown per surfer: two best waves, the total and
    # fifteen individual waves.
    SCORES_PER_SURFER = 18
    # Each heat row records two surfers.
    SURFERS_PER_HEAT = 2

    def __init__(self):
        '''Initialize the application'''
        # The standard webdriver printed warnings that cluttered the
        # terminal, so logging is excluded.
        self.opt = EdgeOptions()
        self.opt.add_experimental_option('excludeSwitches', ['enable-logging'])
        self.opt.add_argument("--start-maximized")
        self.opt.use_chromium = True
        self.driver = Edge(
            executable_path=
            r"C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedgedriver.exe",
            options=self.opt)

    def games_link(self):
        '''Collect the links of completed events for seasons 2008-2021.

        The links are stored in ``self.events_link_list`` and written, one
        per line, to ``events.txt``.
        '''
        # One listing page per season.
        self.season_pages_list = [
            'https://www.worldsurfleague.com/events/' + str(y) + '/mct?all=1'
            for y in range(2008, 2022)
        ]
        self.season_link = self.season_pages_list[-1]

        self.events_link_list = []
        for link in self.season_pages_list:
            self.driver.get(link)
            self.event_links = self.driver.find_elements_by_xpath(
                '//a[@class="event-schedule-details__event-name"]')
            # Only as many links as there are "completed" markers are kept,
            # so the scrape stops at the first event that is not completed.
            self.event_status = self.driver.find_elements_by_xpath(
                '//span[@class="event-status event-status--completed"]')
            # Slicing cannot overrun event_links, unlike indexing by
            # range(len(event_status)).
            for event_link in self.event_links[:len(self.event_status)]:
                self.link_attribute = event_link.get_attribute('href')
                self.events_link_list.append(self.link_attribute)

        with open('events.txt', 'w') as f:
            for item in self.events_link_list:
                f.write("%s\n" % item)
        print('FINISHED')

    def _surfer_columns(self, surfer_index):
        '''Return [name, nationality, 18 score texts] for one surfer.

        *surfer_index* is the surfer's position on the page; that surfer's
        score cells start at ``surfer_index * SCORES_PER_SURFER``.
        '''
        first = surfer_index * self.SCORES_PER_SURFER
        scores = [
            self.waves[first + k].text for k in range(self.SCORES_PER_SURFER)
        ]
        return [
            self.athletes[surfer_index].text,
            self.nationalities[surfer_index].get_attribute('title')
        ] + scores

    def event_stats(self):
        '''Scrape every heat of the first event listed in ``events.txt``.

        Each heat becomes one row in ``self.all_heats_lists``: heat id, event
        name, location, tour stop, period, average wave score, then name,
        nationality and 18 scores for each of the two surfers.  After a round
        is read, the first carousel button is clicked to move to another
        round.  The loop stops when a page shows no heats; it also ends when
        a lookup raises.
        '''
        self.events_link = [
            line[0]
            for line in pd.read_fwf('events.txt', header=None).values.tolist()
        ]
        # Only the first event is processed for now.
        self.driver.get(self.events_link[0])
        self.all_heats_lists = []
        while True:
            find_all = self.driver.find_elements_by_xpath
            find_by_class = self.driver.find_element_by_class_name
            self.waves = find_all('//*[@class="score"]')
            self.athletes = find_all('//*[@class="athlete-name"]')
            self.nationalities = find_all('//*[@class="athlete-country-flag"]')
            self.heat = find_all('//*[@class="new-heat-hd-name"]')
            # The round name sits in the selected carousel item (which
            # carries an extra "last" class on the last item).
            self.round = find_all('//*[@class="carousel-item is-selected"]')
            if len(self.round) == 0:
                self.round = find_all(
                    '//*[@class="carousel-item last is-selected"]')

            self.number_of_surfers = int(
                len(self.waves) / self.SCORES_PER_SURFER)
            self.number_of_heats = len(self.heat)
            if self.number_of_heats == 0:
                # Nothing to read on this page; this used to end in a
                # ZeroDivisionError.
                break
            self.surfers_per_heat = int(self.number_of_surfers /
                                        self.number_of_heats)

            # Event-level values are the same for every heat on the page.
            tour_info = find_by_class('event-meta-tour-info').text
            self.event_turn = tour_info.split()[2][1:]
            self.event_period = find_by_class(
                'event-schedule__date-range').text
            self.event_name = find_by_class('event-title').text.split('\n')[0]
            self.event_local = re.split(r'(\d+)', tour_info)[2]
            self.avg_wave_score = re.split(
                r'(\d+\.\d+)', find_by_class('new-heat-hd-status').text)[1]
            round_name = self.round[0].text

            for g in range(self.number_of_heats):
                # Heat id used as database key.
                self.heat_id = (f'heat{g + 1}' + round_name +
                                self.event_turn +
                                self.event_period[-4:]).lower()
                row = [
                    self.heat_id, self.event_name, self.event_local,
                    self.event_turn, self.event_period, self.avg_wave_score
                ]
                # Heat g holds surfers 2g and 2g+1, so its scores start at
                # cell g * 36.  The old g * 18 offset made every heat after
                # the first re-read the previous heat's second surfer.
                # NOTE(review): assumes two surfers per heat, as the athlete
                # indexing always did -- three-surfer heats are not handled.
                first_surfer = g * self.SURFERS_PER_HEAT
                for offset in range(self.SURFERS_PER_HEAT):
                    row.extend(self._surfer_columns(first_surfer + offset))
                self.all_heats_lists.append(row)

            print(self.all_heats_lists)
            # Click the first carousel button; if the click fails, scroll the
            # button into view and retry once.
            try:
                self.prev_round_bt = self.driver.find_element_by_xpath(
                    '//*[@class="flickity-button-icon"]').click()
            except Exception:
                self.prev_round_bt = self.driver.find_element_by_xpath(
                    '//*[@class="flickity-button-icon"]')
                self.driver.execute_script("arguments[0].scrollIntoView();",
                                           self.prev_round_bt)
                time.sleep(.5)
                self.prev_round_bt.click()
            time.sleep(2.5)
def open_browser(executable_path="msedgedriver",
                 edge_args=None,
                 desired_capabilities=None,
                 **kwargs):
    """Open a Chromium-based Edge instance, cache the driver and return it.

    Parameters
    ----------
    executable_path : str (Default "msedgedriver")
        Driver executable, used when the ``${EDGEDRIVER_PATH}`` variable is
        not set.
    edge_args : iterable of str (Default None)
        Extra browser arguments.  Leading whitespace is stripped from each.
        If any of them contains ``--headless``, the ``Headless`` config value
        is set to True.
    desired_capabilities : dict (Default None)
        Passed through to ``Edge`` unchanged.
    **kwargs
        ``edge_path``: browser binary (falls back to ``${EDGE_PATH}``).
        ``headless``: when the key is present, headless mode is enabled.
        ``prefs``: a dict, or a string converted with ``util.prefs_to_dict``.

    Returns
    -------
    The started ``Edge`` driver.
    """
    options = EdgeOptions()
    options.use_chromium = True

    # Browser binary: keyword argument first, then the variable.
    binary = kwargs.get('edge_path', None)
    if not binary:
        binary = BuiltIn().get_variable_value('${EDGE_PATH}')
    if binary:
        options.binary_location = binary

    if user.is_root():
        options.add_argument("no-sandbox")

    if edge_args:
        headless_requested = any(
            '--headless' in arg.lower() for arg in edge_args)
        if headless_requested:
            CONFIG.set_value('Headless', True)
        for arg in edge_args:
            options.add_argument(arg.lstrip())

    for fixed_arg in ("start-maximized", "--disable-notifications"):
        options.add_argument(fixed_arg)

    if 'headless' in kwargs:
        CONFIG.set_value('Headless', True)
        options.add_argument("--headless")

    if 'prefs' in kwargs:
        raw_prefs = kwargs.get('prefs')
        if isinstance(raw_prefs, dict):
            prefs = raw_prefs
        else:
            prefs = util.prefs_to_dict(raw_prefs.strip())
        options.add_experimental_option('prefs', prefs)
        logger.warn("prefs: {}".format(prefs))

    driver_path = (BuiltIn().get_variable_value('${EDGEDRIVER_PATH}')
                   or executable_path)
    driver = Edge(driver_path,
                  options=options,
                  desired_capabilities=desired_capabilities)
    browser.cache_browser(driver)
    return driver
def get_browser(_config):
    """Create the Selenium browser described by the configuration.

    Reads ``selenium.browserType`` ('Chrome', 'Edge' or 'Firefox'),
    ``selenium.headless``, ``selenium.binary`` and the first ``user-agent``
    entry from *_config*.  The new window is resized to 500x700.

    :param _config: configuration mapping with the keys named above
    :return: the started browser object, or ``None`` after printing an error
        message when a ``WebDriverException`` occurs
    """
    browser_type = _config['selenium']['browserType']
    headless = _config['selenium']['headless']
    binary = _config['selenium']['binary']
    user_agent = _config['user-agent'][0]
    try:
        if browser_type not in ('Chrome', 'Edge', 'Firefox'):
            raise WebDriverException
        # The three supported platforms were handled by identical branches.
        # Any other platform used to leave ``_browser`` unassigned and crash
        # with UnboundLocalError; it is now reported through the handler
        # below.
        if sys.platform not in ('linux', 'darwin', 'win32'):
            raise WebDriverException(
                "unsupported platform: %s" % sys.platform)

        if browser_type == 'Chrome':
            chrome_options = webdriver.ChromeOptions()
            # These two switches avoid start-up errors in some environments.
            chrome_options.add_argument('--no-sandbox')
            chrome_options.add_argument('--disable-dev-shm-usage')
            chrome_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            chrome_options.add_argument(f'user-agent={user_agent}')
            if binary != "":
                # Needed when the browser cannot be located automatically.
                chrome_options.binary_location = binary
            if headless:
                chrome_options.add_argument('--headless')
                chrome_options.add_argument('--disable-gpu')
            _browser = webdriver.Chrome(
                executable_path=get_file("./drivers/chromedriver"),
                desired_capabilities={},
                options=chrome_options)
        elif browser_type == 'Edge':
            from msedge.selenium_tools import Edge, EdgeOptions
            edge_options = EdgeOptions()
            edge_options.use_chromium = True
            edge_options.add_argument('--no-sandbox')
            edge_options.add_argument('--disable-dev-shm-usage')
            edge_options.add_experimental_option(
                "excludeSwitches", ['enable-automation', 'enable-logging'])
            if binary != "":
                edge_options.binary_location = binary
            if headless:
                edge_options.add_argument('--headless')
                edge_options.add_argument('--disable-gpu')
            _browser = Edge(
                executable_path=get_file("./drivers/msedgedriver"),
                capabilities={},
                options=edge_options)
        else:
            # Firefox: start from an empty geckodriver log.
            if not os.path.exists(get_file("./logs")):
                os.mkdir(get_file("./logs/"))
            open(get_file("./logs/geckodriver.log"), "w").close()
            firefox_options = webdriver.FirefoxOptions()
            firefox_options.log.level = "fatal"
            if binary != "":
                firefox_options.binary_location = binary
            if headless:
                firefox_options.add_argument('--headless')
                firefox_options.add_argument('--disable-gpu')
            firefox_kwargs = {
                'executable_path': get_file('./drivers/geckodriver'),
                'options': firefox_options,
            }
            if sys.platform == 'linux':
                # Only the Linux branch passed a service log path.
                firefox_kwargs['service_log_path'] = get_file(
                    "./logs/geckodriver.log")
            _browser = webdriver.Firefox(**firefox_kwargs)

        _browser.set_window_size(500, 700)
        return _browser
    except WebDriverException:
        # Driver problem: ask the user to check the drivers directory.
        print("ERROR", "浏览器错误", "请检查你下载并解压好的驱动是否放在drivers目录下")