def __open_browser(use_proxy: bool = False):
    """Start a Chrome WebDriver session, optionally routed through a random proxy.

    When ``use_proxy`` is true a proxy string is taken from
    ``Proxies.get_random_proxy()``:

    * ``user:password@ip:port`` -- an authenticating proxy extension is
      written to ``plugin_file`` (built from ``manifest_json`` and
      ``background_js``) and loaded into Chrome.
    * anything else -- the string is used as a manual HTTP/SOCKS/SSL proxy
      and stored in the capabilities handed to the driver.

    Args:
        use_proxy: Whether to configure a proxy for the session.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    # TODO: add user agent
    chrome_options = webdriver.ChromeOptions()
    # Work on a copy: DesiredCapabilities.CHROME is a shared dict, and
    # writing proxy settings into it would leak them into later sessions.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()

    if use_proxy:
        random_proxy = Proxies.get_random_proxy()

        if '@' in random_proxy:
            # Split on the last '@' and the first ':' of the credentials so
            # that passwords containing either character still parse.
            auth, ip_port = random_proxy.rsplit('@', 1)
            user, pwd = auth.split(':', 1)
            ip, port = ip_port.rsplit(':', 1)

            with zipfile.ZipFile(plugin_file, 'w') as zp:
                zp.writestr("manifest.json", manifest_json)
                zp.writestr("background.js", background_js % (ip, port, user, pwd))
            chrome_options.add_extension(plugin_file)
        else:
            prox = Proxy()
            prox.proxy_type = ProxyType.MANUAL
            prox.http_proxy = random_proxy
            prox.socks_proxy = random_proxy
            prox.ssl_proxy = random_proxy
            prox.add_to_capabilities(capabilities)

    # The capabilities must be passed explicitly; otherwise the manual proxy
    # configured above never reaches the browser.
    return webdriver.Chrome(chrome_options=chrome_options,
                            desired_capabilities=capabilities)
def __init__(self, *args, seleniumwire_options=None, **kwargs):
    """Create a Firefox WebDriver whose traffic goes through Selenium Wire.

    Args:
        seleniumwire_options: Dictionary of Selenium Wire options. ``None``
            is treated as an empty dictionary. The ``auto_config`` key
            (default ``True``) controls whether the browser is pointed at
            the Selenium Wire proxy automatically.
    """
    seleniumwire_options = {} if seleniumwire_options is None else seleniumwire_options

    # Reuse the caller's options object when one was supplied, otherwise
    # create one and hand it on to the parent constructor.
    if 'options' not in kwargs:
        kwargs['options'] = FirefoxOptions()
    firefox_options = kwargs['options']

    # Without this preference Firefox bypasses the Selenium Wire proxy for
    # localhost addresses.
    firefox_options.set_preference('network.proxy.allow_hijacking_localhost', True)
    firefox_options.accept_insecure_certs = True

    config = self._setup_backend(seleniumwire_options)
    auto_config = seleniumwire_options.get('auto_config', True)

    if auto_config and SELENIUM_V4:
        # Selenium v4.0.0 onwards no longer accepts proxy settings through
        # desired capabilities, so they go on the options object.
        proxy_settings = config['proxy']
        proxy = Proxy()
        proxy.http_proxy = proxy_settings['httpProxy']
        proxy.ssl_proxy = proxy_settings['sslProxy']
        if 'noProxy' in proxy_settings:
            proxy.no_proxy = proxy_settings['noProxy']
        firefox_options.proxy = proxy
    elif auto_config:
        # Older Selenium versions receive the settings via capabilities.
        if 'capabilities' in kwargs:
            base_capabilities = kwargs['capabilities']
        else:
            base_capabilities = kwargs.get('desired_capabilities')
        if base_capabilities is None:
            base_capabilities = DesiredCapabilities.FIREFOX

        # Copy before updating so the caller's (or the shared default)
        # dictionary is left untouched.
        merged_capabilities = base_capabilities.copy()
        merged_capabilities.update(config)
        kwargs['capabilities'] = merged_capabilities

    super().__init__(*args, **kwargs)
def prepare_desired_capabilities(self):
    """Return a copy of the default Firefox capabilities with JavaScript enabled.

    A manual HTTP/SSL proxy description is also built from
    ``configs.RANDOM_PROXY(return_tuple=False)``, but it is not attached to
    the returned capabilities.
    """
    firefox_caps = DesiredCapabilities.FIREFOX.copy()
    firefox_caps.update(javascriptEnabled=True)

    # Describe a manual proxy using the same address for HTTP and SSL.
    proxy_address = configs.RANDOM_PROXY(return_tuple=False)
    manual_proxy = Proxy()
    manual_proxy.proxy_type = ProxyType.MANUAL
    for attribute in ('http_proxy', 'ssl_proxy'):
        setattr(manual_proxy, attribute, proxy_address)

    # NOTE(review): the proxy is built but never applied -- the call
    # `add_to_capabilities(...)` is disabled here, as are the
    # 'pageLoadStrategy', ftp and socks settings. Confirm whether leaving
    # the proxy unattached is intentional.
    return firefox_caps
def get_tuned_driver(parser_name: str, logger: 'Logger', proxy_ip: Optional[str] = None,
                     proxy_port: Optional[str] = None, headless: bool = True) -> 'WebDriver':
    """Create a Chrome WebDriver configured for scraping.

    The driver collects browser console logs, optionally routes traffic
    through a proxy that is checked first, hides the ``navigator.webdriver``
    flag and overrides the user agent.

    Args:
        parser_name: Name of the calling parser; only used in log messages.
        logger: Logger that receives the proxy status messages.
        proxy_ip: Proxy host. A proxy is used only when both ``proxy_ip``
            and ``proxy_port`` are truthy.
        proxy_port: Proxy port.
        headless: Whether to add the headless-mode Chrome arguments.

    Returns:
        The configured ``webdriver.Chrome`` instance.

    Raises:
        requests.RequestException: Re-raised when the proxy check request
            fails (including a timeout); the proxy is marked as failed first.
        SystemExit: With code ``-1`` when the proxy check answers with a
            status other than 200.
    """
    os.environ["DISPLAY"] = ':99'

    chrome_options = Options()
    # Work on a copy: DesiredCapabilities.CHROME is a shared dict, and
    # mutating it would leak logging/proxy settings into later drivers.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities['goog:loggingPrefs'] = {'browser': 'ALL'}

    if proxy_ip and proxy_port:
        proxy_address = f"{proxy_ip}:{proxy_port}"
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = proxy_address
        prox.ssl_proxy = proxy_address

        # Probe the proxy before handing it to the browser. The timeout
        # keeps a dead proxy from blocking the caller indefinitely.
        try:
            response = requests.get(
                'https://google.com',
                proxies={'http': proxy_address, 'https': proxy_address},
                timeout=30,
            )
        except requests.RequestException:
            update_proxy_status(proxy_ip, AccessStatus.fail)
            raise
        if response.status_code != 200:
            update_proxy_status(proxy_ip, AccessStatus.fail)
            logger.critical(f'proxy {proxy_ip}:{proxy_port} not work')
            # Same effect as exit(-1) without relying on the `site` helper.
            raise SystemExit(-1)

        update_proxy_status(proxy_ip, AccessStatus.success)
        prox.add_to_capabilities(capabilities)
        logger.info(f'{parser_name} use proxy: {proxy_ip}:{proxy_port}')

    if headless:
        for argument in (
            "--no-sandbox",
            "--headless",
            "--remote-debugging-port=9222",
            "--disable-infobars",
            "--disable-extensions",
            "--disable-dev-shm-usage",
        ):
            chrome_options.add_argument(argument)

    # These must be set BEFORE the driver is created; options changed after
    # the browser has started are never sent to it.
    prefs = {"profile.default_content_setting_values.notifications": 2}
    chrome_options.add_experimental_option('prefs', prefs)
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_argument('start-maximized')
    chrome_options.add_argument('incognito')

    driver = webdriver.Chrome(options=chrome_options, desired_capabilities=capabilities)

    # Script evaluated on every new document so that pages cannot detect
    # automation through navigator.webdriver.
    hide_webdriver_script = (
        " \n"
        "Object.defineProperty(navigator, 'webdriver', { get: () => undefined, "
        "enumerable: false, configurable: true }); "
        "const newProto = navigator.__proto__; "
        "delete newProto.webdriver; "
        "navigator.__proto__ = newProto; "
        "delete navigator.webdriver; "
    )
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": hide_webdriver_script},
    )
    driver.execute_cdp_cmd(
        'Network.setUserAgentOverride',
        {
            "userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/83.0.4103.53 Safari/537.36'
        },
    )
    driver.implicitly_wait(5)
    return driver