Пример #1
0
    def __open_browser(use_proxy: bool = False):
        """Start a Chrome WebDriver, optionally routed through a random proxy.

        Args:
            use_proxy: When true, a proxy string is taken from
                ``Proxies.get_random_proxy()``. A value containing ``@`` is
                parsed as ``user:password@ip:port`` and configured through a
                generated Chrome extension; any other value is used verbatim
                as a manual HTTP/SSL proxy address.

        Returns:
            The created ``webdriver.Chrome`` instance.
        """
        # TODO: add user agent
        chrome_options = webdriver.ChromeOptions()
        # Work on a copy: DesiredCapabilities.CHROME is a shared class-level
        # dict, and adding a proxy to it would leak into every later driver.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        if use_proxy:
            random_proxy = Proxies.get_random_proxy()
            # Parse Proxy
            if '@' in random_proxy:
                auth, ip_port = random_proxy.split('@')
                user, pwd = auth.split(':')
                ip, port = ip_port.split(':')

                # plugin_file, manifest_json and background_js come from the
                # enclosing scope (not visible in this block); background_js
                # is filled with the proxy endpoint and credentials.
                with zipfile.ZipFile(plugin_file, 'w') as zp:
                    zp.writestr("manifest.json", manifest_json)
                    zp.writestr("background.js",
                                background_js % (ip, port, user, pwd))
                chrome_options.add_extension(plugin_file)
            else:
                prox = Proxy()
                prox.proxy_type = ProxyType.MANUAL
                prox.http_proxy = random_proxy
                prox.ssl_proxy = random_proxy
                # NOTE(review): socks_proxy is intentionally not set. These
                # capabilities are now really sent to the driver, and a SOCKS
                # address without a SOCKS version is likely rejected as an
                # invalid proxy configuration - confirm against the driver in use.
                prox.add_to_capabilities(capabilities)

        # The capabilities must be handed to the driver explicitly; otherwise
        # the manual proxy configured above is never applied.
        return webdriver.Chrome(chrome_options=chrome_options,
                                desired_capabilities=capabilities)
Пример #2
0
    def __init__(self, *args, seleniumwire_options=None, **kwargs):
        """Create a Firefox WebDriver wired to the Selenium Wire backend.

        Args:
            seleniumwire_options: The seleniumwire options dictionary. An
                empty dictionary is used when it is omitted.
        """
        seleniumwire_options = (
            {} if seleniumwire_options is None else seleniumwire_options)

        # Reuse the caller's options object when one was supplied; otherwise
        # create one and pass it on to the parent constructor.
        if 'options' in kwargs:
            firefox_options = kwargs['options']
        else:
            firefox_options = kwargs['options'] = FirefoxOptions()

        # Without this preference Firefox would bypass the Selenium Wire
        # proxy for localhost addresses.
        firefox_options.set_preference(
            'network.proxy.allow_hijacking_localhost', True)
        firefox_options.accept_insecure_certs = True

        config = self._setup_backend(seleniumwire_options)
        auto_config = seleniumwire_options.get('auto_config', True)

        if auto_config and SELENIUM_V4:
            # Selenium v4.0.0 and later no longer accept proxy settings via
            # desired capabilities, so they go on the options object.
            browser_proxy = Proxy()
            proxy_settings = config['proxy']
            browser_proxy.http_proxy = proxy_settings['httpProxy']
            browser_proxy.ssl_proxy = proxy_settings['sslProxy']

            # 'noProxy' is optional in the backend configuration.
            if 'noProxy' in proxy_settings:
                browser_proxy.no_proxy = proxy_settings['noProxy']

            firefox_options.proxy = browser_proxy
        elif auto_config:
            # Older Selenium versions receive the settings via capabilities.
            base_capabilities = kwargs.get(
                'capabilities', kwargs.get('desired_capabilities'))
            if base_capabilities is None:
                base_capabilities = DesiredCapabilities.FIREFOX

            # Copy before updating so the caller's (or the shared default)
            # mapping is left untouched.
            merged_capabilities = base_capabilities.copy()
            merged_capabilities.update(config)
            kwargs['capabilities'] = merged_capabilities

        super().__init__(*args, **kwargs)
    def prepare_desired_capabilities(self):
        """Return a copy of the Firefox capabilities with JavaScript enabled.

        A manual proxy object is built from ``configs.RANDOM_PROXY`` but it
        is not attached to the returned capabilities, so the result carries
        no proxy settings.

        Returns:
            A new capabilities dictionary based on
            ``DesiredCapabilities.FIREFOX``.
        """
        capabilities = dict(DesiredCapabilities.FIREFOX,
                            javascriptEnabled=True)

        # Build the manual proxy description (HTTP and SSL share one address).
        proxy_address = configs.RANDOM_PROXY(return_tuple=False)
        manual_proxy = Proxy()
        manual_proxy.proxy_type = ProxyType.MANUAL
        manual_proxy.http_proxy = manual_proxy.ssl_proxy = proxy_address
        # NOTE(review): the proxy is never added to the capabilities here
        # (no add_to_capabilities call) - confirm whether that is intended.

        return capabilities
Пример #4
0
def get_tuned_driver(parser_name: str,
                     logger: 'Logger',
                     proxy_ip: Optional[str] = None,
                     proxy_port: Optional[str] = None,
                     headless: bool = True) -> 'WebDriver':
    """Create a Chrome WebDriver tuned for scraping.

    The driver is created with browser logging enabled, notification prompts
    blocked, the automation switch excluded, a maximized incognito window and
    (optionally) a manual proxy and headless mode. After start-up the
    ``navigator.webdriver`` property is hidden and the user agent is
    overridden through CDP commands.

    Args:
        parser_name: Name of the calling parser; used only in the log message.
        logger: Logger used to report proxy usage and proxy failures.
        proxy_ip: Proxy host. The proxy is used only when both ``proxy_ip``
            and ``proxy_port`` are truthy.
        proxy_port: Proxy port.
        headless: When true, Chrome is started with the headless argument set.

    Returns:
        The configured ``webdriver.Chrome`` instance with a 5 second implicit
        wait.

    Raises:
        requests.RequestException: If the proxy check request fails (the
            proxy is marked as failed first).
        SystemExit: With code ``-1`` if the proxy check returns a status
            other than 200.
    """
    os.environ["DISPLAY"] = ':99'

    chrome_options = Options()

    # Copy the shared class-level dict so the logging prefs and proxy set
    # here do not leak into drivers created elsewhere in the process.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities['goog:loggingPrefs'] = {'browser': 'ALL'}
    if proxy_ip and proxy_port:
        proxy_address = f"{proxy_ip}:{proxy_port}"
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = proxy_address
        prox.ssl_proxy = proxy_address
        # Verify the proxy before starting the browser. The timeout keeps a
        # dead proxy from blocking forever; a timeout is reported as a
        # requests.RequestException like any other connection failure.
        try:
            response = requests.get('https://google.com',
                                    proxies={
                                        'http': proxy_address,
                                        'https': proxy_address,
                                    },
                                    timeout=30)
        except requests.RequestException:
            update_proxy_status(proxy_ip, AccessStatus.fail)
            raise
        if response.status_code != 200:
            update_proxy_status(proxy_ip, AccessStatus.fail)
            logger.critical(f'proxy {proxy_ip}:{proxy_port} not work')
            # Same exception as exit(-1), without depending on the
            # site-provided builtin.
            raise SystemExit(-1)
        update_proxy_status(proxy_ip, AccessStatus.success)
        prox.add_to_capabilities(capabilities)

        logger.info(f'{parser_name} use proxy: {proxy_ip}:{proxy_port}')

    if headless:
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--remote-debugging-port=9222")
        chrome_options.add_argument("--disable-infobars")
        chrome_options.add_argument("--disable-extensions")
        chrome_options.add_argument("--disable-dev-shm-usage")

    # Every option must be set BEFORE the driver is created: options are
    # read once at browser start-up, so later changes have no effect.
    prefs = {"profile.default_content_setting_values.notifications": 2}
    chrome_options.add_experimental_option('prefs', prefs)
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_experimental_option('excludeSwitches',
                                           ['enable-automation'])
    chrome_options.add_argument('start-maximized')
    chrome_options.add_argument('incognito')

    driver = webdriver.Chrome(options=chrome_options,
                              desired_capabilities=capabilities)

    # Hide the navigator.webdriver flag on every new document.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument", {
            "source":
            """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined,
          enumerable: false,
          configurable: true
        });
        const newProto = navigator.__proto__;
        delete newProto.webdriver;
        navigator.__proto__ = newProto;
        delete navigator.webdriver;
      """
        })

    driver.execute_cdp_cmd(
        'Network.setUserAgentOverride', {
            "userAgent":
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/83.0.4103.53 Safari/537.36'
        })

    driver.implicitly_wait(5)
    return driver