import dataflows as DF
import time
import logging
import os

from selenium.webdriver import Chrome
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chromium.remote_connection import ChromiumRemoteConnection
ChromiumRemoteConnection.set_timeout(300)

from datapackage_pipelines_budgetkey.common.google_chrome import google_chrome_driver


def wrapper(year):
    """Run ``scraper`` for *year* with a freshly created Chrome driver.

    The driver is created via ``google_chrome_driver`` and, if creation
    succeeded, it is torn down afterwards regardless of whether the scrape
    returned normally or raised.

    :param year: value forwarded unchanged to ``scraper``.
    :return: whatever ``scraper(driver, year)`` returns.
    """
    driver = None
    try:
        driver = google_chrome_driver(initial='http://example.com/')
        outcome = scraper(driver, year)
    finally:
        # Logged even when driver creation itself failed (driver is None then).
        logging.info('Tearing down %r', driver)
        if driver:
            driver.teardown()
    return outcome


def get_chart(driver):
    # Switch to results page & iframe
    frame = WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.ID, "openDocChildFrame")))
Пример #2
0
    def __init__(self,
                 browser_name,
                 vendor_prefix,
                 port=DEFAULT_PORT,
                 options: Options = None,
                 service_args=None,
                 desired_capabilities=None,
                 service_log_path=DEFAULT_SERVICE_LOG_PATH,
                 service: Service = None,
                 keep_alive=DEFAULT_KEEP_ALIVE):
        """
        Creates a new WebDriver instance of the ChromiumDriver.
        Starts the service and then creates new WebDriver instance of ChromiumDriver.

        :Args:
         - browser_name - Browser name used when matching capabilities.
         - vendor_prefix - Company prefix to apply to vendor-specific WebDriver extension commands.
         - port - Deprecated: port you would like the service to run, if left as 0, a free port will be found.
           Only stored on ``self.port`` here.
         - options - this takes an instance of ChromiumOptions; when falsy,
           ``self.create_options()`` supplies a default instance.
         - service_args - Deprecated: List of args to pass to the driver service.
           Not read by this constructor.
         - desired_capabilities - Deprecated: Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref". Each entry is
           copied onto ``options`` and the dict is also forwarded to
           ``RemoteWebDriver.__init__``.
         - service_log_path - Deprecated: Where to log information from the driver.
           Only compared against its default to emit the deprecation warning.
         - service - Service object to start; required.
         - keep_alive - Deprecated: Whether to configure ChromiumRemoteConnection to use HTTP keep-alive.
           Defaults to ``True`` when left at ``DEFAULT_KEEP_ALIVE``; an explicit
           value is honoured.

        :Raises:
         - AttributeError - if ``service`` is falsy.
        """
        if desired_capabilities:
            warnings.warn(
                'desired_capabilities has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)
        if port != DEFAULT_PORT:
            warnings.warn(
                'port has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)
        self.port = port
        if service_log_path != DEFAULT_SERVICE_LOG_PATH:
            warnings.warn(
                'service_log_path has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)
        if keep_alive != DEFAULT_KEEP_ALIVE:
            # An explicit value is kept as given.  Warn only when this class is
            # instantiated directly (exact type match, not a subclass instance).
            if type(self) is __class__:
                warnings.warn(
                    'keep_alive has been deprecated, please pass in a Service object',
                    DeprecationWarning,
                    stacklevel=2)
        else:
            # Caller left keep_alive at its default: keep-alive is enabled.
            # Previously this branch also ran for subclass instances that had
            # passed an explicit value, silently replacing it with True.
            keep_alive = True

        _ignore_proxy = None
        if not options:
            options = self.create_options()

        if desired_capabilities:
            for key, value in desired_capabilities.items():
                options.set_capability(key, value)

        if options._ignore_local_proxy:
            _ignore_proxy = options._ignore_local_proxy

        self.vendor_prefix = vendor_prefix

        if not service:
            raise AttributeError('service cannot be None')

        self.service = service
        self.service.start()

        try:
            # NOTE(review): only desired_capabilities is forwarded below; the
            # options object prepared above is not passed on from this method.
            # Confirm this matches the RemoteWebDriver.__init__ signature of
            # the Selenium version in use.
            RemoteWebDriver.__init__(
                self,
                command_executor=ChromiumRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    browser_name=browser_name,
                    vendor_prefix=vendor_prefix,
                    keep_alive=keep_alive,
                    ignore_proxy=_ignore_proxy),
                desired_capabilities=desired_capabilities)
        except Exception:
            # The service is already started at this point; quit() before
            # re-raising the original error.
            self.quit()
            raise
        self._is_remote = False
Пример #3
0
    def __init__(self,
                 executable_path="chromedriver",
                 port=DEFAULT_PORT,
                 options=None,
                 service_args=None,
                 desired_capabilities=None,
                 service_log_path=DEFAULT_SERVICE_LOG_PATH,
                 service=None,
                 keep_alive=True):
        """
        Creates a new WebDriver instance of the ChromiumDriver.
        Starts the service and then creates new WebDriver instance of ChromiumDriver.

        :Args:
         - executable_path - Deprecated: path to the executable. If the default is used it assumes the executable is in the $PATH.
           Only compared against its default to emit the deprecation warning.
         - port - Deprecated: port you would like the service to run, if left as 0, a free port will be found.
           Only stored on ``self.port`` here.
         - options - this takes an instance of ChromiumOptions
         - service_args - Deprecated: List of args to pass to the driver service.
           Not read by this constructor.
         - desired_capabilities - Deprecated: Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref". The caller's
           dictionary is never modified; when ``options`` is also given, the
           two are merged into a new dict with the options' capabilities
           taking precedence.
         - service_log_path - Deprecated: Where to log information from the driver.
         - service - Service object to start; required.
         - keep_alive - Whether to configure ChromiumRemoteConnection to use HTTP keep-alive.

        :Raises:
         - AttributeError - if ``service`` is None.
        """
        if executable_path != 'chromedriver':
            warnings.warn(
                'executable_path has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)
        if desired_capabilities is not None:
            warnings.warn(
                'desired_capabilities has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)
        if port != DEFAULT_PORT:
            warnings.warn(
                'port has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)
        self.port = port
        if service_log_path != DEFAULT_SERVICE_LOG_PATH:
            warnings.warn(
                'service_log_path has been deprecated, please pass in a Service object',
                DeprecationWarning,
                stacklevel=2)

        if options is None:
            # Without options, explicit desired_capabilities stay as passed in;
            # otherwise fall back to the capabilities of default options.
            if desired_capabilities is None:
                desired_capabilities = self.create_options().to_capabilities()
        elif desired_capabilities is None:
            desired_capabilities = options.to_capabilities()
        else:
            # Merge into a copy so the caller's dict (often a shared,
            # module-level capabilities template) is not mutated and does not
            # leak these options into drivers created later.
            desired_capabilities = dict(desired_capabilities)
            desired_capabilities.update(options.to_capabilities())

        if service is None:
            raise AttributeError('service cannot be None')

        self.service = service
        self.service.start()

        try:
            RemoteWebDriver.__init__(
                self,
                command_executor=ChromiumRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            # The service is already started at this point; quit() before
            # re-raising the original error.
            self.quit()
            raise
        self._is_remote = False