示例#1
0
def driver(config):
    """Start a browser for the given configuration, open the start URL and yield it.

    This is a generator meant for setup/teardown use: everything before the
    ``yield`` is setup, and the browser session is shut down once the
    generator is resumed or closed.

    Args:
        config: Mapping providing the keys ``'browser'`` (``'chrome'`` or
            ``'firefox'``), ``'version'``, ``'url'`` and ``'download_dir'``.

    Yields:
        The started WebDriver, with ``url`` loaded and the window maximized.

    Raises:
        KeyError: If one of the required keys is missing from ``config``.
        UsupportedBrowserException: If ``browser`` is neither ``'chrome'``
            nor ``'firefox'``.
    """
    browser = config['browser']
    version = config['version']
    url = config['url']
    download_dir = config['download_dir']

    if browser == 'chrome':
        options = ChromeOptions()
        options.add_argument("--window-size=800,600")

        # Send downloads to the configured directory.
        prefs = {"download.default_directory": download_dir}
        options.add_experimental_option('prefs', prefs)

        capabilities = {
            'acceptInsecureCerts': True,
            'browserName': 'chrome',
            'version': version,
        }

        # Chrome runs on a Selenium hub listening on localhost:4444.
        browser_driver = webdriver.Remote(
            command_executor='http://127.0.0.1:4444/wd/hub/',
            options=options,
            desired_capabilities=capabilities)
    elif browser == 'firefox':
        # Firefox runs locally with a geckodriver fetched by webdriver_manager.
        manager = GeckoDriverManager(version=version)
        browser_driver = webdriver.Firefox(executable_path=manager.install())
    else:
        raise UsupportedBrowserException(f'Usupported browser: "{browser}"')

    try:
        browser_driver.get(url)
        browser_driver.maximize_window()
        yield browser_driver
    finally:
        # quit() (not close()) ends the whole session: close() only closes the
        # current window and leaves the driver / remote session running.
        # The finally also covers a failure in get()/maximize_window().
        browser_driver.quit()
示例#2
0
def setup(browser):
    """Start a local browser with a maximized window and return its driver.

    Args:
        browser: ``'firefox'`` starts Firefox; ``'chrome'`` and any other
            value start Chrome (Chrome is the fallback).

    Returns:
        The started WebDriver instance.
    """
    if browser == 'firefox':
        # install() is an instance method, so the manager must be instantiated.
        # The path is passed by keyword because the first positional parameter
        # of webdriver.Firefox is the profile, not the driver path.
        driver = webdriver.Firefox(
            executable_path=GeckoDriverManager().install())
    else:
        # 'chrome' and every unrecognised value fall back to Chrome.
        driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.maximize_window()
    return driver
示例#3
0
def start_selenium(driver_type='firefox',
                   seleniumwire_driver=False,
                   timeout=60,
                   is_headless=False,
                   proxy=None,
                   **kwargs):
    """Start a Firefox or Chrome WebDriver and return it.

    The driver binary is cached next to this module in ``_webdrivers``; when
    it is missing it is fetched with webdriver_manager and copied there.

    Args:
        driver_type: ``'firefox'`` or ``'chrome'``.
        seleniumwire_driver: Use the ``xhr_webdriver`` module instead of the
            plain ``webdriver`` module to build the driver.
        timeout: Page-load timeout passed to ``set_page_load_timeout``.
        is_headless: Run the browser headless; the window is only maximized
            when this is false.
        proxy: Optional proxy address used for both HTTP and SSL traffic.
        **kwargs: Accepted for compatibility; not used.

    Returns:
        The started driver.

    Raises:
        NotImplementedError: If ``driver_type`` is not supported.
    """
    # Validate first so an unsupported type does not leave a virtual display
    # running. The message text is user-facing and kept as is.
    if driver_type not in ('firefox', 'chrome'):
        raise NotImplementedError(
            f"Запуск драйвера {driver_type} не реализован")

    if platform.system() == 'Linux':
        display = Display(visible=0, size=(1280, 1024))
        display.start()

    driver_module = xhr_webdriver if seleniumwire_driver else webdriver
    webdrivers_dir = Path(__file__).parent / '_webdrivers'

    # Copy the capability templates: DesiredCapabilities.* are shared
    # class-level dicts, and writing the proxy into them directly would leak
    # the setting into every later driver started in this process.
    if driver_type == 'firefox':
        driver_class = driver_module.Firefox
        driver_path = webdrivers_dir / 'gecko.exe'
        webdriver_manager = GeckoDriverManager()
        options = firefox_options()
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
    else:
        driver_class = driver_module.Chrome
        driver_path = webdrivers_dir / 'chrome.exe'
        webdriver_manager = ChromeDriverManager()
        options = chrome_options()
        caps = webdriver.DesiredCapabilities.CHROME.copy()

    if proxy:
        caps['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": proxy,
            "sslProxy": proxy
        }

    driver_path = driver_path.resolve()
    if not driver_path.exists():
        driver_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path = webdriver_manager.install()
        shutil.copy(cache_path, str(driver_path))

    options.headless = is_headless
    driver = driver_class(options=options,
                          executable_path=str(driver_path),
                          capabilities=caps)
    if not is_headless:
        driver.maximize_window()
    driver.set_page_load_timeout(timeout)
    return driver
示例#4
0
def driver(config):
    """Start a browser for the given configuration, open the start URL and yield it.

    This is a generator meant for setup/teardown use: everything before the
    ``yield`` is setup, and the browser session is always shut down afterwards.

    Args:
        config: Mapping providing the keys ``'browser'`` (``'chrome'`` or
            ``'firefox'``), ``'version'``, ``'url'`` and ``'download_dir'``.

    Yields:
        The started WebDriver, with ``url`` loaded and the window maximized.

    Raises:
        KeyError: If one of the required keys is missing from ``config``.
        UsupportedBrowserException: If ``browser`` is neither ``'chrome'``
            nor ``'firefox'``.
    """
    browser = config['browser']
    version = config['version']
    url = config['url']
    download_dir = config['download_dir']

    if browser == 'chrome':
        options = ChromeOptions()
        options.add_argument("--window-size=800,600")

        # Send downloads to the configured directory.
        prefs = {"download.default_directory": download_dir}
        options.add_experimental_option('prefs', prefs)

        # Chrome runs on a Selenium hub listening on localhost:4444.
        # A local alternative would be webdriver.Chrome with a chromedriver
        # installed through ChromeDriverManager(version=version).
        browser_driver = webdriver.Remote(
            command_executor='http://127.0.0.1:4444/wd/hub/',
            options=options,
            desired_capabilities={'acceptInsecureCerts': True})
    elif browser == 'firefox':
        # Firefox runs locally with a geckodriver fetched by webdriver_manager.
        manager = GeckoDriverManager(version=version)
        browser_driver = webdriver.Firefox(executable_path=manager.install())
    else:
        raise UsupportedBrowserException(f'Usupported browser: "{browser}"')

    try:
        browser_driver.get(url)
        browser_driver.maximize_window()
        yield browser_driver
    finally:
        # quit()  = close the page and stop the browser driver.
        # close() = close the page only; the browser driver binary stays running.
        # The finally also releases the session when get()/maximize_window()
        # fails before the yield is reached.
        browser_driver.quit()
import dictionary
import csv
import error_logger

from google import google
from datetime import datetime, timedelta
from selenium import webdriver
from bs4 import BeautifulSoup
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.firefox.options import Options

# Create the browser instance at import time: a headless Firefox whose
# geckodriver binary is provided by webdriver_manager.
manager = GeckoDriverManager()
browserOptions = Options()
# Run Firefox without a visible window.
browserOptions.add_argument("--headless")
# manager.install() returns the path that is handed to Selenium as
# executable_path.
driver = webdriver.Firefox(executable_path=manager.install(),
                           options=browserOptions)

# Note: this is the string 'None', not the None object.
# NOTE(review): presumably a placeholder for values that could not be
# scraped -- confirm against the code that uses it.
NOT_FOUND = 'None'


def startScraping():
    try:
        dictionaryDetail = dictionary.readDict()
        searchKey = dictionaryDetail['search_key']
        filters = dictionaryDetail['filters']
        startPage = dictionaryDetail['next_page']
        timeNow = datetime.now()

        if searchKey is "":
            print(
示例#6
0
class DownloadManager(Service):
    """Service that downloads SAP Notes through a headless Firefox webdriver.

    Requests are registered with :meth:`request`, which puts their id on a
    queue. A daemon thread running :meth:`download` consumes the queue and
    loads each URL with a Selenium Firefox driver that uses the profile
    stored under ``LPATH['FIREFOX_PROFILE']``. Progress is reported through
    the GObject signals registered in :meth:`initialize`.
    """

    # NOTE: these are class-level attributes; the mutable ones (queue,
    # requests) are shared by every instance of the class.
    th = None  # worker thread running download()
    gdm = None  # GeckoDriverManager used to install geckodriver
    queue = queue.Queue()  # ids of the pending requests
    requests = {}  # request id -> dict with url_rid/url_sid/url_uri/url_typ
    retry = 0  # consecutive webdriver failures
    driver = None  # current webdriver instance, if any
    driver_status = DriverStatus.STOPPED

    def initialize(self):
        """Register the signals, clean up old geckodrivers and start the worker."""
        GObject.signal_new('download-profile-missing', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, None, ())
        GObject.signal_new('request-complete', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, GObject.TYPE_PYOBJECT,
                           (GObject.TYPE_PYOBJECT, ))
        GObject.signal_new('request-canceled', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, GObject.TYPE_PYOBJECT,
                           (GObject.TYPE_PYOBJECT, ))
        GObject.signal_new('download-canceled-user', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, None, ())
        GObject.signal_new('download-canceled-system', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, None, ())

        self.kill_gecko_processes()
        self.connect_signals()
        self.gdm = GeckoDriverManager(log_level=logging.ERROR)
        self.gecko_downloader = SeleniumService(
            executable_path=self.gdm.install())
        self._start_download_thread()
        self.log.debug("Basico Download Manager started")

    def get_services(self):
        """Look up the services this one depends on."""
        self.srvutl = self.get_service('Utils')

    def connect_signals(self):
        """Connect the signals handled by this service."""
        self.connect('download-profile-missing',
                     self.srvutl.download_webdriver_setup)
        self.connect('download-canceled-user', self.cancel)

    def kill_gecko_processes(self):
        """Kill every running process named ``geckodriver``."""
        np = 0
        for proc in psutil.process_iter():
            try:
                if proc.name() == 'geckodriver':
                    proc.kill()
                    np += 1
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process ended while iterating, or it is not ours to
                # inspect or kill; either way there is nothing to clean up.
                continue
        self.log.debug("Killed %d geckodriver instances", np)

    def check_profile(self, rid):
        """Return whether the Firefox profile directory contains any file.

        Emits ``download-profile-missing`` when it does not.
        """
        files = glob.glob(os.path.join(LPATH['FIREFOX_PROFILE'], '*'))
        has_profile = len(files) > 0
        if not has_profile:
            self.log.warning("[%s] Webdriver profile available? %s", rid,
                             has_profile)
            self.emit('download-profile-missing')
        return has_profile

    def __set_driver(self, driver):
        self.driver = driver

    def get_driver(self):
        """Return the current webdriver instance (``None`` if there is none)."""
        return self.driver

    def __set_driver_status(self, status):
        self.driver_status = status

    def get_driver_status(self):
        """Return the current ``DriverStatus`` value."""
        return self.driver_status

    def get_url_uri(self):
        # NOTE(review): url_uri is not assigned anywhere in this class;
        # presumably it is set elsewhere -- confirm before relying on it.
        return self.url_uri

    def get_url_type(self):
        # NOTE(review): url_type is not assigned anywhere in this class;
        # presumably it is set elsewhere -- confirm before relying on it.
        return self.url_type

    def _start_download_thread(self):
        """Create and start the daemon thread that runs :meth:`download`."""
        self.th = threading.Thread(name='download',
                                   target=self.download,
                                   daemon=True)
        self.th.start()

    def request(self, rid, url_sid, url_uri, url_type):
        """Register a download request and put it on the queue.

        The call first sleeps for a random time, restarts the worker thread
        if it is no longer alive, and gives up (without enqueuing) when the
        Firefox profile is missing.

        Args:
            rid: Request id, used as key in ``requests`` and as queue item.
            url_sid: SAP Note id the request refers to.
            url_uri: URL to load.
            url_type: Stored with the request under ``'url_typ'``.
        """
        wait_time = 30 * random.random()  # random wait from 0 to 30s
        self.log.debug("Download waiting time: %2.2f", wait_time)
        time.sleep(wait_time)

        record = {
            'url_rid': rid,
            'url_sid': url_sid,
            'url_uri': url_uri,
            'url_typ': url_type,
        }

        try:
            if not self.th.is_alive():
                self.log.debug("[%s] Restarting download process", rid)
                self._start_download_thread()
        except Exception:
            self.log.error(
                "You need an internet connection and having configured your custom Firefox profile"
            )
            # Emit the record built above: the request is not stored in
            # self.requests yet at this point.
            self.emit("request-canceled", record)
            return

        if not self.check_profile(rid):
            return

        self.log.info("[%s] Request enqueued for SAP Note %s", rid, url_sid)
        self.requests[rid] = record
        self.queue.put(rid)

    def _requeue(self, rid):
        """Submit the stored request ``rid`` again after a webdriver failure."""
        record = self.requests[rid]
        self.request(rid, record['url_sid'], record['url_uri'],
                     record['url_typ'])

    def _discard_driver(self):
        """Quit the current webdriver (best effort) and forget it."""
        driver = self.get_driver()
        if driver is None:
            return
        try:
            driver.quit()
        except Exception as error:
            # The browser may already be gone; nothing more can be done.
            self.log.debug("Error stopping broken webdriver: %s", error)
        self.__set_driver(None)

    def _start_driver(self, rid):
        """Create the headless Firefox driver; on failure re-submit ``rid``."""
        try:
            options = Options()
            options.profile = LPATH['FIREFOX_PROFILE']
            options.headless = True
            driver = webdriver.Firefox(options=options,
                                       service=self.gecko_downloader)
            self.__set_driver_status(DriverStatus.WAITING)
            self.__set_driver(driver)
            self.log.debug("[%s] Webdriver instance created and ready", rid)
        except Exception as error:
            self.__set_driver_status(DriverStatus.STOPPED)
            self.log.error("[%s] Webdriver Error: %s", rid, error)
            self.retry += 1
            self._requeue(rid)

    def _fetch(self, rid):
        """Load the URL of request ``rid`` and emit ``request-complete``.

        On an unexpected webdriver error the driver is discarded, the retry
        counter is increased and the request is submitted again instead of
        being reported as complete.
        """
        record = self.requests[rid]
        self.__set_driver_status(DriverStatus.RUNNING)
        try:
            driver = self.get_driver()
            driver.get(record['url_uri'])
            element_present = EC.presence_of_element_located(
                (By.ID, 'content'))
            WebDriverWait(driver, TIMEOUT).until(element_present)
        except TimeoutException:
            # NOTE(review): a timeout has always been reported through
            # 'request-complete' as well; that behavior is kept here --
            # confirm it is what the signal handlers expect.
            self.log.warning("[%s] Timeout waiting for SAP Note %s", rid,
                             record['url_sid'])
        except Exception as error:
            self.log.debug("[%s] Webdriver Error: %s", rid, error)
            self.retry += 1
            self._discard_driver()
            self.__set_driver_status(DriverStatus.STOPPED)
            self._requeue(rid)
            return

        self.log.debug("[%s] SAP Note %s downloaded", rid, record['url_sid'])
        self.retry = 0
        self.__set_driver_status(DriverStatus.WAITING)
        self.emit('request-complete', record)

    def download(self):
        """Worker loop: take request ids from the queue and download them.

        The loop ends (returning ``None``) once the driver has been disabled
        after more than two consecutive failures.
        """
        while True:
            rid = self.queue.get()
            sid = self.requests[rid]['url_sid']
            self.log.debug("[%s] Request download for SAP Note %s", rid, sid)
            if self.retry > 2:
                self.__set_driver_status(DriverStatus.DISABLE)

            status = self.get_driver_status()
            if status == DriverStatus.DISABLE:
                self.log.error("[%s] Webdriver not working anymore", rid)
                return None

            while status == DriverStatus.RUNNING:
                # Sleep between polls instead of spinning on the status.
                time.sleep(0.1)
                status = self.get_driver_status()

            if status == DriverStatus.STOPPED:
                self._start_driver(rid)

            if self.get_driver_status() == DriverStatus.WAITING:
                self._fetch(rid)
            self.queue.task_done()

    def cancel(self, *args):
        """Drop every pending request from the queue."""
        with self.queue.mutex:
            self.queue.queue.clear()
        self.log.debug("Download queue empty")

    def cancel_by_user(self, *args):
        """Emit ``download-canceled-user`` (which is connected to cancel)."""
        self.log.warning("Cancel download requested by user")
        self.emit('download-canceled-user')

    def browse_note(self, *args):
        """Open the SAP Note whose id is ``args[1]`` in Firefox."""
        sid = args[1]
        url = SAP_NOTE_URL % sid
        # Argument list instead of a shell string: no shell parsing of the
        # profile path or of characters inside the URL.
        subprocess.Popen(
            ['firefox', '--profile', LPATH['FIREFOX_PROFILE'], url])
        self.log.info("Displaying SAP Note %s", sid)

    def browse_pdf(self, *args):
        """Open the PDF of the SAP Note whose id is ``args[1]`` in Firefox."""
        sid = args[1]
        url = SAP_NOTE_URL_PDF % sid
        # Argument list instead of a shell string: no shell parsing of the
        # profile path or of characters inside the URL.
        subprocess.Popen(
            ['firefox', '--profile', LPATH['FIREFOX_PROFILE'], url])
        self.log.info("Displaying PDF for SAP Note %s", sid)

    def end(self):
        """Stop the webdriver, if any, and drop the queue."""
        self.log.debug("Stopping Basico Download Manager")
        # ~ self.queue.join() # FIXME: after cancel download, it doesn't work
        if self.driver is not None:
            self.log.debug("Stopping webdriver")
            self.driver.quit()
            self.log.debug("Webdriver stopped")
        self.queue = None
        self.log.debug("Basico Download Manager stopped")
示例#7
0
from selenium import  webdriver
from selenium.webdriver.common.by import  By
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import  time

# Browser to start: "chrome" or "firefox".
broswerName = "chrome"

if broswerName == "chrome":
    # install() is an instance method: the manager has to be instantiated.
    driver = webdriver.Chrome(ChromeDriverManager().install())
elif broswerName == "firefox":
    # webdriver.firefox is a module; the driver class is webdriver.Firefox.
    driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
else:
    # No driver is created for an unknown browser name.
    print('Please select required broswer'+broswerName)
示例#8
0
from selenium import webdriver
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager

def _smoke_test(driver):
    """Open the demo page, print its title and URL, then quit the browser."""
    try:
        driver.implicitly_wait(5)
        driver.maximize_window()
        driver.get("https://reminderbot.io/")
        print("Title is "+driver.title)
        driver.minimize_window()
        print("Current Url is "+driver.current_url)
        driver.maximize_window()
    finally:
        # Always end the session, even if one of the steps above fails.
        driver.quit()


# webdriver.firefox / webdriver.edge are modules; the driver classes are
# webdriver.Firefox / webdriver.Edge. install() is an instance method, so the
# managers have to be instantiated.
# Alternative with a local binary: executable_path="GeckoDriver.exe"
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
_smoke_test(driver)

# Alternative with a local binary: executable_path="MicrosoftWebDriver.exe"
driver = webdriver.Edge(executable_path=EdgeChromiumDriverManager().install())
_smoke_test(driver)