def driver(config):
    """Yield a WebDriver opened on ``config['url']`` and shut it down afterwards.

    Written as a generator so it can be used as a setup/teardown fixture:
    everything before ``yield`` is setup, everything after is teardown.

    Args:
        config: Mapping with the keys ``browser`` (``'chrome'`` or
            ``'firefox'``), ``version``, ``url`` and ``download_dir``.

    Yields:
        The started WebDriver, already navigated to ``url`` and maximized.

    Raises:
        KeyError: If a required key is missing from ``config``.
        UsupportedBrowserException: If ``browser`` is neither ``'chrome'``
            nor ``'firefox'``.
    """
    browser = config['browser']
    version = config['version']
    url = config['url']
    download_dir = config['download_dir']

    if browser == 'chrome':
        options = ChromeOptions()
        options.add_argument("--window-size=800,600")
        prefs = {"download.default_directory": download_dir}
        options.add_experimental_option('prefs', prefs)
        capabilities = {
            'acceptInsecureCerts': True,
            'browserName': 'chrome',
            'version': version,
        }
        # Chrome runs through a remote hub listening on localhost:4444.
        driver = webdriver.Remote(
            command_executor='http://127.0.0.1:4444/wd/hub/',
            options=options,
            desired_capabilities=capabilities)
    elif browser == 'firefox':
        manager = GeckoDriverManager(version=version)
        driver = webdriver.Firefox(executable_path=manager.install())
    else:
        raise UsupportedBrowserException(f'Usupported browser: "{browser}"')

    try:
        driver.get(url)
        driver.maximize_window()
        yield driver
    finally:
        # quit() ends the whole session (all windows plus the driver
        # process / hub slot); close() would only close the current window.
        # The finally clause also covers failures in get()/maximize_window().
        driver.quit()
def setup(browser):
    """Start a maximized local browser and return its WebDriver.

    Args:
        browser: ``'firefox'`` starts Firefox; ``'chrome'`` and any other
            value start Chrome (Chrome is the fallback).

    Returns:
        The started, maximized WebDriver instance.
    """
    if browser == 'firefox':
        # install() is an instance method, so the manager must be
        # instantiated. The path is passed by keyword because the first
        # positional parameter of webdriver.Firefox is not the executable.
        driver = webdriver.Firefox(
            executable_path=GeckoDriverManager().install())
    else:
        driver = webdriver.Chrome(ChromeDriverManager().install())

    driver.maximize_window()
    return driver
def start_selenium(driver_type='firefox', seleniumwire_driver=False,
                   timeout=60, is_headless=False, proxy=None, **kwargs):
    """Start a Firefox or Chrome WebDriver and return it.

    The driver executable is looked up in the ``_webdrivers`` directory next
    to this file; when it is missing it is downloaded through
    webdriver_manager and copied there.

    Args:
        driver_type: ``'firefox'`` or ``'chrome'``.
        seleniumwire_driver: Use the ``xhr_webdriver`` module instead of the
            plain ``webdriver`` module to build the driver.
        timeout: Page load timeout passed to ``set_page_load_timeout``.
        is_headless: Run the browser headless; a headless browser is not
            maximized.
        proxy: Optional proxy address used for both HTTP and SSL traffic.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        The started WebDriver instance.

    Raises:
        NotImplementedError: If ``driver_type`` is not supported.
    """
    driver_module = xhr_webdriver if seleniumwire_driver else webdriver
    webdrivers_dir = Path(__file__).parent / '_webdrivers'

    if driver_type == 'firefox':
        driver_class = driver_module.Firefox
        driver_path = webdrivers_dir / 'gecko.exe'
        webdriver_manager = GeckoDriverManager()
        options = firefox_options()
        # DesiredCapabilities.* are shared class-level dicts: copy before
        # mutating so a proxy set here does not leak into later calls.
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
        caps_keyword = 'capabilities'
    elif driver_type == 'chrome':
        driver_class = driver_module.Chrome
        driver_path = webdrivers_dir / 'chrome.exe'
        webdriver_manager = ChromeDriverManager()
        options = chrome_options()
        caps = webdriver.DesiredCapabilities.CHROME.copy()
        # The Chrome constructor names this parameter desired_capabilities;
        # it has no ``capabilities`` keyword.
        caps_keyword = 'desired_capabilities'
    else:
        raise NotImplementedError(
            f"Запуск драйвера {driver_type} не реализован")

    # Start the virtual display only after the driver type was validated, so
    # an unsupported type does not leave a stray display running.
    # NOTE(review): the display is never stopped by this function.
    if platform.system() == 'Linux':
        display = Display(visible=0, size=(1280, 1024))
        display.start()

    if proxy:
        caps['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": proxy,
            "sslProxy": proxy,
        }

    driver_path = driver_path.resolve()
    if not driver_path.exists():
        # Download the driver through webdriver_manager and keep a local copy.
        driver_path.parent.mkdir(exist_ok=True)
        cache_path = webdriver_manager.install()
        shutil.copy(cache_path, str(driver_path))

    options.headless = is_headless
    driver = driver_class(options=options,
                          executable_path=str(driver_path),
                          **{caps_keyword: caps})
    if not is_headless:
        driver.maximize_window()
    driver.set_page_load_timeout(timeout)
    return driver
def driver(config):
    """Yield a WebDriver opened on ``config['url']`` and quit it afterwards.

    Written as a generator so it can be used as a setup/teardown fixture:
    everything before ``yield`` is setup, everything after is teardown.

    Args:
        config: Mapping with the keys ``browser`` (``'chrome'`` or
            ``'firefox'``), ``version``, ``url`` and ``download_dir``.

    Yields:
        The started WebDriver, already navigated to ``url`` and maximized.

    Raises:
        KeyError: If a required key is missing from ``config``.
        UsupportedBrowserException: If ``browser`` is neither ``'chrome'``
            nor ``'firefox'``.
    """
    browser = config['browser']
    version = config['version']
    url = config['url']
    download_dir = config['download_dir']

    if browser == 'chrome':
        options = ChromeOptions()
        options.add_argument("--window-size=800,600")
        prefs = {"download.default_directory": download_dir}
        options.add_experimental_option('prefs', prefs)
        # Chrome runs through a remote hub listening on localhost:4444.
        driver = webdriver.Remote(
            command_executor='http://127.0.0.1:4444/wd/hub/',
            options=options,
            desired_capabilities={'acceptInsecureCerts': True})
    elif browser == 'firefox':
        manager = GeckoDriverManager(version=version)
        driver = webdriver.Firefox(executable_path=manager.install())
    else:
        raise UsupportedBrowserException(f'Usupported browser: "{browser}"')

    try:
        driver.get(url)
        driver.maximize_window()
        yield driver
    finally:
        # quit = close the page and stop the browser driver
        # close = close the page, the browser driver binary keeps running
        # The finally clause also covers failures in get()/maximize_window(),
        # which would otherwise leave the session open.
        driver.quit()
import dictionary import csv import error_logger from google import google from datetime import datetime, timedelta from selenium import webdriver from bs4 import BeautifulSoup from webdriver_manager.firefox import GeckoDriverManager from selenium.webdriver.firefox.options import Options # create browser instance manager = GeckoDriverManager() browserOptions = Options() browserOptions.add_argument("--headless") driver = webdriver.Firefox(executable_path=manager.install(), options=browserOptions) NOT_FOUND = 'None' def startScraping(): try: dictionaryDetail = dictionary.readDict() searchKey = dictionaryDetail['search_key'] filters = dictionaryDetail['filters'] startPage = dictionaryDetail['next_page'] timeNow = datetime.now() if searchKey is "": print(
class DownloadManager(Service):
    """Service that downloads SAP Notes through a headless Firefox webdriver.

    Requests are queued with :meth:`request` and processed one by one by a
    daemon thread running :meth:`download`. The outcome of a request is
    announced through the ``request-complete`` / ``request-canceled`` signals.
    """

    # NOTE: these are class-level attributes, so the queue and the request
    # table are shared by every instance of the class.
    th = None  # worker thread running download()
    gdm = None  # GeckoDriverManager used to install geckodriver
    queue = queue.Queue()  # request ids waiting to be downloaded
    requests = {}  # request id -> request details
    retry = 0  # consecutive webdriver failures
    driver = None  # current webdriver instance, if any
    driver_status = DriverStatus.STOPPED

    def initialize(self):
        """Register the signals, kill stale drivers and start the worker."""
        GObject.signal_new('download-profile-missing', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, None, ())
        GObject.signal_new('request-complete', DownloadManager,
                           GObject.SignalFlags.RUN_LAST,
                           GObject.TYPE_PYOBJECT, (GObject.TYPE_PYOBJECT, ))
        GObject.signal_new('request-canceled', DownloadManager,
                           GObject.SignalFlags.RUN_LAST,
                           GObject.TYPE_PYOBJECT, (GObject.TYPE_PYOBJECT, ))
        GObject.signal_new('download-canceled-user', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, None, ())
        GObject.signal_new('download-canceled-system', DownloadManager,
                           GObject.SignalFlags.RUN_LAST, None, ())
        self.kill_gecko_processes()
        self.connect_signals()
        self.gdm = GeckoDriverManager(log_level=logging.ERROR)
        self.gecko_downloader = SeleniumService(
            executable_path=self.gdm.install())
        self._start_download_thread()
        self.log.debug("Basico Download Manager started")

    def _start_download_thread(self):
        """Create and start the daemon thread that runs download()."""
        self.th = threading.Thread(name='download', target=self.download,
                                   daemon=True)
        self.th.start()

    def get_services(self):
        """Look up the services this one depends on."""
        self.srvutl = self.get_service('Utils')

    def connect_signals(self):
        """Connect this service's signals to their handlers."""
        self.connect('download-profile-missing',
                     self.srvutl.download_webdriver_setup)
        self.connect('download-canceled-user', self.cancel)

    def kill_gecko_processes(self):
        """Kill every running process named ``geckodriver``."""
        np = 0
        for proc in psutil.process_iter():
            try:
                if proc.name() == 'geckodriver':
                    proc.kill()
                    np += 1
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process ended meanwhile or is not ours to inspect/kill.
                continue
        self.log.debug("Killed %d geckodriver instances", np)

    def check_profile(self, rid):
        """Return True when the Firefox profile directory contains files.

        Emits ``download-profile-missing`` when it does not.
        """
        files = glob.glob(os.path.join(LPATH['FIREFOX_PROFILE'], '*'))
        has_profile = bool(files)
        if not has_profile:
            self.log.warning("[%s] Webdriver profile available? %s",
                             rid, has_profile)
            self.emit('download-profile-missing')
        return has_profile

    def __set_driver(self, driver):
        self.driver = driver

    def get_driver(self):
        """Return the current webdriver instance (or None)."""
        return self.driver

    def __set_driver_status(self, status):
        self.driver_status = status

    def get_driver_status(self):
        """Return the current DriverStatus."""
        return self.driver_status

    def _discard_driver(self):
        """Quit the current webdriver (best effort) and forget it."""
        driver = self.driver
        self.driver = None
        if driver is None:
            return
        try:
            driver.quit()
        except Exception as error:
            # The driver is being thrown away because it already failed.
            self.log.debug("Webdriver could not be stopped: %s", error)

    def get_url_uri(self):
        # NOTE(review): url_uri is not assigned anywhere in this class --
        # confirm it is set elsewhere before relying on this accessor.
        return self.url_uri

    def get_url_type(self):
        # NOTE(review): url_type is not assigned anywhere in this class --
        # confirm it is set elsewhere before relying on this accessor.
        return self.url_type

    def request(self, rid, url_sid, url_uri, url_type):
        """Queue a download request after a random delay.

        Args:
            rid: Request identifier.
            url_sid: SAP Note identifier.
            url_uri: URL to download.
            url_type: Stored with the request under the ``url_typ`` key.
        """
        wait_time = 30 * random.random()  # random wait from 0 to 30s
        self.log.debug("Download waiting time: %2.2f", wait_time)
        time.sleep(wait_time)

        request_info = {
            'url_rid': rid,
            'url_sid': url_sid,
            'url_uri': url_uri,
            'url_typ': url_type,
        }

        try:
            if not self.th.is_alive():
                self.log.debug("[%s] Restarting download process", rid)
                self._start_download_thread()
        except Exception:
            self.log.error(
                "You need an internet connection and having configured your custom Firefox profile"
            )
            # The request is not registered in self.requests yet, so the
            # signal carries the details built above.
            self.emit("request-canceled", request_info)
            return

        if not self.check_profile(rid):
            return

        self.log.info("[%s] Request enqueued for SAP Note %s", rid, url_sid)
        self.requests[rid] = request_info
        self.queue.put(rid)

    def download(self):
        """Worker loop: take request ids from the queue and load their URLs.

        Returns (ending the thread) once the webdriver has been disabled
        after more than two consecutive failures.
        """
        while True:
            rid = self.queue.get()
            info = self.requests[rid]
            url = info['url_uri']
            sid = info['url_sid']
            self.log.debug("[%s] Request download for SAP Note %s", rid, sid)

            if self.retry > 2:
                self.__set_driver_status(DriverStatus.DISABLE)

            status = self.get_driver_status()
            if status == DriverStatus.DISABLE:
                self.log.error("[%s] Webdriver not working anymore", rid)
                return None

            while status == DriverStatus.RUNNING:
                time.sleep(0.1)  # poll without spinning the CPU
                status = self.get_driver_status()

            if status == DriverStatus.STOPPED:
                try:
                    options = Options()
                    options.profile = LPATH['FIREFOX_PROFILE']
                    options.headless = True
                    driver = webdriver.Firefox(options=options,
                                               service=self.gecko_downloader)
                    self.__set_driver_status(DriverStatus.WAITING)
                    self.__set_driver(driver)
                    self.log.debug("[%s] Webdriver instance created and ready",
                                   rid)
                except Exception as error:
                    self.__set_driver_status(DriverStatus.STOPPED)
                    self.log.error("[%s] Webdriver Error: %s", rid, error)
                    self.retry += 1
                    # Queue the same request again for another attempt.
                    self.request(rid, sid, url, info['url_typ'])
                status = self.get_driver_status()

            if status == DriverStatus.WAITING:
                self.__set_driver_status(DriverStatus.RUNNING)
                try:
                    driver = self.get_driver()
                    driver.get(url)
                    element_present = EC.presence_of_element_located(
                        (By.ID, 'content'))
                    WebDriverWait(driver, TIMEOUT).until(element_present)
                except TimeoutException:
                    # The driver still works; only the expected element did
                    # not show up in time. The request is still reported so
                    # listeners are not left waiting.
                    self.log.warning(
                        "[%s] Timeout waiting for SAP Note %s content",
                        rid, sid)
                    finished = True
                except Exception as error:
                    self.log.debug("[%s] Webdriver Error: %s", rid, error)
                    self.retry += 1
                    # Drop the broken driver so a fresh one is created on the
                    # next attempt, then queue the same request again.
                    self._discard_driver()
                    self.__set_driver_status(DriverStatus.STOPPED)
                    self.request(rid, sid, url, info['url_typ'])
                    finished = False
                else:
                    self.log.debug("[%s] SAP Note %s downloaded", rid, sid)
                    finished = True

                # Only a finished request resets the failure counter and is
                # reported; a failed one keeps STOPPED and its retry count.
                if finished:
                    self.retry = 0
                    self.__set_driver_status(DriverStatus.WAITING)
                    self.emit('request-complete', info)

            self.queue.task_done()

    def cancel(self, *args):
        """Drop every request still waiting in the queue."""
        with self.queue.mutex:
            self.queue.queue.clear()
        self.log.debug("Download queue empty")

    def cancel_by_user(self, *args):
        """Announce that the user asked to cancel the downloads."""
        self.log.warning("Cancel download requested by user")
        self.emit('download-canceled-user')

    def browse_note(self, *args):
        """Open the SAP Note given as ``args[1]`` in Firefox."""
        sid = args[1]
        url = SAP_NOTE_URL % sid
        # Argument list instead of a shell string: the profile path and the
        # URL are passed verbatim, whatever characters they contain.
        subprocess.Popen(
            ["firefox", "--profile", LPATH['FIREFOX_PROFILE'], url])
        self.log.info("Displaying SAP Note %s", sid)

    def browse_pdf(self, *args):
        """Open the PDF of the SAP Note given as ``args[1]`` in Firefox."""
        sid = args[1]
        url = SAP_NOTE_URL_PDF % sid
        subprocess.Popen(
            ["firefox", "--profile", LPATH['FIREFOX_PROFILE'], url])
        self.log.info("Displaying PDF for SAP Note %s", sid)

    def end(self):
        """Stop the webdriver, if any, and release the queue."""
        self.log.debug("Stopping Basico Download Manager")
        # FIXME: joining the queue here does not work after a canceled
        # download, so the queue is not joined.
        if self.driver is not None:
            self.log.debug("Stopping webdriver")
            self.driver.quit()
            self.log.debug("Webdriver stopped")
        self.queue = None
        self.log.debug("Basico Download Manager stopped")
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
import time

# Browser to launch: "chrome" or "firefox".
broswerName = "chrome"

# install() is an instance method, so each manager has to be instantiated
# first; the driver classes are webdriver.Chrome / webdriver.Firefox
# (webdriver.firefox is a module, not a class).
if broswerName == "chrome":
    driver = webdriver.Chrome(ChromeDriverManager().install())
elif broswerName == "firefox":
    driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
else:
    print('Please select required broswer'+broswerName)
from selenium import webdriver
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager


def _exercise_site(driver):
    """Open the site, print its title and current URL, then quit the driver."""
    try:
        driver.implicitly_wait(5)
        driver.maximize_window()
        driver.get("https://reminderbot.io/")
        print("Title is "+driver.title)
        driver.minimize_window()
        print("Current Url is "+driver.current_url)
        driver.maximize_window()
    finally:
        # Always end the session, even when one of the steps above fails.
        driver.quit()


# The driver classes are webdriver.Firefox / webdriver.Edge (the lower-case
# names are modules), and install() must be called on a manager instance.
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
_exercise_site(driver)

driver = webdriver.Edge(executable_path=EdgeChromiumDriverManager().install())
_exercise_site(driver)