def browser(self):
    """Build the WebDriver selected by this object's configuration.

    The browser name, implicit-wait value and headless flag are read from
    ``self.config_browser()``, ``self.config_wait_time()`` and
    ``self.headless_browser()``.  Headless mode is switched on only when the
    flag is exactly ``'yes'``.

    :return: driver instance with the implicit wait already applied
    :raises Exception: if the configured browser is not ``'chrome'``,
        ``'firefox'`` or ``'opera'``
    """
    def as_headless(opts):
        # Flip the headless switch and hand the same options object back.
        opts.headless = True
        return opts

    name = self.config_browser()
    wait_time = self.config_wait_time()
    run_headless = self.headless_browser() == 'yes'

    # Reject unknown browsers up front so the branches below only deal with
    # supported names.
    if name not in ('chrome', 'firefox', 'opera'):
        raise Exception(f'"{name}" is not a supported browser')

    if name == 'chrome':
        if run_headless:
            driver = Chrome(options=as_headless(chrome_options()))
        else:
            driver = Chrome()
    elif name == 'firefox':
        if run_headless:
            driver = Firefox(options=as_headless(ff_options()))
        else:
            driver = Firefox()
    else:
        if run_headless:
            driver = Opera(options=as_headless(opera_options()))
        else:
            driver = Opera()

    driver.implicitly_wait(wait_time)
    return driver
def get_browser_driver():
    """Create the web driver selected by the module-level ``WEB_DRIVER`` value.

    Supported values are ``'CHROME'``, ``'FIREFOX'`` and ``'SAFARI'``.  The
    driver executable is looked up inside ``PROJECT_PATH``.  Chrome and
    Firefox are started headless when ``HEADLESS`` is truthy; Safari is
    always started with a window.

    :return: driver instance object
    :raises Exception: if ``WEB_DRIVER`` is not one of the supported names
    """
    log.info(f"Webdriver is: {WEB_DRIVER}, running headless: {HEADLESS}")

    # Validate before touching DesiredCapabilities: looking up an unknown name
    # there fails with AttributeError and hides the intended error below.
    if WEB_DRIVER not in ('CHROME', 'FIREFOX', 'SAFARI'):
        raise Exception(f'Unknown/unsupported driver selected: {WEB_DRIVER}')

    # dict(...) copies the class-level capabilities so they are never mutated.
    capabilities = dict(getattr(DesiredCapabilities, WEB_DRIVER))

    if WEB_DRIVER == 'CHROME':
        chromedriver = os.path.join(PROJECT_PATH, "chromedriver.exe")
        chrome_options = ch_options()
        if HEADLESS:
            chrome_options.set_headless()
        browser_driver = Chrome(executable_path=chromedriver,
                                desired_capabilities=capabilities,
                                options=chrome_options)
    elif WEB_DRIVER == 'FIREFOX':
        geckodriver = os.path.join(PROJECT_PATH, "geckodriver.exe")
        firefox_options = ff_options()
        if HEADLESS:
            firefox_options.set_headless()
        browser_driver = Firefox(executable_path=geckodriver,
                                 capabilities=capabilities,
                                 options=firefox_options)
    else:
        safaridriver = os.path.join(PROJECT_PATH, "safaridriver.exe")
        # headless mode is not possible right now in Safari
        browser_driver = Safari(executable_path=safaridriver,
                                capabilities=capabilities)
    return browser_driver
def firefox(self, sign_test: bool):
    """Start a Firefox driver configured for automatic file downloads.

    When ``sign_test`` equals ``False`` the browser runs headless; otherwise
    the extension returned by ``self.extensions_path(False)`` is added to the
    profile.  Downloads are saved to ``self.save_folder()`` without a
    prompt for the MIME types listed below, and the built-in PDF viewer is
    disabled.

    :param sign_test: selects between the headless run and the run with the
        extension installed
    :return: the ``webdriver.Firefox`` instance
    """
    driver_path = self.firefox_path()
    download_dir = self.save_folder()
    extension_file = self.extensions_path(False)

    profile = webdriver.FirefoxProfile()
    opts = ff_options()

    # Equality (not truthiness) is kept on purpose so that values such as
    # None still take the "with extension" path, as before.
    headless_run = sign_test == False  # noqa: E712
    if headless_run:
        opts.headless = True
    else:
        profile.add_extension(extension=extension_file)

    # NOTE(review): assumes the download preferences apply to both modes;
    # confirm against the original indentation.
    download_prefs = (
        ("browser.download.folderList", 2),
        ("browser.download.dir", download_dir),
        ("browser.download.useDownloadDir", True),
        ("browser.download.viewableInternally.enabledTypes", ""),
        ("browser.helperApps.neverAsk.saveToDisk",
         "application/pdf;text/plain;application/text;text/xml;application/xml"),
        ("pdfjs.disabled", True),
    )
    for pref_name, pref_value in download_prefs:
        opts.set_preference(pref_name, pref_value)

    return webdriver.Firefox(executable_path=driver_path,
                             firefox_profile=profile,
                             options=opts)
def create_driver(self, spider):
    """Create a Firefox driver, optionally routed through a random proxy.

    A proxy is picked at random from ``spider.proxies`` when that collection
    is non-empty; otherwise the driver connects directly.  WebRTC
    (``media.peerconnection`` / ``media.navigator``) is disabled in the
    profile.

    :param spider: object exposing a ``proxies`` collection
    :return: the ``webdriver.Firefox`` instance
    """
    random_proxy = random.choice(spider.proxies) if spider.proxies else None

    # Headless mode and the user-agent override are currently not applied.
    options = ff_options()
    for argument in ('--no-sandbox',
                     '--disable-javascript',
                     '--disable-dev-shm-usage',
                     '--width=1460',
                     '--height=780'):
        options.add_argument(argument)

    # Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level
    # dict, so writing the proxy into it would leak into every later driver
    # (including ones created without a proxy).
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True
    if random_proxy:
        firefox_capabilities['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": random_proxy,
            "ftpProxy": random_proxy,
            "sslProxy": random_proxy
        }

    profile = webdriver.FirefoxProfile()
    profile.set_preference("media.peerconnection.enabled", False)
    profile.set_preference("media.navigator.enabled", False)
    profile.update_preferences()

    driver = webdriver.Firefox(
        executable_path=settings.FIREFOX_WEB_DRIVER_PATH,
        capabilities=firefox_capabilities,
        firefox_profile=profile,
        firefox_options=options)
    return driver
def browser():
    """Yield a WebDriver for the configured browser and quit it afterwards.

    The browser name comes from ``json_config_parse.get_browser()``.  Chrome
    is started with a visible window; Firefox is started headless.  This is a
    generator: the driver is yielded once and ``quit()`` runs when the
    generator is resumed or closed.

    :return: generator yielding the driver instance
    :raises Exception: if the configured browser is neither ``'chrome'`` nor
        ``'firefox'``
    """
    # TODO: refactor this part
    browser_mode = json_config_parse.get_browser()

    # Initialize WebDriver
    if browser_mode == 'chrome':
        options = ch_options()
        options.add_argument('start-maximized')
        driver = webdriver.Chrome(executable_path='chromedriver', options=options)
    elif browser_mode == 'firefox':
        options = ff_options()
        options.add_argument('--headless')
        options.add_argument('start-maximized')
        driver = webdriver.Firefox(executable_path='geckodriver', options=options)
    else:
        # Name the offending value so a bad configuration is easy to spot.
        raise Exception(f'"{browser_mode}" is not a supported browser')

    # try/finally guarantees the browser is shut down even when the consumer
    # raises into, or closes, the generator.
    try:
        # implicitly_wait expects a number, not a string.
        driver.implicitly_wait(5)
        yield driver
    finally:
        driver.quit()
def create_driver(cls, random_proxy, user_agent, for_headers=False, webrtc=True):
    """Create a Firefox or Chrome driver with the given settings.

    Firefox is used when ``cls.browser == 'firefox'``; any other value
    selects Chrome.

    :param random_proxy: proxy address to route traffic through; a falsy
        value means no proxy
    :param user_agent: user-agent string the browser should report
    :param for_headers: when true, use the module-level ``Firefox``/``Chrome``
        class instead of ``webdriver.Firefox``/``webdriver.Chrome``
        (NOTE(review): presumably a request-capturing driver; confirm
        against the file's imports)
    :param webrtc: Chrome only; when false the browser is started headless
    :return: the driver instance
    """
    if cls.browser == 'firefox':
        options = ff_options()
        for argument in ('--headless',
                         '--no-sandbox',
                         '--disable-javascript',
                         '--disable-dev-shm-usage'):
            options.add_argument(argument)

        # Copy the shared class-level dict so a proxy set here does not leak
        # into drivers created later.
        firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
        firefox_capabilities['marionette'] = True
        if random_proxy:
            firefox_capabilities['proxy'] = {
                "proxyType": "MANUAL",
                "httpProxy": random_proxy,
                "ftpProxy": random_proxy,
                "sslProxy": random_proxy
            }

        profile = webdriver.FirefoxProfile()
        profile.set_preference("media.peerconnection.enabled", False)
        profile.set_preference("media.navigator.enabled", False)
        profile.set_preference("general.useragent.override", user_agent)
        profile.update_preferences()

        firefox_cls = Firefox if for_headers else webdriver.Firefox
        driver = firefox_cls(executable_path=settings.FIREFOX_WEB_DRIVER_PATH,
                             capabilities=firefox_capabilities,
                             firefox_profile=profile,
                             firefox_options=options)
    else:
        options = ch_options()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-javascript')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument(f'user-agent={user_agent}')
        if random_proxy:
            options.add_argument(f'--proxy-server={random_proxy}')
        # With webrtc enabled the browser keeps its window (the extension
        # that used to be loaded here is disabled); otherwise run headless.
        if not webrtc:
            options.add_argument('--headless')

        chrome_cls = Chrome if for_headers else webdriver.Chrome
        driver = chrome_cls(settings.WEB_DRIVER_PATH, chrome_options=options)
    return driver
def finale(self, response):
    """Render ``response.url`` in headless Firefox and write one CSV row.

    The page is loaded in a real browser, given five seconds to finish
    rendering, and the text of the description element is extracted from the
    rendered source.  A row with ``url`` and ``description`` is written
    through the module-level ``writer``.

    :param response: response object whose ``url`` should be rendered
    :return: None
    """
    options = ff_options()
    options.add_argument('--headless')
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(response.url)
        driver.maximize_window()
        # Fixed pause so dynamically loaded content is present in the source.
        time.sleep(5)
        rendered = scrapy.Selector(text=driver.page_source)
    finally:
        # quit() (not close()) ends the whole session and driver process, and
        # the finally block makes sure that happens on errors as well.
        driver.quit()

    item = dict()
    item['url'] = response.url
    item['description'] = ''.join(
        rendered.css(
            '.content.style-scope.ytd-video-secondary-info-renderer ::text'
        ).extract())
    writer.writerow(item)
def create_driver(random_proxy, user_agent, for_headers=False, webrtc=True):
    """Create a Firefox driver, optionally routed through a proxy.

    :param random_proxy: proxy address to route traffic through; a falsy
        value means no proxy
    :param user_agent: accepted for interface compatibility; not used here
    :param for_headers: accepted for interface compatibility; not used here
    :param webrtc: accepted for interface compatibility; not used here
    :return: the ``webdriver.Firefox`` instance
    """
    # Headless mode is currently not enabled.
    options = ff_options()

    # Copy the shared class-level dict so a proxy set here does not leak into
    # drivers created later.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True
    if random_proxy:
        firefox_capabilities['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": random_proxy,
            "ftpProxy": random_proxy,
            "sslProxy": random_proxy
        }

    # Raw string: same path as before, without invalid escape sequences.
    driver = webdriver.Firefox(executable_path=r'C:\Windows\geckodriver',
                               firefox_options=options,
                               capabilities=firefox_capabilities)
    return driver
def create_driver(random_proxy=None):
    """Create a headless Firefox driver with a fixed user agent.

    WebRTC (``media.peerconnection`` / ``media.navigator``) is disabled in
    the profile and the user agent is overridden with a fixed Firefox 72
    string.

    :param random_proxy: proxy address to route traffic through; ``None`` or
        another falsy value means no proxy
    :return: the ``webdriver.Firefox`` instance
    """
    options = ff_options()
    for argument in ('--headless',
                     '--no-sandbox',
                     '--disable-javascript',
                     '--disable-dev-shm-usage'):
        options.add_argument(argument)

    user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0'

    # Copy the shared class-level dict so a proxy set here does not leak into
    # drivers created later.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True
    if random_proxy:
        firefox_capabilities['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": random_proxy,
            "ftpProxy": random_proxy,
            "sslProxy": random_proxy
        }

    profile = webdriver.FirefoxProfile()
    profile.set_preference("media.peerconnection.enabled", False)
    profile.set_preference("media.navigator.enabled", False)
    profile.set_preference("general.useragent.override", user_agent)
    profile.update_preferences()

    driver = webdriver.Firefox(executable_path='./geckodriver',
                               capabilities=firefox_capabilities,
                               firefox_profile=profile,
                               firefox_options=options)
    return driver
def get_webdriver_instance(self):
    """Start the browser named by ``self.browser`` and open ``self.base_url``.

    Recognised values of ``self.browser`` are ``'firefox'``, ``'safari'``,
    ``'chrome'``, ``'firefox_headless'`` and ``'chrome_headless'``; anything
    else falls back to a plain Firefox.  Chrome and headless Firefox use the
    driver executables found in the current working directory.  When
    ``self.base_url`` is ``None`` it is set to the default site first.

    :return: the driver instance, maximized, with a 3 second implicit wait
        and ``self.base_url`` already loaded
    """
    if self.base_url is None:
        # NOTE(review): this default has no URL scheme; WebDriver normally
        # needs a full URL such as "https://..." - confirm the intended one.
        self.base_url = 'www.camelodge.com'

    if self.browser == 'firefox':
        driver = webdriver.Firefox()
    # Todo Edge browser
    elif self.browser == 'safari':
        driver = webdriver.Safari(
            executable_path='/Applications/Safari.app/Contents/MacOS/Safari'
        )
    elif self.browser == 'chrome':
        chrome_driver = os.path.join(os.getcwd(), "chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_driver)
    elif self.browser == 'firefox_headless':
        firefox_options = ff_options()
        firefox_options.headless = True
        # Firefox sizes its window through --width/--height; the former
        # "--window_size=..." argument is not a Firefox option.
        firefox_options.add_argument("--width=2560")
        firefox_options.add_argument("--height=1600")
        gecko_driver = os.path.join(os.getcwd(), "geckodriver")
        driver = webdriver.Firefox(options=firefox_options,
                                   executable_path=gecko_driver)
    elif self.browser == 'chrome_headless':
        chrome_options = c_options()
        chrome_options.add_argument("--headless")
        # Chrome's switch is spelled --window-size=WIDTH,HEIGHT.
        chrome_options.add_argument("--window-size=2560,1600")
        chrome_driver = os.path.join(os.getcwd(), "chromedriver")
        driver = webdriver.Chrome(options=chrome_options,
                                  executable_path=chrome_driver)
    else:
        driver = webdriver.Firefox()

    driver.maximize_window()
    driver.implicitly_wait(3)
    driver.get(url=self.base_url)
    return driver
import csv from scrapy.crawler import CrawlerProcess from seleniumwire import webdriver from selenium.webdriver.firefox.options import Options as ff_options #from scraping import settings Rfile=open('zipcode_zillow.txt','r') inputfile=Rfile.read() Wfile=open('zillowData.csv','w',encoding='utf-8',newline='') csv_columns=['Price','Bedrooms','Bathrooms','Square feet','Address','Listing type','Zestimate','Est. payment:','Time on Zillow','Type:','Year built:','Heating:','Cooling:','Parking:','HOA:','Lot:','Price/sqft:','Rent Zestimate','Neighborhood stats','median Zestimate','Zillow link',] writer=csv.DictWriter(Wfile,fieldnames=csv_columns) writer.writeheader() options = ff_options() options.add_argument('--headless') driver = webdriver.Firefox(firefox_options=options) class zillow(scrapy.Spider): name = 'zillow' template_url = 'https://www.zillow.com/search/GetSearchPageState.htm?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A{}%7D%2C%22usersSearchTerm%22%3A%22{}%22%2C%22mapBounds%22%3A%7B%22west%22%3A-74.08536545581086%2C%22east%22%3A-73.9504396135257%2C%22south%22%3A40.69677215592377%2C%22north%22%3A40.77733747707232%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A61615%2C%22regionType%22%3A7%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22isForSaleByAgent%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleByOwner%22%3A%7B%22value%22%3Afalse%7D%2C%22isNewConstruction%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isComingSoon%22%3A%7B%22value%22%3Afalse%7D%2C%22isAuction%22%3A%7B%22value%22%3Afalse%7D%2C%22isPreMarketForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isPreMarketPreForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isForRent%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%7D&wants={%22cat1%22:[%22listResults%22,%22mapResults%22,%22total%22]}&requestId={}' request_count = 1 def close_driver(cls, driver): driver.quit() def start_requests(self):
def getWebDriverInstance(self):
    """Get a WebDriver instance based on the browser configuration.

    Driver executables are installed at runtime through webdriver_manager
    (https://github.com/SergeyPirogov/webdriver_manager) rather than read
    from a fixed location on the local machine, which is what the Bamboo
    integration needs.

    ``self.browser`` selects the driver: ``"ie"``, ``"edge"``, ``"ff"``,
    ``"ffdocker"``, ``"ffheadless"``, ``"chrome"``, ``"chromedocker"``,
    ``"chromeheadless"`` or ``"mobile"``; any other value falls back to
    Firefox.  ``self.environment`` (``'qa'``, ``'test'`` or ``'dev'``)
    selects the portal URL that is loaded; unknown values use the QA portal.

    Returns:
        'WebDriver Instance', maximized, with a 3 second implicit wait and
        the portal URL already loaded
    """
    if self.browser == "ie":
        # Installed into webdriver_manager's default cache (.wdm in the
        # user's home directory).
        driver = webdriver.Ie(IEDriverManager(os_type="win32").install())
    elif self.browser == "edge":
        driver = webdriver.Edge(EdgeDriverManager().install())
    elif self.browser == "ff":
        driver = webdriver.Firefox(
            executable_path=GeckoDriverManager().install())
    elif self.browser == "ffdocker":
        capabilities = DesiredCapabilities.FIREFOX.copy()
        driver = webdriver.Remote("http://127.0.0.1:4446/wd/hub",
                                  capabilities)
    elif self.browser == "ffheadless":
        # Use the path returned by install() directly.  Previously a
        # throwaway Firefox was started just to trigger the install and was
        # never quit, leaking a browser on every call.
        ffDriverLocation = GeckoDriverManager().install(path=r"C:\TEMP")
        options = ff_options()
        options.headless = True
        driver = webdriver.Firefox(options=options,
                                   executable_path=ffDriverLocation)
    elif self.browser == "chrome":
        driver = webdriver.Chrome(ChromeDriverManager().install())
        driver.set_window_size(1920, 1080)
    elif self.browser == "chromedocker":
        # Uses the Docker container and launches the script on VNC.
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['takesScreenshot'] = True
        driver = webdriver.Remote("http://127.0.0.1:4446/wd/hub",
                                  capabilities)
        driver.set_window_size(1920, 1080)
    elif self.browser == "chromeheadless":
        # Same as "ffheadless": install once and reuse the returned path
        # instead of launching (and leaking) an extra browser.
        chromeDriverLocation = ChromeDriverManager().install(path=r"C:\TEMP")
        options = chrome_options()
        options.headless = True
        driver = webdriver.Chrome(chromeDriverLocation,
                                  chrome_options=options)
    elif self.browser == "mobile":
        # Device to emulate.  Other device names accepted by Chrome include
        # "iPhone 6/7/8 Plus", "iPhone X", "iPad", "iPad Mini", "iPad Pro",
        # "Nexus 10", "Galaxy S III" and "Galaxy Note 3"; a specific build
        # can be described with "deviceMetrics" and "userAgent" instead.
        # More information at:
        # https://sites.google.com/a/chromium.org/chromedriver/mobile-emulation
        mobile_emulation = {
            "deviceName": "iPhone 6/7/8"
        }
        options = chrome_options()
        # Add the mobile emulation to the chrome options
        options.add_experimental_option("mobileEmulation", mobile_emulation)
        chromeDriverLocation = ChromeDriverManager().install(path=r"C:\TEMP")
        driver = webdriver.Chrome(chromeDriverLocation,
                                  chrome_options=options)
    else:
        driver = webdriver.Firefox(
            executable_path=GeckoDriverManager().install())

    # Setting Driver Implicit Time out for An Element
    driver.implicitly_wait(3)
    # Maximize the window
    driver.maximize_window()

    # Selecting the URL based on the environment param
    if self.environment == 'qa':
        baseURL = "https://portal.qa.aws.wfscorp.com/"
    elif self.environment == 'test':
        baseURL = "https://portal.test.aws.wfscorp.com/"
    elif self.environment == 'dev':
        baseURL = "https://portal.dev.aws.wfscorp.com/"
    else:
        baseURL = "https://portal.qa.aws.wfscorp.com/"

    # Loading browser with App URL
    driver.get(baseURL)
    return driver