Exemplo n.º 1
0
    def browser(self):
        """Start a WebDriver for the browser named in the configuration.

        The browser name, the implicit-wait value and the headless flag are
        read from the instance's config accessors. Chrome, Firefox and Opera
        are supported; the browser runs headless when the flag is ``'yes'``.

        :return: the started driver with the implicit wait applied
        :raises Exception: if the configured browser is not supported
        """
        browser_name = self.config_browser()
        implicit_wait = self.config_wait_time()
        run_headless = self.headless_browser() == 'yes'

        # Select the driver class together with its matching options class.
        if browser_name == 'chrome':
            driver_cls, options_cls = Chrome, chrome_options
        elif browser_name == 'firefox':
            driver_cls, options_cls = Firefox, ff_options
        elif browser_name == 'opera':
            driver_cls, options_cls = Opera, opera_options
        else:
            raise Exception(f'"{browser_name}" is not a supported browser')

        if not run_headless:
            driver = driver_cls()
        else:
            headless_options = options_cls()
            headless_options.headless = True
            driver = driver_cls(options=headless_options)

        driver.implicitly_wait(implicit_wait)
        return driver
def get_browser_driver():
    """
    Get web-driver - choose between Chrome, Firefox and Safari.

    Reads the module-level settings (per the original note these come from
    env vars):
    WEB_DRIVER: name of the driver - "CHROME", "FIREFOX" or "SAFARI"
    HEADLESS: when truthy, Chrome/Firefox are started without a window

    The driver binaries are looked up inside PROJECT_PATH.

    :return: driver instance object
    :raises Exception: if WEB_DRIVER names an unknown/unsupported driver
    """
    log.info(f"Webdriver is: {WEB_DRIVER}, running headless: {HEADLESS}")

    # Validate before touching DesiredCapabilities so an unknown name gives
    # the explicit error below rather than an AttributeError from getattr().
    if WEB_DRIVER not in ('CHROME', 'FIREFOX', 'SAFARI'):
        raise Exception(f'Unknown/unsupported driver selected: {WEB_DRIVER}')

    # dict() takes a private copy so the shared class-level defaults are
    # never modified.
    capabilities = dict(getattr(DesiredCapabilities, WEB_DRIVER))

    if WEB_DRIVER == 'CHROME':
        chromedriver = os.path.join(PROJECT_PATH, "chromedriver.exe")
        chrome_options = ch_options()
        if HEADLESS:
            # Plain command-line switch; does not depend on the deprecated
            # set_headless() helper.
            chrome_options.add_argument('--headless')
        browser_driver = Chrome(executable_path=chromedriver,
                                desired_capabilities=capabilities,
                                options=chrome_options)
    elif WEB_DRIVER == 'FIREFOX':
        geckodriver = os.path.join(PROJECT_PATH, "geckodriver.exe")
        firefox_options = ff_options()
        if HEADLESS:
            firefox_options.add_argument('-headless')
        browser_driver = Firefox(executable_path=geckodriver,
                                 capabilities=capabilities,
                                 options=firefox_options)
    else:
        safaridriver = os.path.join(PROJECT_PATH, "safaridriver.exe")
        # headless mode is not possible right now in Safari
        # Safari's constructor names this keyword desired_capabilities.
        browser_driver = Safari(executable_path=safaridriver,
                                desired_capabilities=capabilities)

    return browser_driver
Exemplo n.º 3
0
    def firefox(self, sign_test: bool):
        """Create a Firefox driver configured for unattended downloads.

        Downloads go to the folder returned by ``self.save_folder()`` without
        prompting for the listed MIME types, and the built-in PDF viewer is
        disabled.

        :param sign_test: when true, the extension returned by
            ``self.extensions_path(False)`` is added to the profile and the
            browser window stays visible; when false, the browser runs
            headless and no extension is added
        :return: the started ``webdriver.Firefox`` instance
        """
        ff_webdriver = self.firefox_path()
        save_folder = self.save_folder()
        extensions = self.extensions_path(False)

        ff_profile = webdriver.FirefoxProfile()
        options = ff_options()

        if sign_test:
            ff_profile.add_extension(extension=extensions)
        else:
            options.headless = True

        download_preferences = {
            "browser.download.folderList": 2,
            "browser.download.dir": save_folder,
            "browser.download.useDownloadDir": True,
            "browser.download.viewableInternally.enabledTypes": "",
            "browser.helperApps.neverAsk.saveToDisk":
                "application/pdf;text/plain;application/text;text/xml;application/xml",
            "pdfjs.disabled": True,
        }
        for preference, value in download_preferences.items():
            options.set_preference(preference, value)

        return webdriver.Firefox(executable_path=ff_webdriver,
                                 firefox_profile=ff_profile,
                                 options=options)
Exemplo n.º 4
0
    def create_driver(self, spider):
        """
        Create a Firefox driver, optionally routed through a random proxy.

        :param spider: object exposing ``proxies``; when that collection is
            non-empty one entry is picked at random and used as the HTTP,
            FTP and SSL proxy
        :return: the started ``webdriver.Firefox`` instance
        """
        random_proxy = random.choice(
            spider.proxies) if spider.proxies else None

        options = ff_options()
        for argument in ('--no-sandbox',
                         '--disable-javascript',
                         '--disable-dev-shm-usage',
                         '--width=1460',
                         '--height=780'):
            options.add_argument(argument)

        # Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level
        # dict, and writing the proxy into it would leak that proxy into
        # every driver created afterwards.
        firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
        firefox_capabilities['marionette'] = True
        if random_proxy:
            firefox_capabilities['proxy'] = {
                "proxyType": "MANUAL",
                "httpProxy": random_proxy,
                "ftpProxy": random_proxy,
                "sslProxy": random_proxy
            }

        profile = webdriver.FirefoxProfile()
        profile.set_preference("media.peerconnection.enabled", False)
        profile.set_preference("media.navigator.enabled", False)
        profile.update_preferences()

        # ``options`` replaces the deprecated ``firefox_options`` keyword.
        driver = webdriver.Firefox(
            executable_path=settings.FIREFOX_WEB_DRIVER_PATH,
            capabilities=firefox_capabilities,
            firefox_profile=profile,
            options=options)
        return driver
Exemplo n.º 5
0
def browser():
    """Yield a WebDriver for the configured browser and quit it afterwards.

    The browser mode is read from ``json_config_parse.get_browser()``:
    ``'chrome'`` starts Chrome, ``'firefox'`` starts headless Firefox.
    The driver gets an implicit wait of 5 before it is handed out.

    :return: generator yielding the started driver exactly once
    :raises Exception: if the configured browser mode is not supported
    """
    # TODO: refactor this part
    browser_mode = json_config_parse.get_browser()

    # Initialize WebDriver
    if browser_mode == 'chrome':
        options = ch_options()
        options.add_argument('start-maximized')
        driver = webdriver.Chrome(executable_path='chromedriver',
                                  options=options)
    elif browser_mode == 'firefox':
        options = ff_options()
        options.add_argument('--headless')
        options.add_argument('start-maximized')
        driver = webdriver.Firefox(executable_path='geckodriver',
                                   options=options)
    else:
        raise Exception(f'"{browser_mode}" is not a supported browser')

    driver.implicitly_wait(5)

    # try/finally guarantees the browser is shut down even when the consumer
    # of the generator fails or abandons it.
    try:
        yield driver
    finally:
        driver.quit()
Exemplo n.º 6
0
    def create_driver(cls, random_proxy, user_agent, for_headers=False, webrtc=True):
        """
        Create a Firefox or Chrome driver with the given settings.

        Firefox is used when ``cls.browser`` is ``'firefox'`` (always
        headless); any other value selects Chrome.

        :param random_proxy: proxy address for the browser; falsy means no proxy
        :param user_agent: user-agent string the browser should report
        :param for_headers: when true the driver is built from the bare
            ``Firefox``/``Chrome`` names, otherwise from
            ``webdriver.Firefox``/``webdriver.Chrome``
        :param webrtc: Chrome only - when false the browser runs headless
        :return: the started driver
        """
        if cls.browser == 'firefox':
            options = ff_options()
            for argument in ('--headless',
                             '--no-sandbox',
                             '--disable-javascript',
                             '--disable-dev-shm-usage'):
                options.add_argument(argument)

            # Work on a copy: DesiredCapabilities.FIREFOX is a shared
            # class-level dict, and writing the proxy into it would leak
            # that proxy into every driver created afterwards.
            firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
            firefox_capabilities['marionette'] = True
            if random_proxy:
                firefox_capabilities['proxy'] = {
                    "proxyType": "MANUAL",
                    "httpProxy": random_proxy,
                    "ftpProxy": random_proxy,
                    "sslProxy": random_proxy
                }

            profile = webdriver.FirefoxProfile()
            profile.set_preference("media.peerconnection.enabled", False)
            profile.set_preference("media.navigator.enabled", False)
            profile.set_preference("general.useragent.override", user_agent)
            profile.update_preferences()

            firefox_cls = Firefox if for_headers else webdriver.Firefox
            # ``options`` replaces the deprecated ``firefox_options`` keyword.
            driver = firefox_cls(executable_path=settings.FIREFOX_WEB_DRIVER_PATH,
                                 capabilities=firefox_capabilities,
                                 firefox_profile=profile,
                                 options=options)
        else:
            options = ch_options()
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-javascript')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument(f'user-agent={user_agent}')
            if random_proxy:
                options.add_argument(f'--proxy-server={random_proxy}')

            # With webrtc enabled the browser stays visible; loading the
            # extension from settings.WEB_DRIVER_EXTENSION_PATH is currently
            # disabled.
            if not webrtc:
                options.add_argument('--headless')

            chrome_cls = Chrome if for_headers else webdriver.Chrome
            # ``options`` replaces the deprecated ``chrome_options`` keyword.
            driver = chrome_cls(settings.WEB_DRIVER_PATH, options=options)

        return driver
Exemplo n.º 7
0
 def finale(self, response):
     """
     Open the response URL in a headless Firefox and write one row holding
     the URL and the description text found in the rendered page.

     :param response: object whose ``url`` attribute is the page to load
     :return: None; the row is written through the module-level ``writer``
     """
     options = ff_options()
     options.add_argument('--headless')
     driver = webdriver.Firefox(options=options)
     try:
         driver.get(response.url)
         driver.maximize_window()
         # Fixed pause before the page source is read.
         time.sleep(5)
         rendered = scrapy.Selector(text=driver.page_source)
     finally:
         # quit() ends the whole driver session; it runs even when loading
         # the page fails so no browser is left behind.
         driver.quit()

     item = dict()
     item['url'] = response.url
     item['description'] = ''.join(
         rendered.css(
             '.content.style-scope.ytd-video-secondary-info-renderer ::text'
         ).extract())
     writer.writerow(item)
def create_driver(random_proxy, user_agent, for_headers=False, webrtc=True):
    """
    Create a Firefox driver, optionally routed through a proxy.

    :param random_proxy: proxy address used as HTTP, FTP and SSL proxy;
        falsy means no proxy
    :param user_agent: accepted for interface compatibility; not used here
    :param for_headers: accepted for interface compatibility; not used here
    :param webrtc: accepted for interface compatibility; not used here
    :return: the started ``webdriver.Firefox`` instance
    """
    options = ff_options()

    # Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level
    # dict, and writing the proxy into it would leak that proxy into every
    # driver created afterwards.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True
    if random_proxy:
        firefox_capabilities['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": random_proxy,
            "ftpProxy": random_proxy,
            "sslProxy": random_proxy
        }

    # Raw string keeps the backslashes literal without invalid-escape
    # warnings; ``options`` replaces the deprecated ``firefox_options``.
    driver = webdriver.Firefox(executable_path=r'C:\Windows\geckodriver',
                               options=options,
                               capabilities=firefox_capabilities)
    return driver
Exemplo n.º 9
0
def create_driver(random_proxy=None):
    """
    Create a headless Firefox driver with a fixed user agent.

    :param random_proxy: proxy address used as HTTP, FTP and SSL proxy;
        ``None`` (or any falsy value) means no proxy
    :return: the started ``webdriver.Firefox`` instance
    """
    options = ff_options()
    for argument in ('--headless',
                     '--no-sandbox',
                     '--disable-javascript',
                     '--disable-dev-shm-usage'):
        options.add_argument(argument)

    user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0'

    # Work on a copy: DesiredCapabilities.FIREFOX is a shared class-level
    # dict, and writing the proxy into it would leak that proxy into every
    # driver created afterwards.
    firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
    firefox_capabilities['marionette'] = True
    if random_proxy:
        firefox_capabilities['proxy'] = {
            "proxyType": "MANUAL",
            "httpProxy": random_proxy,
            "ftpProxy": random_proxy,
            "sslProxy": random_proxy
        }

    profile = webdriver.FirefoxProfile()
    profile.set_preference("media.peerconnection.enabled", False)
    profile.set_preference("media.navigator.enabled", False)
    profile.set_preference("general.useragent.override", user_agent)
    profile.update_preferences()

    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    driver = webdriver.Firefox(executable_path='./geckodriver',
                               capabilities=firefox_capabilities,
                               firefox_profile=profile,
                               options=options)

    return driver
Exemplo n.º 10
0
    def get_webdriver_instance(self):
        """Start the browser named by ``self.browser`` and open the base URL.

        Recognised values are ``'firefox'``, ``'safari'``, ``'chrome'``,
        ``'firefox_headless'`` and ``'chrome_headless'``; any other value
        falls back to a plain Firefox. When ``self.base_url`` is ``None`` a
        default site is stored on the instance first.

        The window is maximized, the implicit wait is set to 3 and the base
        URL is loaded before the driver is returned.

        :return: the started driver
        """
        if self.base_url is None:
            # driver.get() needs a fully qualified URL; a bare host name is
            # rejected as malformed.
            self.base_url = 'http://www.camelodge.com'

        # Todo Edge browser
        if self.browser == 'safari':
            driver = webdriver.Safari(
                executable_path='/Applications/Safari.app/Contents/MacOS/Safari'
            )
        elif self.browser == 'chrome':
            chrome_driver = os.getcwd() + "/chromedriver"
            driver = webdriver.Chrome(executable_path=chrome_driver)
        elif self.browser == 'firefox_headless':
            firefox_options = ff_options()
            firefox_options.headless = True
            # Firefox takes the window size as separate width/height switches.
            firefox_options.add_argument("--width=2560")
            firefox_options.add_argument("--height=1600")
            gecko_driver = os.getcwd() + "/geckodriver"
            driver = webdriver.Firefox(options=firefox_options,
                                       executable_path=gecko_driver)
        elif self.browser == 'chrome_headless':
            chrome_options = c_options()
            chrome_options.add_argument("--headless")
            # Chrome's switch is --window-size=<width>,<height>.
            chrome_options.add_argument("--window-size=2560,1600")
            chrome_driver = os.getcwd() + "/chromedriver"
            driver = webdriver.Chrome(options=chrome_options,
                                      executable_path=chrome_driver)
        else:
            # Covers 'firefox' as well as any unrecognised browser name.
            driver = webdriver.Firefox()

        driver.maximize_window()
        driver.implicitly_wait(3)
        driver.get(url=self.base_url)
        return driver
Exemplo n.º 11
0
import csv
from scrapy.crawler import CrawlerProcess
from seleniumwire import webdriver
from selenium.webdriver.firefox.options import Options as ff_options
#from scraping import settings


# Read the whole input file up front; the handle is closed as soon as its
# content is in memory.
with open('zipcode_zillow.txt', 'r') as Rfile:
    inputfile = Rfile.read()

# The output file is opened at module level and left open so that `writer`
# stays usable after this section.
Wfile = open('zillowData.csv', 'w', encoding='utf-8', newline='')
csv_columns = [
    'Price', 'Bedrooms', 'Bathrooms', 'Square feet', 'Address',
    'Listing type', 'Zestimate', 'Est. payment:', 'Time on Zillow', 'Type:',
    'Year built:', 'Heating:', 'Cooling:', 'Parking:', 'HOA:', 'Lot:',
    'Price/sqft:', 'Rent Zestimate', 'Neighborhood stats',
    'median Zestimate', 'Zillow link',
]
writer = csv.DictWriter(Wfile, fieldnames=csv_columns)
writer.writeheader()

# Headless Firefox shared by the module; ``options`` replaces the deprecated
# ``firefox_options`` keyword.
options = ff_options()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)


class zillow(scrapy.Spider):

    name = 'zillow'
    template_url = 'https://www.zillow.com/search/GetSearchPageState.htm?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A{}%7D%2C%22usersSearchTerm%22%3A%22{}%22%2C%22mapBounds%22%3A%7B%22west%22%3A-74.08536545581086%2C%22east%22%3A-73.9504396135257%2C%22south%22%3A40.69677215592377%2C%22north%22%3A40.77733747707232%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A61615%2C%22regionType%22%3A7%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22isForSaleByAgent%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleByOwner%22%3A%7B%22value%22%3Afalse%7D%2C%22isNewConstruction%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isComingSoon%22%3A%7B%22value%22%3Afalse%7D%2C%22isAuction%22%3A%7B%22value%22%3Afalse%7D%2C%22isPreMarketForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isPreMarketPreForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isForRent%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%7D&wants={%22cat1%22:[%22listResults%22,%22mapResults%22,%22total%22]}&requestId={}'
    request_count = 1

    def close_driver(cls, driver):
        """Shut down the given driver by calling its ``quit()`` method."""
        # NOTE(review): the first parameter is named ``cls`` but no
        # @classmethod decorator is visible here - confirm how this is called.
        driver.quit()

    def start_requests(self):
Exemplo n.º 12
0
    def getWebDriverInstance(self):
        """
        Get WebDriver Instance based on the browser configuration.

        For Bamboo integration the webdrivers are installed at runtime
        through the webdriver manager instead of using a path on the local
        machine:
        https://github.com/SergeyPirogov/webdriver_manager

        Supported ``self.browser`` values: "ie", "edge", "ff", "ffdocker",
        "ffheadless", "chrome", "chromedocker", "chromeheadless" and
        "mobile"; any other value falls back to a local Firefox.

        After the driver is created the implicit wait is set to 3, the
        window is maximized and the portal URL matching ``self.environment``
        ("qa", "test" or "dev"; anything else uses the QA portal) is loaded.

        Returns:
            'WebDriver Instance'
        """
        if self.browser == "ie":
            # The manager installs the IE driver and returns its path.
            driver = webdriver.Ie(IEDriverManager(os_type="win32").install())

        elif self.browser == "edge":
            driver = webdriver.Edge(EdgeDriverManager().install())

        elif self.browser == "ff":
            driver = webdriver.Firefox(
                executable_path=GeckoDriverManager().install())

        elif self.browser == "ffdocker":
            capabilities = DesiredCapabilities.FIREFOX.copy()
            driver = webdriver.Remote("http://127.0.0.1:4446/wd/hub",
                                      capabilities)

        elif self.browser == "ffheadless":
            # Raw strings keep the backslashes literal without
            # invalid-escape warnings.
            ffDriverLocation = r"C:\TEMP\geckodriver.exe"
            # Only the driver binary needs installing here; constructing a
            # webdriver just to trigger the download would leave an extra,
            # never-closed browser running.
            GeckoDriverManager().install(path=r"C:\TEMP")
            options = ff_options()
            options.headless = True
            driver = webdriver.Firefox(options=options,
                                       executable_path=ffDriverLocation)

        elif self.browser == "chrome":
            driver = webdriver.Chrome(ChromeDriverManager().install())
            driver.set_window_size(1920, 1080)

        elif self.browser == "chromedocker":
            # THIS WILL USE DOCKER CONTAINER AND LAUNCH THE SCRIPT ON VNC
            capabilities = DesiredCapabilities.CHROME.copy()
            capabilities['takesScreenshot'] = True
            driver = webdriver.Remote("http://127.0.0.1:4446/wd/hub",
                                      capabilities)
            driver.set_window_size(1920, 1080)

        elif self.browser == "chromeheadless":
            chromeDriverLocation = r"C:\TEMP\chromedriver.exe"
            # Install the driver binary only (no throwaway browser).
            # To use the default installation path instead, call install()
            # without ``path`` and point chromeDriverLocation at the result.
            ChromeDriverManager().install(path=r"C:\TEMP")

            options = chrome_options()
            options.headless = True
            # ``options`` replaces the deprecated ``chrome_options`` keyword.
            driver = webdriver.Chrome(chromeDriverLocation,
                                      options=options)

        elif self.browser == "mobile":
            # Select which device you want to emulate by uncommenting it
            # More information at: https://sites.google.com/a/chromium.org/chromedriver/mobile-emulation
            mobile_emulation = {
                "deviceName": "iPhone 6/7/8"
                # "deviceName": "iPhone 6/7/8 Plus"
                # "deviceName": "iPhone X"
                # "deviceName": "iPad"
                # "deviceName": "iPad Mini"
                # "deviceName": "iPad Pro"
                # "deviceName": "Nexus 10"
                # "deviceName": "Galaxy S III"
                # "deviceName": "Galaxy Note 3"
                # Or specify a specific build using the following two arguments
                # "deviceMetrics": { "width": 360, "height": 640, "pixelRatio": 3.0 },
                # "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"
            }
            # Define a variable to hold all the configurations we want
            options = chrome_options()
            # Add the mobile emulation to the chrome options variable
            options.add_experimental_option("mobileEmulation",
                                            mobile_emulation)
            # Install the driver binary only (no throwaway browser), then
            # create the driver with the emulation options.
            chromeDriverLocation = r"C:\TEMP\chromedriver.exe"
            ChromeDriverManager().install(path=r"C:\TEMP")
            driver = webdriver.Chrome(chromeDriverLocation,
                                      options=options)

        else:
            # Unknown browser names fall back to a local Firefox.
            driver = webdriver.Firefox(
                executable_path=GeckoDriverManager().install())

        # Setting Driver Implicit Time out for An Element
        driver.implicitly_wait(3)
        # Maximize the window
        driver.maximize_window()

        # Selecting the URL based on the environment param; unknown
        # environments use the QA portal.
        portalUrls = {
            'qa': "https://portal.qa.aws.wfscorp.com/",
            'test': "https://portal.test.aws.wfscorp.com/",
            'dev': "https://portal.dev.aws.wfscorp.com/",
        }
        baseURL = portalUrls.get(self.environment, portalUrls['qa'])

        # Loading browser with App URL
        driver.get(baseURL)
        return driver