def browser(request):
    """Start the WebDriver selected by the pytest command-line options.

    Reads ``--browser``, ``--headless`` and ``--maximized`` from
    ``request.config``.  Chrome, Firefox and Opera honour ``--headless``;
    Edge is started from a fixed driver executable and ignores it.

    The driver's ``quit`` is registered as a finalizer on ``request`` so the
    browser process is shut down at teardown instead of being leaked.

    Returns:
        The started WebDriver instance.

    Raises:
        ValueError: if ``--browser`` is not chrome, firefox, opera or edge.
    """
    browser_name = request.config.getoption("--browser")
    headless = request.config.getoption("--headless")
    maximized = request.config.getoption("--maximized")

    if browser_name == "chrome":
        options = webdriver.ChromeOptions()
        if headless:
            options.headless = True
        driver = webdriver.Chrome(options=options)
    elif browser_name == "firefox":
        options = webdriver.FirefoxOptions()
        if headless:
            options.headless = True
        driver = webdriver.Firefox(options=options)
    elif browser_name == "opera":
        options = OperaOptions()
        if headless:
            options.headless = True
        driver = webdriver.Opera(options=options)
    elif browser_name == "edge":
        # NOTE(review): machine-specific driver location; this only works on
        # the original author's workstation and should become configurable.
        driver = webdriver.Edge(
            "C:\\Users\\Mikhail\\Downloads\\driver\\msedgedriver.exe")
    else:
        # Fail fast with a clear message rather than handing ``None`` to the
        # caller (or crashing on ``None.maximize_window()`` below).
        raise ValueError(
            "Browser is not supported: {!r}".format(browser_name))

    # Register cleanup before touching the window so the browser is closed
    # even if maximizing fails.
    request.addfinalizer(driver.quit)

    if maximized:
        driver.maximize_window()
    return driver
def create_local_driver(request):
    """Build a local WebDriver from the pytest command-line options.

    Uses ``--drivers_path`` (directory holding the driver executables),
    ``--headless`` and ``--browser`` from ``request.config``.  The driver's
    ``quit`` method is registered as a finalizer on ``request``.

    Returns:
        The started WebDriver for chrome, opera or firefox.

    Raises:
        NotImplementedError: if headless mode is requested for opera.
        ValueError: if the requested browser is none of the three above.
    """
    config = request.config
    drivers_dir = config.getoption("--drivers_path")
    run_headless = config.getoption("--headless")
    browser_name = config.getoption("--browser")

    # Reject unknown browsers before any driver object is created.
    if browser_name not in ("chrome", "opera", "firefox"):
        raise ValueError("Browser is not supported")

    if browser_name == "opera":
        if run_headless:
            raise NotImplementedError("This mode is not supported")
        # presumably ``Options`` is the Opera options class imported by the
        # file; verify against the module's imports
        opera_options = Options()
        driver = webdriver.Opera(
            executable_path=drivers_dir + "/operadriver",
            options=opera_options,
        )
    elif browser_name == "chrome":
        chrome_options = webdriver.ChromeOptions()
        chrome_options.headless = run_headless
        driver = webdriver.Chrome(
            executable_path=drivers_dir + "/chromedriver",
            options=chrome_options,
        )
    else:
        firefox_options = webdriver.FirefoxOptions()
        firefox_options.headless = run_headless
        driver = webdriver.Firefox(
            executable_path=drivers_dir + "/geckodriver",
            options=firefox_options,
        )

    request.addfinalizer(driver.quit)
    return driver
def test_cartCost(self):
    """Check that the cart total equals the sum of the two added items.

    Opens two product pages in a headless Opera session, records each
    displayed price and adds the item to the cart, then compares the price
    shown on the cart page with the sum of the two recorded prices.
    """
    opts = Options()
    opts.headless = True
    assert opts.headless  # run without a graphical interface
    browser = Opera(options=opts)

    def read_price():
        # Drops the last four characters of the first ``styled-price``
        # element's text before converting (presumably a currency suffix --
        # TODO confirm against the page markup).
        return int(browser.find_element_by_class_name('styled-price').text[:-4])

    try:
        browser.get('https://luch.by/kollektsii/volat-2-0/728377624/')
        first_price = read_price()
        browser.find_element_by_class_name('button_add').click()

        browser.get('https://luch.by/kollektsii/classic/76730567/')
        second_price = read_price()
        browser.find_element_by_class_name('button_add').click()

        browser.get('https://luch.by/cart/')
        total = read_price()
        self.assertEqual(total, first_price + second_price)
    finally:
        # Always close the browser, whether the test passes, fails or a page
        # lookup raises; otherwise every run leaves an Opera process behind.
        browser.quit()
def browser(request):
    """Yield a maximized WebDriver for the browser chosen with ``--browser``.

    ``"chrome"`` is started with default options, while ``"firefox"`` and
    ``"opera"`` are started headless.  The driver is quit once the consumer
    is finished with it, and also if maximizing the window fails.

    Yields:
        The started, maximized WebDriver instance.

    Raises:
        ValueError: if ``--browser`` is not chrome, firefox or opera.
    """
    browser_name = request.config.getoption("--browser")

    if browser_name == "chrome":
        # Chrome intentionally runs with a visible window (headless is off).
        options = webdriver.ChromeOptions()
        driver = webdriver.Chrome(options=options)
    elif browser_name == "firefox":
        options = webdriver.FirefoxOptions()
        options.headless = True
        driver = webdriver.Firefox(options=options)
    elif browser_name == "opera":
        options = OperaOptions()
        options.headless = True
        driver = webdriver.Opera(options=options)
    else:
        # Previously this fell through with ``driver = None`` and crashed on
        # ``None.maximize_window()``; report the real problem instead.
        raise ValueError(
            "Browser is not supported: {!r}".format(browser_name))

    try:
        driver.maximize_window()
        yield driver
    finally:
        driver.quit()
# Scrape the NBA player statistics table: open the stats page in Opera, click
# the PTS column header, then hand the table's HTML to BeautifulSoup.
from selenium.webdriver.opera.options import Options
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import json

# 1. Fetch the HTML content from the URL
url = "https://stats.nba.com/players/traditional/?PerMode=Totals&Season=2019-20&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1"
local = r"C:\Users\Usuario\Documents\operadriver.exe"  # raw string so the backslashes in the path are kept literally

option = Options()  # instantiate the Opera Options class
option.headless = False  # headless mode hides the browser window; False keeps it visible
driver = webdriver.Opera(executable_path=local, options=option)

driver.get(url)
time.sleep(10)  # fixed pause before the table is looked up

driver.find_element_by_xpath(
    "//div[@class='nba-stat-table']//table//thead//tr//th[@data-field='PTS']"
).click()  # locate the PTS column header by XPath and click it
element = driver.find_element_by_xpath("//div[@class='nba-stat-table']//table")
html_content = element.get_attribute('outerHTML')

# 2. Parse the HTML content - BeautifulSoup
# Parses the table HTML into a structured document
soup = BeautifulSoup(html_content, 'html.parser')
table = soup.find(name='table')

# 3. Structure the content into a DataFrame - pandas
# simulate clicks and typing from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.select import Select # verify if tag is a 'select' from selenium.webdriver.support.ui import WebDriverWait # makes webdrive wait page load from selenium.webdriver.support import expected_conditions as ec # verify if expected conditions is valid from selenium.webdriver.opera.options import Options # opera webdriver options from webdriver_manager.opera import OperaDriverManager # opera webdriver manager import json # manipulate JSON files from xhtml2pdf import pisa # to build PDF with HTML """ get HTML content """ url = "https://www.nba.com/stats/players/traditional/?sort=PLAYER_NAME&dir=-1" # NBA tabledata URL option = Options() option.headless = True # execute without graphical interface driver = webdriver.Opera( options=option) # call the webdriver to open the browser and get the URL #driver = webdriver.Opera(OperaDriverManager().install(), options=option) # call the webdriver - installed on cache by driver manager - to open the browser and get the URL driver.get(url) time.sleep(10) # delay do get data driver.find_element_by_xpath( "//div[@class='nba-stat-table']//table//thead//tr//th[@data-field='PTS']" ).click() # emulate click to sort by poits element = driver.find_element_by_xpath("//div[@class='nba-stat-table']//table") html_content = element.get_attribute("outerHTML") # html table content """ parse to BS4 HTML structure """
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2866.71 Safari/537.36", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2919.83 Safari/537.36", "Mozilla/5.0 (X11; Ubuntu; Linux i686 on x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2820.59 Safari/537.36" ] headers = { 'User-Agent': random.choice(uastrings), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip', 'DNT': '1' } options = Options() options.headless = False options.add_argument("--incognito") driver = webdriver.Opera(options=options, executable_path=DRIVER_PATH) url_temp = "https://tuoitre.vn/" + 'the-gioi' + "/trang-" + str(100 - 1) + ".htm" driver.get(url_temp) driver.implicitly_wait(30) click = 1 try: while click < 1000: try: driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 10).until( EC.visibility_of_element_located((By.CLASS_NAME,"btn-readmore"))))
# Scrape a panel from covid.saude.gov.br with a headless Opera session and
# print the first rows of the table found inside it.
from selenium.webdriver.opera.options import Options
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import json
from io import StringIO

# 1 - Fetch the HTML content
url = "https://covid.saude.gov.br"
operadrive = r"C:\Users\Usuario\Documents\operadriver.exe"

option = Options()
option.headless = True
drive = webdriver.Opera(executable_path=operadrive, options=option)

try:
    drive.get(url)  # performs the request
    time.sleep(10)  # fixed pause before the panel is looked up

    element = drive.find_element_by_xpath(
        "/html/body/app-root/ion-app/ion-router-outlet/app-home/ion-content/painel-geral-component/div/div[1]/div/div[1]"
    )
    tabelaHTML = element.get_attribute('outerHTML')

    # 2 - Parse the HTML
    soup = BeautifulSoup(tabelaHTML, 'html.parser')
    table = soup.find('lista-sanfona-component')

    # 3 - Build the DataFrame
    # The markup is wrapped in StringIO because passing a literal HTML string
    # to read_html is deprecated in recent pandas releases.
    df_full = pd.read_html(StringIO(str(table)))[0].head()
    print(df_full)
finally:
    # Close the browser even when a lookup or the parsing step raises, so no
    # Opera process is left running.
    drive.quit()