def browser(request):
    """Create a WebDriver instance from pytest command-line options.

    Reads --browser, --headless and --maximized and builds the matching
    driver (chrome / firefox / opera / edge).

    Raises:
        ValueError: if --browser names an unsupported browser.  Previously
            ``driver`` stayed ``None`` and the later ``maximize_window()``
            call failed with an unhelpful AttributeError.
    """
    # Renamed local so it no longer shadows the function name.
    browser_name = request.config.getoption("--browser")
    headless = request.config.getoption("--headless")
    maximized = request.config.getoption("--maximized")

    if browser_name == "chrome":
        options = webdriver.ChromeOptions()
        if headless:
            options.headless = True
        driver = webdriver.Chrome(options=options)

    elif browser_name == "firefox":
        options = webdriver.FirefoxOptions()
        if headless:
            options.headless = True
        driver = webdriver.Firefox(options=options)

    elif browser_name == "opera":
        options = OperaOptions()
        if headless:
            options.headless = True
        driver = webdriver.Opera(options=options)

    elif browser_name == "edge":
        # NOTE(review): hard-coded local driver path — works only on this machine.
        driver = webdriver.Edge(
            "C:\\Users\\Mikhail\\Downloads\\driver\\msedgedriver.exe")

    else:
        # Fail fast instead of crashing on a None driver below.
        raise ValueError(f"Browser is not supported: {browser_name!r}")

    if maximized:
        driver.maximize_window()

    return driver
示例#2
0
def create_local_driver(request):
    """Build a local WebDriver from --drivers_path/--headless/--browser options.

    The driver binary is looked up inside ``drivers_path``; the driver is
    registered for cleanup via ``request.addfinalizer``.

    Raises:
        NotImplementedError: headless mode requested for Opera.
        ValueError: unsupported browser name.
    """
    drivers_path = request.config.getoption("--drivers_path")
    headless = request.config.getoption("--headless")
    browser_name = request.config.getoption("--browser")

    if browser_name == "chrome":
        chrome_opts = webdriver.ChromeOptions()
        chrome_opts.headless = headless
        driver = webdriver.Chrome(
            executable_path=f"{drivers_path}/chromedriver",
            options=chrome_opts,
        )
    elif browser_name == "opera":
        # Opera's driver has no supported headless mode here.
        if headless:
            raise NotImplementedError("This mode is not supported")
        driver = webdriver.Opera(
            executable_path=f"{drivers_path}/operadriver",
            options=Options(),
        )
    elif browser_name == "firefox":
        ff_opts = webdriver.FirefoxOptions()
        ff_opts.headless = headless
        driver = webdriver.Firefox(
            executable_path=f"{drivers_path}/geckodriver",
            options=ff_opts,
        )
    else:
        raise ValueError("Browser is not supported")

    # Guarantee the browser is closed when the requesting test finishes.
    request.addfinalizer(driver.quit)

    return driver
示例#3
0
    def test_cartCost(self):
        """Check that the cart total equals the sum of two added item prices.

        Adds two watches on luch.by to the cart and compares the cart's
        displayed total against the sum of the individual prices.  Prices
        are parsed by stripping the 4-character currency suffix
        (``text[:-4]``) and converting to int.
        """
        opts = Options()
        opts.headless = True
        assert opts.headless  # run without a graphical interface
        browser = Opera(options=opts)
        # Fix: the browser was never closed, leaking an Opera process on
        # every run (pass or fail).  Close it in a finally block.
        try:
            browser.get('https://luch.by/kollektsii/volat-2-0/728377624/')
            item1Price = int(browser.find_element_by_class_name('styled-price').text[:-4])
            browser.find_element_by_class_name('button_add').click()

            browser.get('https://luch.by/kollektsii/classic/76730567/')
            item2Price = int(browser.find_element_by_class_name('styled-price').text[:-4])
            browser.find_element_by_class_name('button_add').click()

            browser.get('https://luch.by/cart/')
            total = int(browser.find_element_by_class_name('styled-price').text[:-4])
            self.assertEqual(total, item1Price+item2Price)
        finally:
            browser.quit()
示例#4
0
def browser(request):
    """Pytest fixture: yield a maximized WebDriver chosen by --browser.

    Chrome runs with a GUI (headless line left commented out, as before);
    Firefox and Opera run headless.  The driver is quit after the test.

    Raises:
        ValueError: if --browser names an unsupported browser.  Previously
            ``driver`` stayed ``None`` and ``maximize_window()`` crashed
            with an AttributeError.
    """
    # Renamed local so it no longer shadows the function name.
    browser_name = request.config.getoption("--browser")

    if browser_name == "chrome":
        options = webdriver.ChromeOptions()
        # options.headless = True
        driver = webdriver.Chrome(options=options)

    elif browser_name == "firefox":
        options = webdriver.FirefoxOptions()
        options.headless = True
        driver = webdriver.Firefox(options=options)

    elif browser_name == "opera":
        options = OperaOptions()
        options.headless = True
        driver = webdriver.Opera(options=options)

    else:
        # Fail fast instead of crashing on a None driver below.
        raise ValueError(f"Browser is not supported: {browser_name!r}")

    driver.maximize_window()

    yield driver
    driver.quit()
示例#5
0
from selenium.webdriver.opera.options import Options
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import json

# 1. Get the HTML content from the URL (comments translated from Portuguese)
url = "https://stats.nba.com/players/traditional/?PerMode=Totals&Season=2019-20&SeasonType=Regular%20Season&sort=PLAYER_NAME&dir=-1"
local = r"C:\Users\Usuario\Documents\operadriver.exe"  # raw string ('r') needed for the Windows path
option = Options()  # instantiate the Options class
option.headless = False  # headless=False keeps the browser window visible
driver = webdriver.Opera(executable_path=local, options=option)
driver.get(url)

# Fixed wait for the JS-rendered stats table to load; a WebDriverWait
# would be more robust, but this preserves the original behavior.
time.sleep(10)

driver.find_element_by_xpath(
    "//div[@class='nba-stat-table']//table//thead//tr//th[@data-field='PTS']"
).click()  # find the PTS column header and click it (sorts the table by points)

# Grab the rendered stats table and its full outer HTML.
element = driver.find_element_by_xpath("//div[@class='nba-stat-table']//table")
html_content = element.get_attribute('outerHTML')

# 2. Parse the HTML content with BeautifulSoup
# Parses the HTML and turns it into structured, queryable data.
soup = BeautifulSoup(html_content, 'html.parser')
table = soup.find(name='table')

# 3. Structure the content into a DataFrame - pandas
示例#6
0
# simulate clicks and typing
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select  # verify if tag is a 'select'
from selenium.webdriver.support.ui import WebDriverWait  # makes webdrive wait page load
from selenium.webdriver.support import expected_conditions as ec  # verify if expected conditions is valid
from selenium.webdriver.opera.options import Options  # opera webdriver options
from webdriver_manager.opera import OperaDriverManager  # opera webdriver manager
import json  # manipulate JSON files
from xhtml2pdf import pisa  # to build PDF with HTML
""" get HTML content """

url = "https://www.nba.com/stats/players/traditional/?sort=PLAYER_NAME&dir=-1"  # NBA tabledata URL

option = Options()
option.headless = True  # execute without graphical interface

driver = webdriver.Opera(
    options=option)  # call the webdriver to open the browser and get the URL
#driver = webdriver.Opera(OperaDriverManager().install(), options=option) # call the webdriver - installed on cache by driver manager - to open the browser and get the URL
driver.get(url)
time.sleep(10)  # delay do get data

driver.find_element_by_xpath(
    "//div[@class='nba-stat-table']//table//thead//tr//th[@data-field='PTS']"
).click()  # emulate click to sort by poits
element = driver.find_element_by_xpath("//div[@class='nba-stat-table']//table")

html_content = element.get_attribute("outerHTML")  # html table content
""" parse to BS4 HTML structure """
示例#7
0
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2866.71 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2919.83 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686 on x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2820.59 Safari/537.36"
    ]

# Request headers with a randomly chosen User-Agent from the `uastrings`
# list defined above.  Presumably passed to `requests` calls later in the
# file — not used in the visible driver setup below; verify against callers.
headers = {
    'User-Agent': random.choice(uastrings),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip',
    'DNT': '1'
}

# Opera in windowed (non-headless) incognito mode.
options = Options()
options.headless = False
options.add_argument("--incognito")

driver = webdriver.Opera(options=options, executable_path=DRIVER_PATH)

# Page 99 of the 'the-gioi' (world) section of tuoitre.vn.
url_temp = "https://tuoitre.vn/" + 'the-gioi' + "/trang-" + str(100 - 1) + ".htm"
driver.get(url_temp)
driver.implicitly_wait(30)
click = 1  # counter for "read more" clicks consumed by the loop below
try:

    while click < 1000:

        try:
            driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CLASS_NAME,"btn-readmore"))))
示例#8
0
from selenium.webdriver.opera.options import Options
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
import json

# 1 - Get the HTML content (comments translated from Portuguese)
url = "https://covid.saude.gov.br"
operadrive = r"C:\Users\Usuario\Documents\operadriver.exe"  # raw string for the Windows path
option = Options()
option.headless = True  # run without a visible browser window
drive = webdriver.Opera(executable_path=operadrive, options=option)
drive.get(url)  # perform the request
time.sleep(10)  # fixed wait for the Angular/Ionic page to render
# Absolute XPath into the dashboard's general panel; brittle — breaks if
# the site layout changes.
element = drive.find_element_by_xpath(
    "/html/body/app-root/ion-app/ion-router-outlet/app-home/ion-content/painel-geral-component/div/div[1]/div/div[1]"
)
tabelaHTML = element.get_attribute('outerHTML')

# 2 - Parse the HTML
soup = BeautifulSoup(tabelaHTML, 'html.parser')
table = soup.find('lista-sanfona-component')

# 3 - Create the dataframe (first rows only, via .head())
df_full = pd.read_html(str(table))[0].head()
print(df_full)
drive.quit()