def init():
    """Initialise the module-level Graylog server handle and Firefox driver.

    NOTE: unclear whether module globals or data_store.suite is the better
    home for these handles.
    """
    global server, driver
    server = GraylogServer()
    driver = Firefox()
def __init__(self):
    # Launch a (non-headless) Firefox session; the caller is responsible
    # for quitting self.driver when done with it.
    self.driver = Firefox()
import time
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

# Run a headless DuckDuckGo search for 'real python' and print the first hit.
firefox_options = Options()
firefox_options.headless = True
# assert firefox_options.headless # Operating in headless mode
driver = Firefox(options=firefox_options)
driver.get('https://duckduckgo.com')
query_input = driver.find_element_by_id('search_form_input_homepage')
query_input.send_keys('real python')
query_input.submit()
time.sleep(1)  # give the results page a moment to render
hits = driver.find_elements_by_class_name('result')
print(hits[0].text)
driver.close()
quit()
def download_collection(collection_id, args):
    """Download every model of a 3D Warehouse collection and write a README json.

    :param collection_id: collection id (the last part of the collection url)
    :param args: namespace with at least `overwrite_collection` (bool) and
        `timeout` (seconds for page-load waits)
    """
    # collection_id is the last part of the url
    url = 'https://3dwarehouse.sketchup.com/collection.html?id=%s' % collection_id
    collection_dir = op.join(CAD_DIR, collection_id)
    logging.info('will download coleection_id: %s' % collection_id)
    # if collection exists
    collection_path = op.join(collection_dir, README_NAME)
    if op.exists(collection_path):
        # if 'overwrite' enabled, remove everything and write from scratch
        if args.overwrite_collection:
            shutil.rmtree(collection_dir)
            models_info = []
        else:
            # if 'overwrite' disabled, try to read what was downloaded
            try:
                collection_info = json.load(open(collection_path))
                models_info = collection_info['vehicles']
                # a README with a null 'vehicles' entry counts as empty
                if models_info is None:
                    models_info = []
            # if 'overwrite' disabled and can't read/parse the readme
            except:
                raise Exception('Failed to parse the collection due to: %s' %
                                sys.exc_info()[0])
    else:
        models_info = []
    if not op.exists(op.join(collection_dir, 'skp')):
        os.makedirs(op.join(collection_dir, 'skp'))
    # Firefox profile that silently downloads .skp files into skp/tmp
    # without the save-file dialog.
    profile = FirefoxProfile()
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.manager.showWhenStarting", False)
    profile.set_preference("browser.download.dir",
                           op.join(collection_dir, 'skp', 'tmp'))
    if not op.exists(op.join(collection_dir, 'skp', 'tmp')):
        os.makedirs(op.join(collection_dir, 'skp', 'tmp'))
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                           "application/vnd.koan")
    with closing(Firefox(profile)) as browser:
        # open the page with collection
        browser.get(url)
        WebDriverWait(browser, timeout=args.timeout).until(
            lambda x: x.find_elements_by_class_name('results-entity-link'))
        # get collection name (non-ascii characters are stripped)
        element = browser.find_element_by_id('title')
        print(element.text.encode('ascii', 'ignore').decode("utf-8"))
        collection_name = validateString(
            element.text.encode('ascii', 'ignore').decode("utf-8"))
        # get collection description
        element = browser.find_element_by_id('description')
        collection_description = validateString(
            element.text.encode('ascii', 'ignore').decode("utf-8"))
        # get collection tags
        #element = browser.find_element_by_id('tags')
        #element.find_element_by_xpath(".//p[@id='test']").text
        #collection_name = element.text.encode('ascii','ignore').decode("utf-8")
        #collection_name = validateString(collection_name)
        # get author (the author id is the last '='-separated field of the href)
        element = browser.find_element_by_id('collection-author')
        author_href = element.get_attribute('href')
        author_id = author_href.split('=')[-1]
        author_name = validateString(
            element.text.encode('ascii', 'ignore').decode("utf-8"))
        # keep scrolling the page until models show up (for pages with many models)
        prev_number = 0
        while True:
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            elements = browser.find_elements_by_class_name(
                'results-entity-link')
            logging.info('found %d models' % len(elements))
            # stop once a scroll no longer reveals new models
            if prev_number == len(elements):
                break
            else:
                prev_number = len(elements)
            time.sleep(1)
        # get the model urls
        model_urls = []
        for element in elements:
            model_url = element.get_attribute('href')
            model_urls.append(model_url)
        # download all models (helper defined elsewhere in this module)
        new_models_info = download_all_models(browser, model_urls, models_info,
                                              collection_id, collection_dir)
        collection_info = {
            'collection_id': collection_id,
            'collection_name': collection_name,
            'author_id': author_id,
            'author_name': author_name,
            'vehicles': new_models_info
        }
        with open(op.join(collection_dir, README_NAME), 'w') as f:
            f.write(json.dumps(collection_info, indent=4))
#!/bin/python3.7 from selenium.webdriver import Firefox, FirefoxOptions from time import sleep from pandas import DataFrame opt = FirefoxOptions() opt.headless = True driver = Firefox(options=opt) # adjust this number for slower connections wait = 1 print("getting page") driver.get("https://www.animalcrossingitemlist.com/list/all-items/") print(f"waiting {wait} seconds...") sleep(wait) # this will overwrite items.csv with a more up to date version frame = DataFrame(columns=['Name', 'Sell Price', 'Buy Price']) has_more = True while has_more: buttons = driver.find_elements_by_class_name("v-btn__content") table = driver.find_element_by_tag_name("tbody") for tr in table.find_elements_by_tag_name("tr"): tds = tr.find_elements_by_tag_name("td") name = tds[1].text sell = tds[2].text if sell not in ['Cannot', '???']: sell = int(sell.replace(',','')) buy = tds[3].text
from selenium.webdriver import Firefox
from time import sleep

# url_drive = executable_path=r"./../driver/chromedriver.exe"
# Open the exercise page, click its link four times, then print every <p>.
driver = Firefox()
url = "https://curso-python-selenium.netlify.app/aula_03.html"
driver.get(url)
sleep(5)
link = driver.find_element_by_tag_name("a")
for _ in range(4):
    link.click()
paragraphs = driver.find_elements_by_tag_name("p")
for paragraph in paragraphs:
    print(paragraph.text)
driver.quit()
temp_data['authors'] = "" check(temp_data, url) paper_details.append(temp_data) if len(paper_details): insert_mongo("Chemistry", "chemical_communications", paper_details) if len(paper_details) < 5: print len(paper_details), url finally: return url2 if __name__ == "__main__": options = Options() options.add_argument('-headless') driver = Firefox(executable_path='geckodriver', firefox_options=options) limit = 2 driver.set_page_load_timeout(limit) driver.set_script_timeout(limit) drop_mongo("Chemistry", "chemical_communications") url = "http://pubs.rsc.org/en/journals/journalissues/cc?_ga=2.201445330.2121483789.1516728316-237121058.1516728316#!issueid=cc054008&type=current&issnprint=1359-7345" # url="http://pubs.rsc.org//en/journals/journal/cc?issueid=cc009038&issnprint=1359-7345" #url="http://pubs.rsc.org/en/journals/journal/cc?issueid=cc054008&issnprint=1359-7345" #url="http://pubs.rsc.org/en/journals/journalissues/cc?_ga=2.201445330.2121483789.1516728316-2#!issueid=cc054009&type=current&issnprint=1359-7345" urlend = "http://pubs.rsc.org/en/journals/journalissues/cc#!issueid=cc1996_0_0&type=current&issnprint=1359-7345" while url and url != urlend: url = getPage(url) print "FINISHED AT", url
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

# Log into the SIAP student portal with an NPM (student id) and password
# typed on stdin. Prompts are in Indonesian.
print("Masukkan Npm Anda:")
npm = input()
print("Masukkan Password SIAP Anda:")
paswd = input('')
opsi = Options()
opsi.headless = False  # visible browser window
binary = FirefoxBinary("C:\\Program Files\\Mozilla Firefox\\firefox.exe")
cap = DesiredCapabilities().FIREFOX
cap['marionette'] = True
browser = Firefox(executable_path='geckodriver.exe', options=opsi,
                  capabilities=cap, firefox_binary=binary)
browser.get('http://siap.poltekpos.ac.id/siap/besan.depan.php')
# locate the login form fields and button, fill them, submit
name = browser.find_element_by_name('user_name')
word = browser.find_element_by_name('user_pass')
login = browser.find_element_by_name('login')
name.send_keys(npm)
word.send_keys(paswd)
login.click()
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

# Start a headless Firefox using the geckodriver next to this script.
opts = Options()
opts.set_headless()
assert opts.headless  # Operating in headless mode
browser = Firefox(executable_path='./geckodriver', options=opts)

import json
import string
import time
from os import path
import logging

# Module logger: ERROR and up go to the console; INFO and up go to
# scraper.log, which is truncated on every run (mode='w').
logger = logging.getLogger('scraper')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
formatter = logging.Formatter('%(name)s:%(levelname)s: %(message)s')
ch.setLevel(logging.ERROR)
ch.setFormatter(formatter)
logger.addHandler(ch)
fh = logging.FileHandler('scraper.log', mode='w')
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)
logger.addHandler(fh)
from selenium.webdriver import Firefox
from bs4 import BeautifulSoup
from urllib.parse import quote
import csv
from datetime import datetime
'''
1) naver 접속
2) login id(id = id) / pwd(id = pw) 후 로그인 버튼(class = btn_login submit)
3) cafe 아이콘 클릭 (class : tab_cafe)
4) 카페 리스트(class : svc_list/unread) 에서 http://cafe.naver.com/kookminlease 카페 접속
5) 접속 후 검색어(name = query) 에 각 단지 입력 후 검색(class : btn-search-green)
'''
base_url = 'http://cafe.naver.com/kookminlease.cafe?iframe_url='
# launch the browser (variable is named `chrome` but it is actually Firefox)
chrome = Firefox()
chrome.get('http://naver.com')
# grab the id / password inputs and the login button
id_txt = chrome.find_element_by_xpath('.//input[@id="id"]')
pw_txt = chrome.find_element_by_xpath('.//input[@id="pw"]')
login_btn = chrome.find_element_by_xpath('.//span[@class="btn_login"]')
# SECURITY NOTE(review): credentials are hard-coded in source; they should be
# moved to a config file or environment variables.
id_txt.send_keys('korea7030')
pw_txt.send_keys('akachiki10!')
login_btn.submit()
chrome.implicitly_wait(50)
cafe_url = "http://cafe.naver.com/kookminlease"
page_url = "http://cafe.naver.com"
def do_cape_crawl(department='all', detailed=False, headless=True, geckodriver_path='./geckodriver', output_dir="."):
    '''
    Crawls information for all courses in various departments on CAPE webpage

    :param department: specific department whose data to crawl, default:'all'
    :type department: str
    :param detailed: form of data scraping. Detailed are much slower. default:False
    :type detailed: bool
    :param headless: mode in which to launch browser, default:True
    :type headless: bool
    :param geckodriver_path: path to the geckodriver executable, default:'./geckodriver'
    :type geckodriver_path: str
    :param output_dir: dir where to place the generated csv, default:'.'
    :type output_dir: str
    '''
    assert isinstance(department, str), "department should be a str"
    assert isinstance(headless, bool), "headless should be a bool"
    assert isinstance(geckodriver_path, str) and path.exists(
        geckodriver_path), f"geckodriver does not exist at {geckodriver_path}"
    assert isinstance(
        output_dir,
        str) and path.isdir(output_dir), f"Not a valid directory:{output_dir}"
    department = department.upper()
    options = Options()
    options.headless = headless
    driver = Firefox(options=options, executable_path=geckodriver_path)
    driver.get("https://cape.ucsd.edu/responses/Results.aspx")
    print('Firefox Initialized in %s mode' %
          ('headless' if headless else 'head'))
    initiate_sso_login(driver)
    # # we expect a security warning
    # # try:
    # WebDriverWait(driver, 120).until(EC.alert_is_present(),
    #                                  'Timed out waiting for a successful SSO login. Check credentials?')
    # #Switch the control to the Alert window
    # alert = driver.switch_to.alert
    # #Retrieve the message on the Alert window
    # msg = alert.text
    # #use the accept() method to accept the alert
    # alert.accept()
    # print(f"Alert Accepted:\n{' '.join(msg.strip().split())}")
    # # except NoAlertPresentException:
    # #     raise
    # # except TimeoutException:
    # #     pass
    # wait for cape page to load after SSO
    status = WebDriverWait(driver, 30, poll_frequency=0.5).until(
        lambda x: x.find_element_by_id(
            'ctl00_ContentPlaceHolder1_ddlDepartments'),
        'Timed out waiting for SSO -> CAPE webpage loading. Check credentials.'
    )
    print("*********** Successfully logged in ***********")
    # build the department list from the page's dropdown, skipping the
    # placeholder entry
    select_dept = Select(
        driver.find_element_by_id('ctl00_ContentPlaceHolder1_ddlDepartments'))
    departments = [
        o.text for o in select_dept.options if o.text != "Select a Department"
    ]
    departments_keyworded = [dept.split()[0] for dept in departments]
    assert department in departments_keyworded or department == 'ALL', f"invalid department supplied. Should be one of {departments_keyworded} or ALL"
    if department != 'ALL':
        departments = [departments[departments_keyworded.index(department)]]
    total_departments = len(departments)
    print("")
    # select departments one at a time and create their csv
    for index, dept in enumerate(departments):
        dept_keyword = dept.split()[0]
        # if dept_keyword not in ['INTL','ICAM', 'ERC','TMC', 'RELI', ]:
        #     continue
        # if dept_keyword in ['FILM','ESYS','DOC','CGS','CENG','ENVR','FPMU','JUDA','LATI','TWS','LAWS','REV','WARR','SDCC','MUIR','HMNR','CONT','SXTH','STPA']:
        #     continue
        select_dept.select_by_visible_text(dept)
        # click on submit button
        driver.find_element_by_id(
            'ctl00_ContentPlaceHolder1_btnSubmit').click()
        # there's a small delay between click and this element getting displayed
        # returns true
        status = WebDriverWait(
            driver, 30,
            poll_frequency=0.5).until(lambda x: x.find_element_by_id(
                'ctl00_ContentPlaceHolder1_UpdateProgress1').is_displayed())
        # shoud return false now (i.e. the progress spinner is gone)
        status = WebDriverWait(
            driver, 30,
            poll_frequency=0.5).until_not(lambda x: x.find_element_by_id(
                'ctl00_ContentPlaceHolder1_UpdateProgress1').is_displayed())
        assert status == False, "Page is still loading!"
        try:
            # parse the result rows for this department (helper defined
            # elsewhere in this module)
            rows = get_parsed_rows_cape(dept=dept_keyword,
                                        dept_num=(index + 1),
                                        total_dept=total_departments,
                                        html_source=driver.page_source,
                                        driver=driver,
                                        detailed=detailed)
            if (len(rows) <= 1):
                print(f"Didn't find any valid row for {dept_keyword}!")
            else:
                f = open(f"{output_dir}/./cape_{dept_keyword}_auto.csv", 'w')
                writer = csv.writer(f)
                for row in rows:
                    writer.writerow(row)
                f.close()
                stdout.write('\r')
                # exclude header
                print_string = f"{dept_keyword} results({len(rows)-1} courses)"
                # need to pad with spaces to keep the above string of fixed length, otherwise remains are not cleaned from screen
                stdout.write(
                    f"Successfully parsed departments({index+1}/{total_departments}), {'{s:{c}^{n}}'.format(s=print_string,n=40,c=' ')}"
                )
                stdout.flush()
        except AssertionError as e:
            print(
                f"caught exception {e} in get_parsed_rows_cape for {dept_keyword}!"
            )
            #no need to write row in this case
    print("")
    driver.quit()
def _start_firefox_driver(self, headless):
    """Create a Firefox WebDriver and register its service for cleanup at exit."""
    options = self._get_firefox_options(headless)
    driver_kwargs = self._get_firefox_driver_kwargs(options)
    driver = Firefox(**driver_kwargs)
    # make sure the geckodriver service is torn down even on abnormal exit
    atexit.register(self._kill_service, driver.service)
    return driver
#import urllib2 import urllib.request, urllib.parse, urllib.error import re from bs4 import BeautifulSoup import unicodedata import pandas as pd data_alll = [] def remove_non_ascii_1(text): return ''.join([i if ord(i) < 128 else ' ' for i in text]) with closing(Firefox()) as browser: site = "https://www.flipkart.com/redmi-note-5-pro-black-64-gb/product-reviews/itmf2fc3xgmxnhpx?page=1&pid=MOBF28FTQPHUPX83" browser.get(site) file = open("Review.txt", "w") for count in range(1, 11): nav_btns = browser.find_elements_by_class_name('_33m_Yg') button = "" for btn in nav_btns: number = int(btn.text) if (number == count): button = btn break
def get_page_with_selenium(query, w=True):  # COMMENT
    """Fetch Yandex result pages for a sequence of search phrases.

    geckodriver&firefox: https://elements.heroku.com/buildpacks/evosystem-jp/heroku-buildpack-firefox
    add RedosToGo on Heroku
    :param query: sequence of phrases; each is wrapped in double quotes
        before being submitted to Yandex
    :param w: when True, dump the most recent page source to index.html
    :return: dict mapping each quoted phrase to its result-page HTML source
    """
    yandex_dict = {}
    opts = Options()
    opts.set_headless()
    # binary = FirefoxBinary('/app/vendor/firefox/firefox')
    # driver = Firefox(options=opts, firefox_binary=binary, executable_path='/app/vendor/geckodriver/geckodriver')
    # driver = Firefox(options=opts, firefox_binary=binary)
    driver = Firefox(options=opts)
    driver.wait = WebDriverWait(driver, 5)
    driver.get('https://www.yandex.ru')
    html = driver.page_source
    if w:
        print(1)
        with open('index.html', 'w') as f:
            f.write(html)
    # try:
    #     # box = driver.find_element_by_id('text')
    #     box = driver.wait.until(EC.presence_of_element_located(
    #         (By.ID, 'text'),
    #     ))
    #     print(12)
    #     # button = driver.find_element_by_class_name('suggest2-form__button')
    #     button = driver.wait.until(EC.presence_of_element_located(
    #         (By.CLASS_NAME, 'suggest2-form__button')
    #     ))
    #     box.send_keys(query)
    #     button.click()
    # except TimeoutException:
    #     print('Box or Button didnt find')
    # submit the first phrase through the homepage search form
    sent = '"' + query[0] + '"'
    box = driver.find_element_by_name('text')
    # button = driver.find_element_by_class_name('suggest2-form__button')
    # button = driver.find_element_by_class_name('button mini-suggest')
    button = driver.find_element_by_class_name('search2__button')
    box.send_keys(sent)
    button.click()
    yandex_dict[sent] = driver.page_source
    # subsequent phrases reuse the results page: clear the box, type, search
    for sent in query[1:]:
        close = driver.find_element_by_class_name('input__clear')
        close.click()
        box = driver.find_element_by_name('text')
        button = driver.find_element_by_class_name('websearch-button')
        sent = '"' + sent + '"'
        box.send_keys(sent)
        button.click()
        yandex_dict[sent] = driver.page_source
    html = driver.page_source
    driver.close()
    if w:
        print(1)
        with open('index.html', 'w') as f:
            f.write(html)
    return yandex_dict
#immportação da url from selenium.webdriver import Firefox from time import sleep #url para pesquisa url = 'https://curso-python-selenium.netlify.app/aula_03.html' #abrindo o navegador navegador = Firefox() #pesquisando na rede mundial de computadores navegador.get(url) sleep(6) #pegando elemento a = navegador.find_element_by_tag_name('a') p = navegador.find_element_by_tag_name('p') #Clicando 3 vezes n = 1 for click in range(10): ps = navegador.find_elements_by_tag_name('p') a.click() print(f'Valor de {ps[-1].text}') print(f'Os valores são iguais {ps[-1].text == str(click)}') #printando texro de a e de p #print(f'texto de a: {a.text}') #print(f'texto de p: {p.text}') #Fechando o navegador
def ObtenerDataFacebook(dato):
    # Scrape the Facebook Ads Library results for the keyword `dato`
    # (country GT) and dump the collected ads into a sheet of AdsReport.xlsx.
    busqueda = dato
    opts = Options()
    opts.headless = True
    diract = os.path.dirname(
        os.path.abspath(__file__))  # directory containing this script
    print("Obteniendo informacion para " + busqueda)
    browser = Firefox(executable_path=diract + "/assets/geckodriver.exe",
                      options=opts)
    urlgetx = "https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=GT&q=" + dato + "&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=keyword_unordered"
    browser.get(urlgetx)
    time.sleep(3)
    # ------ Lists
    # Remove every '_9ccv' element except the first one from the DOM.
    element = browser.find_elements_by_class_name('_9ccv')
    for e in element:
        if e != element[0]:
            browser.execute_script(
                """
            var element = arguments[0];
            element.parentNode.removeChild(element);
            """, e)
    card = browser.find_elements_by_class_name('_99s5')
    cn = 0
    for c in card:
        tipo = "Imagen"
        # NOTE(review): if the lookups below fail, `fs` and `cs` keep the
        # value from the previous card — and are unbound on the first card.
        try:
            fs = c.find_element_by_class_name('_9cd3').text
        except:
            pass
        try:
            aus = c.find_element_by_class_name('_8nqq')
            ausi = aus.get_attribute('src')
            aust = aus.get_attribute('alt')
        except:
            ausi = ""
            aust = "No data"
            pass
        try:
            ds = c.find_elements_by_class_name('_4ik4')
            ds = ds[1].text
        except:
            ds = "No hay Descripción"
            pass
        try:
            cs = c.find_element_by_class_name('_7jys').get_attribute('src')
            tipo = "Imagen"
        except:
            pass
        try:
            cs = c.find_element_by_tag_name('video').get_attribute('src')
            tipo = "Video"
        except:
            pass
        # NOTE(review): `Anuncio` and `anuncios` are defined elsewhere in the
        # original module; `anuncios` appears to be a module-level list.
        obja = Anuncio(tipo, cs, ds, fs, ausi, aust)
        anuncios.append(obja)
    mes = browser.find_element_by_css_selector("._99s9 .l61y9joe").text
    browser.close()
    print("Informacion obtenida exitosamente ")
    print("Generando archivo Excel")
    # Append a new sheet if the workbook already exists, otherwise create it.
    if os.path.exists(diract + "\AdsReport.xlsx"):
        wb = load_workbook(diract + "\AdsReport.xlsx")
        ws = wb.create_sheet(title=dato)
    else:
        wb = Workbook()
        ws = wb.active
        ws.title = dato
    cn = 1
    for i in anuncios:
        fecha = i.getFecha()
        desc = i.getDescription()
        img = i.getMedia()
        auth = i.getAuthTexto()
        ws['A' + str(cn)] = auth
        ws['B' + str(cn)] = img
        ws['C' + str(
            cn)] = " "  # this column is added purely for layout/ordering
        ws['D' + str(cn)] = fecha
        cn += 1
    ws.column_dimensions["A"].width = 25
    ws.column_dimensions["B"].width = 20
    ws.column_dimensions["C"].width = 5
    ws.column_dimensions["D"].width = 30
    wb.save(filename=diract + '/AdsReport.xlsx')
    print("Archivo de Excel generado exitosamente en: " + diract +
          '/AdsReport.xlsx')
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import yahoo_fin.stock_info as si
import time
import json

# Log into IG.com with credentials read from config.json and read the account
# balance from the dashboard's "value-amount" element.
with open('config.json', 'r') as f:
    config = json.load(f)
username = config["username"]
password = config["password"]

fireFoxOptions = webdriver.FirefoxOptions()
fireFoxOptions.set_headless()
# BUG FIX: the original referenced bare `Firefox`, `WebDriverWait`, `EC`,
# `By` and `Keys`, none of which were imported (only `selenium.webdriver`
# was) — use webdriver.Firefox and import the helpers explicitly.
driver = webdriver.Firefox(firefox_options=fireFoxOptions)
driver.get("https://ig.com/uk")
print("Browser Opened")
driver.find_element_by_link_text('Log in').click()
# wait for the login form, then submit credentials (ENTER submits the form)
WebDriverWait(driver, 60).until(
    EC.presence_of_element_located((By.ID, 'account_id')))
driver.find_element_by_id('account_id').send_keys(username)
driver.find_element_by_id('nonEncryptedPassword').send_keys(password +
                                                            Keys.ENTER)
print("Password entered")
# wait until the balance element exists AND shows a pound amount
WebDriverWait(driver, 60).until(
    EC.presence_of_element_located((By.CLASS_NAME, 'value-amount')))
WebDriverWait(driver, 60).until(
    EC.text_to_be_present_in_element((By.CLASS_NAME, 'value-amount'), "£"))
nav = driver.find_element_by_class_name('value-amount').get_attribute("title")
def webdriver():
    """Fixture-style generator: yield a Firefox driver, quit it on teardown."""
    browser = Firefox()
    yield browser
    browser.quit()
def driver(capabilities):
    # Fixture: start Firefox with the supplied desired capabilities, hand the
    # session to the test, and quit it during teardown.
    driver = Firefox(capabilities=capabilities)
    yield driver
    driver.quit()
# from selenium.webdriver.support import expected_conditions as EC # conditionally run a code
# from selenium.webdriver.support.wait import WebDriverWait # letting the program wait
# from selenium.webdriver.common.by import By # specify what kind of syntax we use from the web page
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
# BUG FIX: `Firefox` was used below but its import was commented out.
from selenium.webdriver import Firefox
"""## 1. Selenium for navigating through the page
* Introduction [link](https://selenium-python.readthedocs.io/installation.html)
* Make sure downloading a machine driver first (above link)
"""
# set up the directory so that the program can know where to pull out the browser
root_directory = "C:\\downloads"
driver = Firefox(executable_path=root_directory + '/geckodriver')
# navigate through page
driver.get("http://securities.stanford.edu/filings.html")
go = driver.find_element_by_xpath('//*[@id="records"]/table/tbody/tr[1]/td[1]')
# BUG FIX: XPath positions are 1-based. The original loop started at i = 0,
# so its first lookup ('.../tbody/tr[0]/td[1]') matched nothing and raised
# NoSuchElementException immediately. Iterate rows 1..20 instead.
# NOTE(review): clicking a row may change the page; confirm the remaining row
# xpaths stay valid after each click.
for row_index in range(1, 21):
    xpath = '//*[@id="records"]/table/tbody/tr[' + str(row_index) + ']/td[1]'
    go = driver.find_element_by_xpath(xpath)
    go.click()
# get the content
html = driver.page_source
b = BeautifulSoup(html, 'lxml')
from selenium.webdriver import Firefox
from time import sleep

browser = Firefox(executable_path='./geckodriver')
url = "https://selenium.dunossauro.live/exercicio_05.html"
browser.get(url)


def preenche_forms(form, nome, senha):
    """Fill in one of the four forms on the page dynamically.

    Args:
        form: name of the form to fill (also the name of its submit input).
        nome: text typed into the form's "nome" input.
        senha: text typed into the form's "senha" input.
    """
    browser.find_element_by_css_selector(
        f'.form-{form} input[name="nome"]').send_keys(nome)
    browser.find_element_by_css_selector(
        f'.form-{form} input[name="senha"]').send_keys(senha)
    browser.find_element_by_css_selector(
        f'.form-{form} input[name="{form}"]').click()


# Code
sleep(3)
# fill in the first form
preenche_forms('l0c0', 'diego', '123')
# fill in the second form
# NOTE(review): the remaining calls are not visible in this chunk.
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

# Headless DuckDuckGo search; prints each result's inner HTML.
uri = 'https://duckduckgo.com'
title = 'DuckDuckGo'
headless_opts = Options()
headless_opts.set_headless()
assert headless_opts.headless  # Operating in headless mode
driver = Firefox(options=headless_opts, executable_path='browsers/geckodriver')
driver.get(uri)
assert title in driver.title
driver.implicitly_wait(2)
search_bar = driver.find_element_by_id('search_form_input_homepage')
print('Element Details: ', search_bar)
search_bar.send_keys('lava chdum')
submit_btn = driver.find_element_by_id('search_button_homepage')
submit_btn.click()
results = driver.find_elements_by_class_name('result')
print("Found " + str(len(results)) + " results:")
print('----------- RESULTS -----------')
for result in results:
    print(result.get_attribute('innerHTML'))
driver.close()
def download_author_models(author_id, args):
    ''' Write models of an author, which are not in any collection.

    The models are stored under a synthetic collection id 'author-<author_id>'
    inside CAD_DIR, together with a README json describing them.

    :param author_id: 3D Warehouse author id (last part of the user.html url)
    :param args: namespace with at least `overwrite_collection` (bool) and
        `timeout` (seconds for page-load waits)
    '''
    with closing(Firefox()) as browser:
        # collection_id is made up as 'author-%s' % author_id
        url = 'https://3dwarehouse.sketchup.com/user.html?id=%s' % author_id
        collection_id = 'author-%s' % author_id
        collection_dir = op.join(CAD_DIR, collection_id)
        logging.info('will download coleection_id: %s' % collection_id)
        # if collection exists
        collection_path = op.join(collection_dir, README_NAME)
        if op.exists(collection_path):
            # if 'overwrite' enabled, remove everything and write from scratch
            if args.overwrite_collection:
                shutil.rmtree(collection_dir)
                # BUG FIX: models_info was left unbound on this path and the
                # download_all_models call below raised NameError; start from
                # an empty list, matching download_collection.
                models_info = []
            else:
                # if 'overwrite' disabled, try to read what was downloaded
                try:
                    collection_info = json.load(open(collection_path))
                    models_info = collection_info['vehicles']
                    # a README with a null 'vehicles' entry counts as empty
                    # (same guard as in download_collection)
                    if models_info is None:
                        models_info = []
                # if 'overwrite' disabled and can't read/parse the readme
                except:
                    raise Exception(
                        'Failed to parse the collection due to: %s' %
                        sys.exc_info()[0])
        else:
            models_info = []
        if not op.exists(op.join(collection_dir, 'skp')):
            os.makedirs(op.join(collection_dir, 'skp'))
        # open the page with collection
        browser.get(url)
        WebDriverWait(browser, timeout=args.timeout).until(
            lambda x: x.find_elements_by_class_name('results-entity-link'))
        # get author (non-ascii characters are stripped)
        element = browser.find_element_by_id('display-name')
        author_name = validateString(
            element.text.encode('ascii', 'ignore').decode("utf-8"))
        # keep scrolling the page until models show up (for pages with many models)
        prev_number = 0
        while True:
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            elements = browser.find_elements_by_class_name(
                'results-entity-link')
            logging.info('found %d models' % len(elements))
            # stop once a scroll no longer reveals new models
            if prev_number == len(elements):
                break
            else:
                prev_number = len(elements)
            time.sleep(1)
        # get the model urls
        model_urls = []
        for element in elements:
            model_url = element.get_attribute('href')
            model_urls.append(model_url)
        # download all models (helper defined elsewhere in this module)
        new_models_info = download_all_models(model_urls, models_info,
                                              collection_id, collection_dir)
        collection_info = {
            'collection_id': collection_id,
            'collection_name': '',
            'author_id': author_id,
            'author_name': author_name,
            'vehicles': new_models_info
        }
        with open(op.join(collection_dir, README_NAME), 'w') as f:
            f.write(json.dumps(collection_info, indent=4))
import sqlite3
import datetime

# Firefox profile that auto-downloads Excel files into ./excel without
# showing any save-file prompts.
profile = FirefoxProfile()
profile.set_preference("browser.download.panel.shown", False)
profile.set_preference("browser.helperApps.neverAsk.openFile",
                       "text/xls,application/vnd.ms-excel")
profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                       "text/xls,application/vnd.ms-excel")
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.dir",
                       os.path.join(os.getcwd(), 'excel'))
options = Options()
options.add_argument("--headless")
driver = Firefox(firefox_options=options, firefox_profile=profile)
wait = WebDriverWait(driver, timeout=10)
print("Firefox Headless Browser Invoked")
driver.get('https://www.xmarkets.db.com/DE/ENG/Product_Overview/WAVEs_Call')
print("Got web page")
# Filter the product table to German index underlyings.
Select(
    wait.until(
        expected.visibility_of_element_located((
            By.CSS_SELECTOR,
            '#ctl00_leftNavigation_ctl00_asidebar-tab-navigation_DerivativeFilter_UnderlyingTypeFilterDropdown'
        )))).select_by_visible_text("Indices Germany")
sleep(2)
# NOTE(review): the chunk is truncated mid-expression below.
Select(
    wait.until(
def test_my_first_firefox_selenium_test():
    # Launch Firefox; the geckodriver path is resolved (downloaded if
    # necessary) automatically by the webdriver-manager library.
    browser = Firefox(executable_path=GeckoDriverManager().install())
from selenium.webdriver import Firefox

url = 'http://selenium.dunossauro.live/aula_05_c.html'
driver = Firefox()
driver.get(url)


def melhor_filme(browser, filme, email, telefone):
    """Fill in and submit the "best film of 2020" form."""
    campos = (('filme', filme), ('email', email), ('telefone', telefone))
    for campo, valor in campos:
        browser.find_element_by_name(campo).send_keys(valor)
    browser.find_element_by_name('enviar').click()


melhor_filme(driver, 'Parasita', '*****@*****.**', '(019)987654321')
driver.quit()
from selenium.webdriver import Firefox
from selenium.webdriver.common.action_chains import (ActionChains)
from selenium.webdriver.common.keys import Keys

url = 'https://selenium.dunossauro.live/aula_08_a'
browser = Firefox()
browser.implicitly_wait(30)
browser.get(url)
texto = 'selenium'
# hi-level API: locate the text input by name
elemento = browser.find_element_by_name('texto')
# low-level API: queue raw mouse actions against it
ac = ActionChains(browser)
ac.move_to_element(elemento)
ac.click(elemento)


def digita_com(key):
    # Type `texto` one character at a time while holding `key` (e.g.
    # Keys.SHIFT), then run the entire queued action chain.
    # NOTE(review): no call to this function is visible in this chunk.
    ac.key_down(key)
    for letra in texto:
        ac.key_down(letra)
        ac.key_up(letra)
    ac.key_up(key)
    ac.perform()
from selenium.webdriver import Firefox

b = Firefox()
url = 'http://selenium.dunossauro.live/aula_06_a.html'
b.get(url)
# ===== Using the 'type' attribute [att=valor] =====
tag_type_nome = b.find_element_by_css_selector('[type="text"]')
tag_type_senha = b.find_element_by_css_selector('[type="password"]')
tag_type_btn = b.find_element_by_css_selector('[type="submit"]')
# ===== Using the 'name' attribute [att=valor] =====
tag_name_nome = b.find_element_by_css_selector('[name="nome"]')
tag_name_senha = b.find_element_by_css_selector('[name="senha"]')
tag_name_btn = b.find_element_by_css_selector('[name="l0c0"]')
# ===== Using [att*=valor] (substring match) =====
"""
nome = b.find_element_by_css_selector('[name*="ome"]')
senha = b.find_element_by_css_selector('[name*="nha"]')
btn = b.find_element_by_css_selector('[name*="l0"]')
"""
# ===== Using [att|=valor] (exact or hyphen-prefixed match) =====
"""
nome = b.find_element_by_css_selector('[name|="nome"]')
senha = b.find_element_by_css_selector('[name|="senha"]')
btn = b.find_element_by_css_selector('[name|="l0c0"]')
"""
def setUp(self) -> None:
    # Resolve the project root (two directory levels above this test file)
    # and start a headless Firefox for the test case.
    self.path = Path(__file__).parent.parent
    print(self.path)
    opts = Options()
    opts.headless = True
    self.browser = Firefox(options=opts)
from selenium.webdriver import Firefox
import os
import json
from time import sleep


def see_more():
    # Keep clicking the "see more" link until the lookup fails (any
    # exception terminates the loop).
    while True:
        try:
            wb.find_element_by_xpath("//a[@class='PJ4k2']").click()
        except:
            break


wb = Firefox(executable_path=os.getcwd() + "/geckodriver.exe")
wb.get("https://www.instagram.com/explore/locations/")
data = {}
see_more()
# Walk every top-level location link, open it in a new tab, expand it fully,
# then descend one level into its sub-locations.
for i in wb.find_elements_by_xpath("//a[@class='aMwHK']"):
    txt1 = i.text
    wb.execute_script('''window.open("{}","_blank");'''.format(
        i.get_attribute("href")))
    sleep(1)
    wb.switch_to.window(wb.window_handles[1])
    see_more()
    data[txt1] = {}
    for j in wb.find_elements_by_xpath("//a[@class='aMwHK']"):
        txt2 = j.text
        wb.execute_script('''window.open("{}","_blank");'''.format(
            j.get_attribute("href")))
        sleep(1)
        # NOTE(review): chunk appears truncated here; the rest of the inner
        # loop body is not visible.