def __play_at_fastest_speed(browser: webdriver.Firefox):
    """Switch the YouTube player to its highest available playback rate."""
    rates_js = 'return document.getElementById("movie_player").getAvailablePlaybackRates()'
    rates = list(browser.execute_script(rates_js))
    fastest = rates[-1]
    # The last entry of getAvailablePlaybackRates() is the maximum rate.
    browser.execute_script(
        'document.getElementById("movie_player").setPlaybackRate({})'.format(
            fastest))
    log.info("\tPlay at fastest speed {}.".format(fastest))
def get_praise():
    """Search Google for a product, open every result in its own tab, and dump
    each page's ``<div>`` elements to stdout.

    NOTE(review): ``list_price`` is never populated — the price-extraction
    step appears to be unfinished in the original; preserved as-is.
    """
    browser = Firefox(
        executable_path=r'C:\Program Files\Mozilla Firefox\geckodriver.exe')
    browser.get("https://www.google.com/search?q=Lenovo IdeaPad 330")
    results = browser.find_elements_by_css_selector('div.g')
    href = []
    for result in results:
        link = result.find_element_by_tag_name("a")
        href.append(link.get_attribute("href"))
    print(len(href))
    list_price = []
    for link in href:
        try:
            browser.execute_script("window.open('');")
            # Bug fix: switch to the newest window. The original indexed
            # window_handles from 0, so the first iteration re-used the
            # initial search window instead of the freshly opened tab.
            browser.switch_to.window(browser.window_handles[-1])
            print(link)
            browser.get(link)
            print(browser.find_elements_by_css_selector('div'))
        except Exception as exc:
            # Was a bare except printing an unrelated "unable to start
            # thread" message; report the actual failure instead.
            print("Error: unable to open result window: {}".format(exc))
    print(list_price)
def recent_post_links(username, post_count=10):
    """Scrape the URLs of an Instagram account's most recent posts.

    Args:
        username: Instagram username.
        post_count: number of post links to return (default 10).

    Returns:
        A list of unique post URLs in the order they appear on the page.
    """
    url = "https://www.instagram.com/" + username + "/"
    firefox_options = Options()
    firefox_options.add_argument("--headless")
    browser = Firefox(firefox_options=firefox_options)
    try:
        browser.get(url)
        post = 'https://www.instagram.com/p/'
        post_links = []
        while len(post_links) < post_count:
            links = [
                a.get_attribute('href')
                for a in browser.find_elements_by_tag_name('a')
            ]
            for link in links:
                # Bug fix: get_attribute('href') can return None, which made
                # ``post in link`` raise TypeError.
                if link and post in link and link not in post_links:
                    post_links.append(link)
            # Scroll to trigger lazy-loading of older posts.
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(10)
        return post_links[:post_count]
    finally:
        # Bug fix: stop_client() alone left the browser process running.
        browser.quit()
def html2png(tid):
    """Render ``<tid>/cvt.html`` in headless Firefox and save it as ``<tid>/cvt.png``."""
    base_dir = path.join(path.dirname(__file__), str(tid))
    url = 'file:///' + path.join(base_dir, 'cvt.html')
    # "file:///D:/source/ngapost2md/22143850/cvt.html"
    save_fn = path.join(base_dir, 'cvt.png')
    option = Options()
    for argument in ('--headless', '--disable-gpu',
                     "--window-size=800,600", "--hide-scrollbars"):
        option.add_argument(argument)
    driver = Firefox(executable_path='geckodriver', firefox_options=option)
    driver.get(url)
    # Grow the window to the full document so the screenshot captures everything.
    full_width = driver.execute_script(
        'return document.body.parentNode.scrollWidth')
    full_height = driver.execute_script(
        'return document.body.parentNode.scrollHeight')
    driver.set_window_size(full_width, full_height)
    driver.save_screenshot(save_fn)
    driver.quit()
    time.sleep(5)
    # quit() sometimes leaves firefox.exe behind on Windows; kill it hard.
    os.system("taskkill /F /IM firefox.exe")
def load_localstorage(self, session_id):
    """Open a browser that restores a saved session's localStorage.

    Reads the session's metadata from ``self.sessions_file``, recreates the
    user agent it was captured with, unpickles the saved localStorage and
    replays it into a fresh Firefox profile, then appends the browser to
    ``self.browsers``.
    """
    # Bug fix: close the sessions file (was json.load(open(...))).
    with open(self.sessions_file) as fh:
        sessions = json.load(fh)
    session = sessions[str(session_id)]
    storage_path = session["session_path"]
    url = session["web_url"]
    # Setting useragent to the same one the session saved with
    profile = FirefoxProfile()
    profile.set_preference("general.useragent.override", session["useragent"])
    # Bug fix: close the pickle file (was pickle.load(open(...))).
    with open(storage_path, "rb") as fh:
        localStorage = pickle.load(fh)
    try:
        browser = Firefox(profile)
    except Exception:  # was a bare except
        error("Couldn't open browser to view session!")
        return
    browser.get(url)
    browser.delete_all_cookies()
    browser.execute_script(
        "window.localStorage.clear()")  # clear the current localStorage
    for key, value in localStorage.items():
        browser.execute_script(
            "window.localStorage.setItem(arguments[0], arguments[1]);", key,
            value)
    status(f"Session {session_id} loaded")
    browser.refresh()
    self.browsers.append(browser)
def getPage(url):
    """Fetch ``url``, wait for the article action buttons, and hand the page
    source to ``getPaper``.

    Retries forever with a fresh driver on any failure; a 2-second page-load
    timeout is deliberate — the explicit wait below recovers from it.
    """
    driver = Firefox(executable_path='geckodriver', firefox_options=options)
    limit = 2
    driver.set_page_load_timeout(limit)
    driver.set_script_timeout(limit)
    try:
        driver.get(url)
    except Exception:
        # A load timeout is expected with the tiny limit; ignore it.
        pass
    locator = (By.CSS_SELECTOR, 'div.capsule__action--buttons')
    while True:
        try:
            WebDriverWait(driver, 400, 0.5).until(
                EC.presence_of_element_located(locator))
            source = driver.page_source.encode('utf-8')
            # Bug fix: these two calls previously appeared *after* the
            # ``return`` and never ran, leaking a browser per success.
            driver.execute_script('window.stop()')
            driver.close()
            return getPaper(source, url)
        except BaseException as e:  # modernized from Python-2 "except X,e"
            driver.execute_script('window.stop()')
            driver.close()
            sleep(10)
            driver = Firefox(executable_path='geckodriver',
                             firefox_options=options)
            driver.set_page_load_timeout(limit)
            driver.set_script_timeout(limit)
            print('base', type(e), url)  # modernized from Python-2 print
def get_inner_html(driver: webdriver.Firefox):
    """ Get html from inner iframe """
    # Poll until the inner frame reports ready (the probe script returns 0).
    while driver.execute_script(INNER_FRAME_READY) != 0:
        driver.implicitly_wait(0.5)
    inner_body_js = (
        "return document.querySelector('iframe#tab-content')"
        ".contentWindow.document.body.innerHTML")
    return driver.execute_script(inner_body_js)
class Posts:
    """Scrapes Instagram post information and returns it as dynamically
    created attributes."""

    # handles scraping of information as well as setting them as attributes
    scrape = Scraper()
    # handles creation of DataFrame
    df = Create_DataFrame()
    # checks .env for credentials
    user = CheckEnv()
    password = CheckEnv()

    def __init__(self, term, n=9, user=None, password=None):
        self.user = user
        self.password = password
        self.browser = Firefox()
        self.scrape = {
            "post_urls": self.get_post_urls(term, n),
            "browser": self.browser,
        }
        self.df = self.scrape

    def get_post_urls(self, term, n):
        """
        Retrieves urls of Instagram posts based on hashtag or username

        :param term: hashtag or username
        :param n: number of posts to be scraped
        :return: list of post urls
        """
        # gives appropriate url based on term
        url = user_or_tag(term)
        # logs in if credentials are provided
        if self.user and self.password:
            login(self.browser, self.user, self.password)
        self.browser.get(url)
        post_links = []
        # appends to post url until number of posts specified
        while len(post_links) < n:
            # gathers all links on webpage
            for web_element in self.browser.find_elements_by_tag_name("a"):
                href = web_element.get_attribute("href")
                # Bug fix: ``href`` can be None for anchors without a real
                # target, which made ``in href`` raise TypeError.
                if (href and "https://www.instagram.com/p/" in href
                        and href not in post_links):
                    post_links.append(href)
            # scrolls down to retrieve more posts
            self.browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            # sleeps to prevent being banned
            time.sleep(3)
        return post_links[:n]
def browser(request, config_browser, config_wait_time):
    """Pytest fixture: build a WebDriver for the configured browser, attach a
    screenshot to Allure when the test fails, surface soft-assert failures,
    and quit the driver on teardown.
    """
    # Initialize WebDriver
    if config_browser == 'chrome':
        driver = webdriver.Chrome(ChromeDriverManager().install())
    elif config_browser == 'chrome-local':
        options = webdriver.ChromeOptions()
        options.add_argument(
            "--unsafely-treat-insecure-origin-as-secure=http://swarm_nginx,ws://eth-node-nginx:8546,http://eth-node-nginx:8545,http://client-backend-nginx,http://service-cdp:3010,http://client-api:10130,http://core-rpc:3700,http://notary_nginx"
        )
        driver = webdriver.Remote(
            command_executor=SELENIUM_HUB_URL,
            desired_capabilities=options.to_capabilities())
    elif config_browser == 'firefox':
        driver = Firefox()
    else:
        raise Exception(f'"{config_browser}" is not a supported browser')
    # Wait implicitly for elements to be ready before attempting interactions
    driver.implicitly_wait(config_wait_time)
    # open browser fullscreen
    driver.maximize_window()
    # @todo implement video_recording again with docker
    # Return the driver object at the end of setup
    yield driver
    if request.node.rep_call.failed:
        # Make the screen-shot if test failed:
        try:
            driver.execute_script("document.body.bgColor = 'white';")
            # Attaching screenshot to Allure
            allure.attach(driver.get_screenshot_as_png(),
                          name=request.function.__name__,
                          attachment_type=allure.attachment_type.PNG)
        except Exception:
            # Best-effort reporting: never let it mask the real failure.
            # (Was a bare except.)
            pass
    else:
        # Surface accumulated soft-assert failures as a hard failure.
        if logger.soft_assert_fail:
            logger.soft_assert_fail = False
            allure.attach("Traceback: \n" + logger.logs)
            raise Exception("There is a soft assertion failure")
    logger.logs = ""

    # For cleanup, quit the driver
    def fin():
        # Bug fix: quit() tears down the whole session; close() only closed
        # the current window and leaked the driver process.
        driver.quit()

    request.addfinalizer(fin)
def get_signature():
    """Open the Toutiao fashion channel and print the ascp honey value and the
    TAC signature for the current timestamp."""
    browser = Firefox()
    browser.get('https://www.toutiao.com/ch/news_fashion/')
    honey = browser.execute_script('return ascp.getHoney()')
    signature = browser.execute_script(
        'return TAC.sign(' + str(time.time()) + ')')
    print(honey)
    print(signature)
def callWebsite():
    """Query the TCDD e-ticket site for ``takipEdilecekTarih`` and return the
    seat count shown for the first wagon type (the value in parentheses).

    Retries with a fresh headless browser on any failure. The original
    recursed on a bare except — risking RecursionError — and never closed a
    browser, leaking one per attempt. Updates the module-level ``number`` as
    a side effect, as before.
    """
    global number
    while True:
        browser = None
        try:
            opts = Options()
            opts.set_headless()
            assert opts.headless
            browser = Firefox(options=opts)
            browser.implicitly_wait(3)
            browser.get(
                'https://ebilet.tcddtasimacilik.gov.tr/view/eybis/tnmGenel/tcddWebContent.jsf'
            )
            nereden = browser.find_element_by_id('nereden')
            nereden.clear()
            nereden.send_keys('İstanbul(Halkalı)')
            print(
                browser.find_element_by_xpath(
                    "//input[contains(@id,'nereden')]").get_attribute('value'))
            nereye = browser.find_element_by_id('nereye')
            nereye.clear()
            nereye.send_keys('Ankara Gar')
            print(
                browser.find_element_by_xpath(
                    "//input[contains(@id,'nereye')]").get_attribute('value'))
            tarih = browser.find_element_by_id('trCalGid_input')
            tarih.clear()
            tarih.send_keys(takipEdilecekTarih)
            print(
                browser.find_element_by_xpath(
                    "//input[contains(@id,'trCalGid_input')]").get_attribute(
                        'value'))
            ara = browser.find_element_by_id('btnSeferSorgula')
            browser.execute_script("arguments[0].click()", ara)
            time.sleep(5)
            results = browser.find_element_by_id(
                'mainTabView:gidisSeferTablosu:1:j_idt104:0:somVagonTipiGidis1_label'
            )
            # The seat count is the value between parentheses, e.g. "(12)".
            a = results.text.split(" ")[3]
            number = a[a.find("(") + 1:a.find(")")]
            return int(number)
        except Exception:
            continue  # retry with a fresh browser
        finally:
            if browser is not None:
                browser.quit()
class Downloader:
    """Deprecated dynamic-page downloader: renders pages in a real browser,
    scrolls to the bottom, and returns the rendered HTML in a MainItem."""

    def __init__(self, driver="Chrome", load_time=10):
        # driver: "Chrome" for a visible Chrome; anything else uses headless
        #         Firefox.
        # load_time: maximum seconds get() waits for a page to load.
        start_time = time.time()
        if driver == "Chrome":
            self.driver = Chrome()
        else:
            options = Options()
            options.add_argument('-headless')
            self.driver = Firefox(firefox_options=options)
        logging.info("Webdriver init spent " + str(time.time() - start_time) +
                     "s.")
        self.driver.set_page_load_timeout(load_time)  # cap page-load wait

    def __enter__(self):
        """Entered by the with statement; bound to the name after ``as``."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Called when the with block finishes: release the browser."""
        self.driver.quit()

    def download(self, url, after_scroll_time=3):
        """Download one web page and return a populated MainItem.

        A load timeout is tolerated: the partial page is stopped and used.
        after_scroll_time: seconds to wait for lazy content after scrolling.
        """
        start_time = time.time()
        try:
            self.driver.get(url)  # request the page
            # todo store the screenshot image
            screenshot_base64 = self.driver.get_screenshot_as_base64()
        except TimeoutException as e:
            logging.info("Get url:" + url + ", msg: " + e.msg)
            self.driver.execute_script("window.stop()")
        finally:
            # load_time is recorded whether or not the load timed out.
            load_time = time.time() - start_time
            logging.info("Get url:" + url + " spend " + str(load_time) + "s.")
        # JS that scrolls to the bottom of the document.
        js_scroll = """
        function go_down() {
            var h = document.documentElement.scrollHeight || document.body.scrollHeight;
            window.scroll(h, h);
        }
        go_down()
        """
        self.driver.execute_script(js_scroll)  # perform the scroll
        time.sleep(after_scroll_time)  # let lazily-loaded content arrive
        current_url = self.driver.current_url
        page_source = self.driver.page_source
        download_item = MainItem()  # result object
        # fill in what is already known
        download_item.request_url = url
        download_item.final_url = current_url
        # download_item.screen_shot = screenshot_base64
        download_item.load_time = load_time
        download_item.html = page_source
        download_item.get_time = int(time.time())  # unix timestamp
        return download_item
def lms_login(driver: webdriver.Firefox, url=LMS_LOGIN):
    """Log into the LMS, open the course card, navigate to today's task row
    and open its password page.

    Credentials come from the module-level ``LOGIN``/``PASSWORD`` constants;
    characters are sent one at a time to mimic human typing.
    """
    driver.get(url)
    login_input = driver.find_element_by_id("username")
    for c in LOGIN:
        login_input.send_keys(c)
        time.sleep(0.1)
    pass_input = driver.find_element_by_id("password")
    for c in PASSWORD:
        pass_input.send_keys(c)
        time.sleep(0.1)
    driver.find_element_by_id("loginbtn").click()
    time.sleep(1)
    # Open the course card once it becomes clickable.
    WebDriverWait(driver, 20).until(
        expected_conditions.element_to_be_clickable((
            By.XPATH,
            "//a/span[contains(text(),'CSR-23_ 12.05.2021_ Python Programming For Penetration Testing')]"
        ))).click()
    time.sleep(1)
    module = driver.find_element_by_id("module-11224")
    tasks = module.find_element_by_xpath(".//a[1]")
    url = tasks.get_attribute("href")
    print("task url:", url)
    driver.get(url)
    time.sleep(1)
    table = driver.find_element_by_class_name("generaltable")
    # Bug fix: "%-d" is a glibc-only strftime extension that raises on
    # Windows; build the no-leading-zero day portably instead.
    today = datetime.datetime.today()
    day_mark = "{}.{}".format(today.day, today.strftime("%m.%y"))
    row = table.find_element_by_xpath(
        ".//*[contains(text(),'{}')]".format(day_mark))
    # Climb two levels to the enclosing <tr> of the matched cell text.
    parent_tr = row.find_element_by_xpath('..').find_element_by_xpath("..")
    print(parent_tr.tag_name)
    a = parent_tr.find_element_by_xpath(".//*[@title='Пароль']")
    url = a.get_attribute("href")
    driver.get(url)
    a.click()
    time.sleep(2)
    driver.execute_script("alert('end')")
def hide_wayback_machine_bar(driver: webdriver.Firefox,
                             element_id: str = 'wm-ipp-base'):
    """Remove the Wayback Machine toolbar element from the page.

    Returns True when the element appeared (within 10s) and was removed,
    False on timeout.
    """
    locator = (By.ID, element_id)
    removal_js = ('document.body.removeChild'
                  f"(document.getElementById('{element_id}'))")
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(locator))
        driver.execute_script(removal_js)
        return True
    except TimeoutException:
        return False
def run_browser_and_crawl_urls(urls: list,
                               need_scrolling=False,
                               already_downloaded: list = None) -> list:
    """
    Start browser & crawl urls. Parse Sources url and return it.

    :param urls: Urls for crawling
    :param need_scrolling: scroll to the bottom until no new content appears?
    :param already_downloaded: previously downloaded urls used as scrolling
        breakpoints
    :return: attribute values matched by the module-level CSS_SELECTORS
    """

    def get_sources(brwr) -> list:
        """Collect attribute values for every CSS_SELECTORS match.

        :param brwr: Selenium Driver instance
        """
        found = []
        for selector, attr in CSS_SELECTORS:
            # Bug fix: the original ignored ``brwr`` and always read from the
            # enclosing ``browser`` variable.
            for element in brwr.find_elements_by_css_selector(selector):
                found.append(element.get_attribute(attr))
        return found

    browser = Firefox()
    try:
        sources = []
        for url in urls:
            browser.get(url)
            page = browser.page_source
            # If scrolling is needed, scroll until the page stops changing.
            while need_scrolling:  # was the redundant "True and need_scrolling"
                browser.execute_script('window.scrollTo(0, 9999999999999)')
                sleep(SCROLL_WAIT_TIME)  # Wait for async loading
                if already_downloaded:
                    # Stop once a previously downloaded source reappears.
                    if set(already_downloaded) & set(get_sources(browser)):
                        break
                if page == browser.page_source:  # no new content: stop
                    break
                page = browser.page_source
            sources += get_sources(browser)
        return sources
    finally:
        # Bug fix: close the browser even when a page crashes mid-crawl.
        browser.close()
def test_google_search():
    """Walk the circa.udtech.global booking flow and click the second matching
    '0' label after scrolling to the bottom."""
    driver = Firefox(executable_path='/home/udtech3574/user/geckodriver')
    driver.get('http://circa.udtech.global/en/')
    driver.find_element_by_xpath(
        './/div/div/a[@class="big Button-sc-1g20tyc-0 dxdNr"]').click()
    driver.find_element_by_xpath(
        './/div/div/a[@class="ClosestLocationBlock__SchedulerButton-c1i0bd-6 gwLpBX '
        'small Button-sc-1g20tyc-0 dxdNr"]').click()
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    a = driver.find_elements_by_xpath("//label[contains(text(), '0')]")
    # Bug fix: the unconditional a[1].click() raised IndexError whenever
    # fewer than two labels matched; click only when the element exists.
    if len(a) > 1:
        a[1].click()
    else:
        print('A=0')
def __call__(self, driver: webdriver.Firefox) -> bool:
    """Return truthy once ``self.obj`` exists and is non-null in the page."""
    # intentionally conflating null and undefined with !=
    probe = f"return {self.obj} != null"
    try:
        is_ready = driver.execute_script(probe)
    except JavascriptException:
        # The object (or one of its parents) is not defined yet.
        return False
    return is_ready
def checkout(web_driver: webdriver.Firefox, user_selections): web_driver.get(cart_url) #Starts the checkout process web_driver.find_element_by_id(checkout_button_id).click() #Verifies and ships using default shipping info web_driver.execute_script("CheckShippingAddress()") time.sleep(2) web_driver.execute_script("ChoiceAddress()") time.sleep(2) web_driver.find_element_by_id(agree_tos_checkbox_id).click() web_driver.find_element_by_id(checkout_continue_button_id).click() time.sleep(2) #I only support credit card purchases at this time. web_driver.find_element_by_id(credit_card_radio_id).click() web_driver.find_element_by_id(checkout_continue_button_id).click() #Populate credit card info web_driver.find_element_by_id(cardholder_name_input_id).send_keys( user_selections["cardholder_name"]) web_driver.find_element_by_id(credit_card_input_id).send_keys( user_selections["card_number"]) web_driver.find_element_by_id(cvv_input_id).send_keys( user_selections["security_code"]) Select(web_driver.find_element_by_id(expiration_month_id)).select_by_value( user_selections["expiration_month"]) Select(web_driver.find_element_by_id(expiration_year_id)).select_by_value( user_selections["expiration_year"]) #Submit and wait for card info to get validated web_driver.find_element_by_id(modal_checkout_continue_button_id).click() time.sleep(10) #Press final agree box web_driver.find_element_by_id(agree_tos_checkbox_part_two_id).click() time.sleep(2) #Complete the order web_driver.find_element_by_id(checkout_continue_button_id).click() print( "we believe we have placed your order. Please verify, and if this worked enjoy your new card :)." )
def read_url(url):
    """Open ``url`` maximized and scroll to the bottom in random, human-like
    increments, pausing proportionally to each scroll distance."""
    driver = Firefox(options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        time.sleep(4)
        height = driver.execute_script("return document.body.scrollHeight")
        print(height)
        position = 0
        while position < height:
            driver.execute_script(f"window.scrollTo(0, {position});")
            delta = random.randint(50, 500)
            position += delta
            # Pause roughly in proportion to the distance scrolled.
            time.sleep(delta // 20)
    finally:
        # Bug fix: the original called close() (leaving geckodriver running)
        # and skipped teardown entirely when navigation raised.
        driver.quit()
def get_localstorage(driver: webdriver.Firefox) -> Dict[str, str]:
    """ Load all data from localstorage

    Iterates window.localStorage in the page and returns it as a plain
    key → value mapping (localStorage values are always strings).
    """
    return driver.execute_script(r"""var ls = window.localStorage;
    var items = {};
    var key;
    for (var i = 0; i < ls.length; i += 1) {
        key = ls.key(i)
        items[key] = ls.getItem(key);
    }
    return items;""")
def screenshot_title(browser: webdriver.Firefox, link: str, directoryname: str,
                     filename: str, id: str):
    """The browser takes a screenshot of the title

    Args:
        link (str): The link to the post's title
        directoryname (str): The directory to save the screenshot in
        filename (str): The name of the file to save as
        id (str): The id of the title to move to on the page

    Returns:
        None
    """
    browser.get(link)
    nsfw_check(browser)
    if id:
        # Bring the title element into view before capturing.
        target = browser.find_element_by_id(id)
        browser.execute_script(
            "arguments[0].scrollIntoView(alignToTop=false);", target)
    destination = "{}/pictures/{}.png".format(directoryname, filename)
    browser.save_screenshot(destination)
def __get_player_status(browser: webdriver.Firefox) -> str:
    """Translate the YouTube player's numeric state into a readable name."""
    state_js = 'return document.getElementById("movie_player").getPlayerState()'
    state_names = {
        -1: 'unstarted',
        0: 'ended',
        1: 'playing',
        2: 'paused',
        3: 'buffering',
        5: 'video cued',
    }
    return state_names[browser.execute_script(state_js)]
def result():
    """Flask endpoint: on POST, update the caller-ID of a voip.ms sub-account
    to the submitted ``phoneNumber`` and return "Success".

    SECURITY(review): the voip.ms credentials are hard-coded below — move
    them to configuration/environment variables and rotate them.
    """
    if request.method == 'POST':
        print(request.form["phoneNumber"])
        phoneNumber = request.form["phoneNumber"]
        driver = Firefox()
        try:
            driver.get("https://voip.ms/login")
            email = driver.find_elements_by_xpath('//input[@id="username"]')[1]
            email.send_keys('*****@*****.**')
            password = driver.find_elements_by_xpath(
                '//input[@id="password"]')[1]
            password.send_keys('Developer44$')
            login_btn = driver.find_elements_by_xpath(
                '//input[@class="btn btn-ghost btn-big"]')[1]
            login_btn.click()
            time.sleep(5)
            # Hover the "Sub Accounts" menu to reveal the submenu entry.
            btn = driver.find_element_by_link_text("Sub Accounts")
            ActionChains(driver).move_to_element(btn).perform()
            driver.find_element_by_link_text("Manage Sub Accounts").click()
            time.sleep(5)
            driver.find_elements_by_class_name("edit-icon")[1].click()
            time.sleep(5)
            callerid = driver.find_element_by_name("callerid")
            callerid.clear()
            callerid.send_keys(phoneNumber)
            update = driver.find_element_by_id("button")
            driver.execute_script("arguments[0].click();", update)
            time.sleep(5)
        finally:
            # Bug fix: always tear the browser down, even when a step fails
            # (close() also only shut the window; quit() ends the session).
            driver.quit()
        return "Success"
def load_cookie(self, session_id):
    """Open a browser that restores a saved session's cookies.

    Reads the session's metadata from ``self.sessions_file``, recreates the
    user agent it was captured with, unpickles the saved cookies and replays
    them into a fresh Firefox profile, then appends the browser to
    ``self.browsers``.
    """
    # Bug fix: close the sessions file (was json.load(open(...))).
    with open(self.sessions_file) as fh:
        sessions = json.load(fh)
    session = sessions[str(session_id)]
    cookie_path = session["session_path"]
    url = session["web_url"]
    # Setting useragent to the same one the session saved with
    profile = FirefoxProfile()
    profile.set_preference("general.useragent.override", session["useragent"])
    # Bug fix: close the pickle file (was pickle.load(open(...))).
    with open(cookie_path, "rb") as fh:
        cookies = pickle.load(fh)
    try:
        browser = Firefox(profile)
    except Exception:  # was a bare except
        error("Couldn't open browser to view session!")
        return
    browser.get(url)
    browser.delete_all_cookies()
    browser.execute_script(
        "window.localStorage.clear()")  # clear the current localStorage
    for cookie in cookies:
        browser.add_cookie(cookie)
    status(f"Session {session_id} loaded")
    browser.refresh()
    self.browsers.append(browser)
def driver_open_noBS(url):
    """Render ``url`` in headless Firefox and return the document's innerHTML."""
    from selenium.webdriver import Firefox
    from selenium.webdriver.firefox.options import Options
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    driver = Firefox(executable_path='/usr/local/bin/geckodriver',
                     options=options)
    try:
        driver.get(url)
        # Bug fix: the original also read page_source into an unused local.
        return driver.execute_script(
            "return document.documentElement.innerHTML;")
    finally:
        # Bug fix: quit() ends the session; close() left geckodriver running
        # and nothing was released when get()/execute_script raised.
        driver.quit()
def last_5_post(username):
    """Return the first two post URLs found on an Instagram profile page.

    Scrolls the (headless) page until at least two distinct post links have
    been collected.
    """
    url = "https://www.instagram.com/" + username
    options = Options()
    options.add_argument('-headless')
    browser = Firefox(executable_path='geckodriver', options=options)
    try:
        browser.get(url)
        post = 'https://www.instagram.com/p/'
        link_post = []
        while len(link_post) < 2:
            # Scan the anchors fresh each pass; the original accumulated every
            # href ever seen in an unbounded temp list before filtering.
            for anchor in browser.find_elements_by_xpath("//a[@href]"):
                href = anchor.get_attribute("href")
                # Guard: get_attribute can return None.
                if href and post in href and href not in link_post:
                    link_post.append(href)
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(10)
        return link_post[:2]
    finally:
        # Bug fix: stop_client() does not close the browser process.
        browser.quit()
def recent_post_links(username, post_count=10):
    """
    With the input of an account page, scrape the 10 most recent posts urls

    Args:
        username: Instagram username
        post_count: default of 10, set as many or as few as you want

    Returns:
        A list with the unique url links for the most recent posts for the
        provided user
    """
    url = "https://www.instagram.com/" + username + "/"
    firefox_options = Options()
    firefox_options.add_argument("--headless")
    browser = Firefox(
        executable_path=r'/Users/Akira/Desktop/WebScrapperrrrr/geckodriver.exe',
        firefox_options=firefox_options)
    try:
        browser.get(url)
        post = 'https://www.instagram.com/p/'
        post_links = []
        while len(post_links) < post_count:
            # (Dead code removed: an unused find_elements_by_xpath query whose
            # result was immediately shadowed, and an unreachable
            # browser.close() that sat after the return statement.)
            links = [
                a.get_attribute('href')
                for a in browser.find_elements_by_tag_name('a')
            ]
            for link in links:
                # Guard: get_attribute can return None.
                if link and post in link and link not in post_links:
                    post_links.append(link)
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
        return post_links[:post_count]
    finally:
        # Bug fix: stop_client() does not actually quit the browser.
        browser.quit()
def browser(request, config_browser, config_wait_time):
    """Pytest fixture: create the configured WebDriver, attach a screenshot to
    Allure when the test fails, and quit the driver on teardown."""
    # Initialize WebDriver
    if config_browser == 'chrome':
        driver = webdriver.Chrome(ChromeDriverManager().install())
    elif config_browser == 'firefox':
        driver = Firefox()
    else:
        raise Exception(f'"{config_browser}" is not a supported browser')
    # Wait implicitly for elements to be ready before attempting interactions
    driver.implicitly_wait(config_wait_time)
    # open browser fullscreen
    driver.maximize_window()
    # Return the driver object at the end of setup
    yield driver
    if request.node.rep_call.failed:
        # Make the screen-shot if test failed:
        try:
            driver.execute_script("document.body.bgColor = 'white';")
            # Attaching screenshot to Allure
            allure.attach(driver.get_screenshot_as_png(),
                          name=request.function.__name__,
                          attachment_type=allure.attachment_type.PNG)
        except Exception:
            # Best-effort reporting must never mask the real test failure.
            # (Was a bare except.)
            pass

    # For cleanup, quit the driver
    def fin():
        # Bug fix: quit() tears down the whole session; close() only closed
        # the current window and leaked the driver process.
        driver.quit()

    request.addfinalizer(fin)
def scrape(self):
    """Scrape live football matches from the configured site and extract each
    game's minute, team names and score from the raw match HTML.

    NOTE(review): the regexes below are tightly coupled to the site's markup;
    a template change will break them. The allGames table is created but no
    rows are inserted in the visible code — presumably unfinished.
    """
    # CONNECT THE DATABASE
    connector = sqlite3.connect('games.db')
    cursor = connector.cursor()
    cursor.execute('CREATE TABLE IF NOT EXISTS allGames('
                   ' minute TEXT, home_team TEXT, away_team TEXT,'
                   ' home_score DECIMAL, away_score DECIMAL,'
                   ' home_odd REAL, draw_odd REAL, away_odd REAL)')
    options = FirefoxOptions()
    options.headless = True
    driver = Firefox(options=options,
                     executable_path='C:\Windows\geckodriver.exe')
    driver.get(self.WEB_LINKS["football"])
    sleep(2)
    # Expand the live-matches section before reading the DOM.
    driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/div[1]/div[2]/div/div[1]/div/div[1]/div[1]/i'
    ).click()
    sleep(1)
    html = driver.execute_script(
        'return document.documentElement.outerHTML;')
    soup = BeautifulSoup(html, 'html.parser')
    games = soup.find_all(
        class_=re.compile("sr-match-container sr-border"))
    all_games = [game for game in games]
    driver.close()
    for game in all_games:
        print(str(game))
        # TAKE THE MINUTE
        minute_pattern = r'[n]\"\>([A-z0-9]{1,2})'
        minute_token = re.search(minute_pattern, str(game))
        minute = minute_token.group(1)
        # FIND THE TEAMS
        teams_pattern = r'[e]\=\"(.{1,40})\"\>'
        teams_token = re.findall(teams_pattern, str(game))
        home_team, away_team = teams_token[0], teams_token[1]
        # FIND THE SCORE
        home_score_pattern = r'[e]\"\>(\d{1,2})\<'
        away_score_pattern = r'[y]\"\>(\d{1,2})\<'
        home_score_token = re.search(home_score_pattern, str(game))
        away_score_token = re.search(away_score_pattern, str(game))
        home_score = home_score_token.group(1)
        away_score = away_score_token.group(1)
def upload_source(driver: webdriver.Firefox, file: Path):
    """Upload file to Karma"""
    driver.find_elements_by_css_selector("ul.nav > li.dropdown")[0].click()
    short_delay()
    file_input = driver.find_element_by_css_selector("form#fileupload input")
    # Un-hide the <input type=file> so send_keys can reach it.
    driver.execute_script(
        '''
        arguments[0].style = "";
        arguments[0].style.display = "block";
        arguments[0].style.visibility = "visible";''', file_input)
    file_input.send_keys(str(file))
    delay()
    # select file format
    driver.find_element_by_css_selector("#btnSaveFormat").click()
    delay()
    # select #objects to import
    driver.find_element_by_css_selector("#btnSaveOptions").click()
    waited_seconds = 0
    worksheet_loaded = False
    while waited_seconds < 30:
        waited_seconds += delay()
        # check if worksheet is loaded
        try:
            label = driver.find_element_by_css_selector(
                "#WorksheetOptionsDiv a").text.strip()
        except NoSuchElementException:
            continue
        if label == file.name:
            worksheet_loaded = True
            break
    if not worksheet_loaded:
        raise Exception("Cannot load worksheet of source: %s" % file.name)
    delay()
def get_selenium_js_html(self, url):
    """Fetch ``url`` with headless Firefox and return the fully rendered HTML."""
    options = Options()
    options.add_argument('-headless')  # run without a visible window
    # With geckodriver on PATH the bare executable name suffices; otherwise
    # pass an absolute path.
    driver = Firefox(executable_path='geckodriver', firefox_options=options)
    try:
        driver.get(url)
        time.sleep(3)  # give in-page scripts time to render content
        # Serialize the whole rendered document.
        return driver.execute_script(
            "return document.documentElement.outerHTML")
    finally:
        # Bug fix: quit even when navigation or the script raises; also
        # dropped an unused WebDriverWait instance and dead commented code.
        driver.quit()
def login(username, password):
    """Login into website, return cookies, api and sso token using
    geckodriver/firefox inside a virtual display.

    Returns:
        (api_key, sso_token, cookies). ``sso_token`` is None when the
        X-IHG-SSO-TOKEN cookie is absent (the original raised NameError).
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    driver = Firefox()
    try:
        driver.get(url)
        time.sleep(10)
        username_field = driver.find_element_by_name("emailOrPcrNumber")
        # There are multiple entries with the name pin; use the xpath instead
        # even though it is more error prone
        password_field = driver.find_element_by_xpath(
            '/html/body/div[1]/div/div/div[2]/div[1]/div[2]/form/div/div[1]/div[2]/input'
        )
        username_field.clear()
        username_field.send_keys(username)
        password_field.clear()
        password_field.send_keys(password)
        time.sleep(2)
        driver.find_element_by_id("tpiSubmitButton").click()
        time.sleep(3)
        cookies = driver.get_cookies()
        # Bug fix: initialize so a missing cookie no longer raises NameError.
        sso_token = None
        for cookie in cookies:
            if cookie['name'] == 'X-IHG-SSO-TOKEN':
                sso_token = cookie['value']
        api_key = driver.execute_script(
            'return AppConfig.featureToggle.apiKey')
        driver.get('https://apis.ihg.com')
        cookies.extend(driver.get_cookies())
        return api_key, sso_token, cookies
    finally:
        # Bug fix: always release the browser and the virtual display, even
        # when any step above raises. (Also dropped an unused WebDriverWait
        # instance and dead commented-out option code.)
        driver.quit()
        display.stop()
class Hertz():
    """Scrapes rental rates and store info from hertzequip.com by driving the
    site's own JavaScript and REST endpoints through a Firefox session."""

    def __init__(self):
        self.driver = Firefox()
        # implicit wait so find_element_* blocks until JS has rendered
        self.driver.implicitly_wait(15)

    def perform_search(self, zip_code):
        """Open the articulating-boom-lift listing and submit ``zip_code``."""
        self.driver.get('https://www.hertzequip.com/herc/rental-equipment/aerial-equipment+manlifts+articulating-boom-lift')
        # wait for javascript to load the page before we begin
        self.driver.find_element_by_xpath('//span[text()="View Rates"]')
        # selenium commands can be very unreliable
        # since Jquery is already loaded, I'll use that
        self.driver.execute_script("""
            $('span:contains("View Rates")')[0].click()
            $('input[id="postalCode"]').val('""" + zip_code + """')
            $('a[class="button_four button_four_skin"]')[0].click()
        """)
        # wait a few seconds for page element to disappear
        sleep(3)
        # wait until javascript has loaded the page again
        self.driver.find_element_by_xpath('//span[text()="View Rates"]')

    def get_rates(self):
        """Fetch equipment rate details from the site's REST endpoint, reusing
        the page's own catalog/category JS globals. Returns raw JSON text."""
        rates_info_json = self.driver.execute_script("""
            var xmlhttp = new XMLHttpRequest()
            var url = "https://www.hertzequip.com/herc/rest/rentals/getBuilderEquipmentDetailsItemInfo?catalogType=" + String( catalogType ) + "&categoryId=" + categoryId + "&subcategory1Id=" + subcategory1Id + "&subcategory2Id=" + subcategory2Id + "&_=" + String( new Date().getTime() )
            xmlhttp.open( "GET", url, false )
            xmlhttp.send()
            return xmlhttp.responseText
        """)
        return rates_info_json

    def get_store_info(self):
        """Fetch job-location (store) info from the site's REST endpoint.
        Returns raw JSON text."""
        store_info_json = self.driver.execute_script("""
            var xmlhttp = new XMLHttpRequest()
            var url = "https://www.hertzequip.com/herc/rest/rentals/getBuilderJobLocationsItemInfo?_=" + String( new Date().getTime() )
            xmlhttp.open( "GET", url, false )
            xmlhttp.send()
            return xmlhttp.responseText
        """)
        return store_info_json

    def perform_search_get_data(self, zip_code):
        """Run a search for ``zip_code`` and return the location data, the
        rate data, and a condensed ``specific_data`` summary of both."""
        self.perform_search(zip_code)
        location_data = json.loads(self.get_store_info())
        equipment_rates_data = json.loads(self.get_rates())
        specific_data = OrderedDict()
        specific_data['branch_zip_code'] = location_data['data']['jobLocation']['branchLocation']['zip']
        specific_data['branch_id'] = location_data['data']['jobLocation']['branchLocation']['branch']
        specific_data['equipment_list'] = []
        for equipment in equipment_rates_data['data']['equipmentInfo']['m_list']:
            equipment_data = OrderedDict()
            equipment_data['specs'] = [equipment['spec1'], equipment['spec2']]
            equipment_data['makes & models'] = equipment['makesModels']['m_list']
            equipment_data['rates'] = equipment['ratesInfo']
            specific_data['equipment_list'].append(equipment_data)
        return {'location_data': location_data, 'equipment_rates_data': equipment_rates_data, 'specific_data': specific_data}
def test_we_can_switch_context_to_chrome(self, capabilities):
    """After set_context('chrome'), privileged chrome-only JS should run."""
    driver = Firefox(capabilities=capabilities)
    driver.set_context('chrome')
    result = driver.execute_script("var c = Components.classes; return 1;")
    assert result == 1