class EdgeTest(unittest.TestCase):
    """Smoke test that drives two independent Edge (Chromium) sessions."""

    def setUp(self):
        """Start two Edge sessions and create a 5-second explicit wait for each."""
        WebDriverFactory().setup_edgedriver()
        from msedge.selenium_tools import Edge, EdgeOptions

        options = EdgeOptions()
        options.use_chromium = True
        options.set_capability('platform', 'MAC' if OS_NAME == 'MAC' else 'WINDOWS')

        self.driver1 = Edge(options=options)
        try:
            self.driver2 = Edge(options=options)
        except Exception:
            # tearDown is not run when setUp fails, so release the first
            # browser here instead of leaking it.
            self.driver1.quit()
            raise

        self.driver1.maximize_window()
        self.wait1 = WebDriverWait(self.driver1, 5)
        self.wait2 = WebDriverWait(self.driver2, 5)

    def tearDown(self):
        """Quit both sessions; the first is quit even if quitting the second fails."""
        try:
            self.driver2.quit()
        finally:
            self.driver1.quit()

    def _check_google_then_naver(self, driver, wait, link_selector, expected_url):
        """Open Google, then Naver, click ``link_selector`` and wait for ``expected_url``."""
        driver.get('https://www.google.com')
        logger.info(driver.title)
        # assertIn(member, container): the page title must contain 'Google'.
        self.assertIn('Google', driver.title)

        driver.get('https://www.naver.com')
        driver.find_element(By.CSS_SELECTOR, link_selector).click()
        logger.info(driver.current_url)
        self.assertTrue(wait.until(expected_conditions.url_contains(expected_url)))

    def test_something(self):
        """Run the same navigation check in both browsers with different Naver links."""
        self._check_google_then_naver(
            self.driver1, self.wait1, '.link_news', 'https://news.naver.com')
        self._check_google_then_naver(
            self.driver2, self.wait2, '.link_join', 'https://nid.naver.com')
def main(position, location):
    """Run the main program routine.

    Opens the search URL built by ``get_url(position, location)`` in Edge,
    collects the job cards of every result page via ``get_page_records`` and
    finally hands the collected records to ``save_data_to_file``.
    """
    scraped_jobs = []
    scraped_urls = set()
    url = get_url(position, location)

    # setup web driver
    options = EdgeOptions()
    options.use_chromium = True
    driver = Edge(options=options)

    try:
        driver.implicitly_wait(5)
        driver.get(url)

        # extract the job data, one result page per iteration
        while True:
            cards = driver.find_elements_by_class_name('jobsearch-SerpJobCard')
            get_page_records(cards, scraped_jobs, scraped_urls)
            try:
                driver.find_element_by_xpath('//a[@aria-label="Next"]').click()
            except NoSuchElementException:
                # no "Next" link: this was the last page
                break
            except ElementNotInteractableException:
                # a job-notification popup covers the link; dismiss it and
                # let the next iteration retry the click
                driver.find_element_by_id('popover-x').click()
                get_page_records(cards, scraped_jobs, scraped_urls)
    finally:
        # always release the browser, even when scraping fails midway
        driver.quit()

    save_data_to_file(scraped_jobs)
def Restart_Modem():
    """Log in to the modem web UI at 192.168.0.1 and trigger a reboot.

    Progress is reported with ``print``. Any error raised after the login page
    has loaded is reported as 'Problem with restarting modem' instead of being
    propagated. The browser is always closed before returning.
    """
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("-inprivate")
    edge_browser = Edge(options=options)

    try:
        edge_browser.get('http://192.168.0.1')
        time.sleep(2)
        print('Welcome')
        try:
            login = edge_browser.find_element_by_name('username')
            password = edge_browser.find_element_by_name('password')
            sign = edge_browser.find_element_by_class_name('styled_button_s')
            login.clear()
            password.clear()
            login.send_keys('admin')
            password.send_keys('admin')
            time.sleep(2)
            sign.click()
            print('Sign in')

            alert = Alert(edge_browser)
            time.sleep(2)
            edge_browser.get('http://192.168.0.1/saveconf.htm')
            time.sleep(2)
            edge_browser.find_element_by_id('three_level_menu1').click()
            time.sleep(2)
            edge_browser.find_element_by_name('reboot').click()
            alert.accept()
            # pause for 70 seconds after confirming the reboot
            time.sleep(70)
            print('Reboot')
        except Exception:
            # Exception (not a bare except) so Ctrl-C during the long sleep
            # still interrupts the program.
            print('Problem with restarting modem')
    finally:
        edge_browser.quit()
def test_legacy_driver_with_legacy_options(self):
    """Fail unless ``Edge('MicrosoftWebDriver.exe')`` starts with an unmodified ``EdgeOptions``."""
    options = EdgeOptions()
    try:
        driver = Edge('MicrosoftWebDriver.exe', options=options)
    except Exception:
        # The original exception stays attached as the failure's context.
        self.fail('Test legacy driver with legacy options failed.')
    else:
        driver.quit()
class ElementAccesser:
    """Thin convenience wrapper around a headless Edge (Chromium) driver."""

    def __init__(self, url: str):
        """Start headless Edge, open ``url`` and prepare a 10-second explicit wait."""
        edge_options = EdgeOptions()
        edge_options.use_chromium = True
        edge_options.add_argument('headless')
        edge_options.add_argument('disable-gpu')
        self.driver = Edge(executable_path='msedgedriver.exe', options=edge_options)
        self.driver.get(url)
        self.wait = WebDriverWait(self.driver, 10)

    def wait4Element(self, xpath: str):
        """Wait for the element at ``xpath``; return True if it appeared, else False."""
        try:
            self.wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
        except Exception:
            return False
        return True

    def wait4Element4Ever(self, xpath: str):
        """Wait for an element forever"""
        while True:
            try:
                self.wait.until(
                    EC.presence_of_element_located((By.XPATH, xpath)))
                return
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must still be
                # able to break out of this otherwise endless loop.
                continue

    def findElement(self, xpath: str):
        """Find an element, if it doesn't exist, return False"""
        try:
            return self.driver.find_element_by_xpath(xpath)
        except Exception:
            return False

    def inputInElement(self, element: object, content: str):
        """Type ``content`` into ``element``."""
        element.send_keys(content)

    def clickInElement(self, element: object):
        """Click ``element``.

        Raises:
            ValueError: if ``element`` is the False sentinel returned by
                ``findElement`` (or None), i.e. the lookup found nothing.
        """
        if element is False or element is None:
            # Raising a plain string is itself a TypeError in Python 3, so a
            # real exception instance is raised instead.
            raise ValueError('\nelement was not found and you tried to click it')
        element.click()

    def checkBackgroundElement(self, element: object, color: str):
        """Compares the background of an element.

        color must have the following format: rgba(r, g, b, a)
        """
        return str(element.value_of_css_property('background-color')) == color

    def quitBrowser(self, quit_message: str):
        """Quit the browser and return ``quit_message`` unchanged."""
        self.driver.quit()
        return quit_message
def test_chromium_driver_with_chromium_options(self):
    """Fail unless ``Edge('msedgedriver.exe')`` starts with ``use_chromium`` enabled."""
    options = EdgeOptions()
    options.use_chromium = True
    try:
        driver = Edge('msedgedriver.exe', options=options)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C is not reported as
        # a test failure.
        self.fail('Test chromium driver with chromium options failed.')
    else:
        driver.quit()
def test_default(self):
    """Check that ``Edge()`` with no options reports browserName 'MicrosoftEdge'."""
    driver = None
    try:
        driver = Edge()
        cap = driver.capabilities
        self.assertEqual('MicrosoftEdge', cap['browserName'], 'Driver launches Edge Legacy.')
    except self.failureException:
        # Let the specific assertion message through instead of replacing it
        # with the generic failure below.
        raise
    except Exception:
        self.fail('Test default options failed.')
    finally:
        # Quit on every path; previously a failed assertion leaked the browser.
        if driver is not None:
            driver.quit()
def test_legacy_options(self):
    """Check that ``use_chromium = False`` yields browserName 'MicrosoftEdge'."""
    driver = None
    try:
        options = EdgeOptions()
        options.use_chromium = False
        driver = Edge(options=options)
        cap = driver.capabilities
        self.assertEqual('MicrosoftEdge', cap['browserName'], 'Driver launches Edge Legacy.')
    except self.failureException:
        # Keep the specific assertion message rather than the generic one.
        raise
    except Exception:
        self.fail('Test legacy options failed.')
    finally:
        # Quit on every path; previously a failed assertion leaked the browser.
        if driver is not None:
            driver.quit()
def extract_element():
    """Return the ``outerHTML`` of the element at ``configs.html_image_element``.

    Loads ``configs.home_page_url`` in headless Edge (Chromium), waits four
    seconds, then reads the element located by the configured XPath. The
    browser is closed even when the page load or the lookup fails.
    """
    options = EdgeOptions()
    options.headless = True
    options.use_chromium = True
    driver = Edge(executable_path=configs.msedge_driver_executable_path, options=options)
    try:
        driver.get(configs.home_page_url)
        time.sleep(4)
        element = driver.find_element_by_xpath(configs.html_image_element)
        return element.get_attribute('outerHTML')
    finally:
        driver.quit()
def test_chromium_options(self):
    """Check that ``use_chromium = True`` yields 'msedge' and that CDP commands work."""
    driver = None
    try:
        options = EdgeOptions()
        options.use_chromium = True
        driver = Edge(options=options)
        cap = driver.capabilities
        self.assertEqual('msedge', cap['browserName'], 'Driver launches Edge Chromium.')

        result = driver.execute_cdp_cmd('Browser.getVersion', {})
        self.assertIn('userAgent', result, 'Driver can send Chromium-specific commands.')
    except self.failureException:
        # Keep the specific assertion message rather than the generic one.
        raise
    except Exception:
        self.fail('Test chromium options failed.')
    finally:
        # Quit on every path; previously a failed assertion leaked the browser.
        if driver is not None:
            driver.quit()
class News163Spider(scrapy.Spider):
    """Spider for news.163.com: front page -> section pages -> article detail pages."""

    name = 'news163'
    # allowed_domains = ['news.163.com']
    start_urls = ['http://news.163.com/']
    # Kept at class level for backward compatibility; every instance gets its
    # own list in __init__ so URLs are not shared between spiders.
    models_urls = []

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and start the Edge (Chromium) browser."""
        # Forward spider arguments so the base Spider initialisation still runs.
        super().__init__(*args, **kwargs)
        self.models_urls = []

        options = EdgeOptions()
        options.use_chromium = True
        # options.add_argument("headless")
        # options.add_argument("disable-gpu")
        # Suppress noisy log output; excluding 'enable-automation' is meant to
        # avoid automation detection.
        options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
        self.bro = Edge(options=options)

    def parse(self, response):
        """Collect the selected section URLs from the front page and request each one."""
        li_list = response.xpath('//*[@id="index2016_wrap"]/div[1]/div[2]/div[2]/div[2]/div[2]/div/ul/li')
        # Positions of the wanted sections inside the navigation list.
        section_indexes = [3, 4, 6, 7]
        for index in section_indexes:
            if index >= len(li_list):
                # Layout changed or page failed to render: skip instead of IndexError.
                continue
            model_url = li_list[index].xpath('./a/@href').extract_first()
            if model_url:
                self.models_urls.append(model_url)

        for url in self.models_urls:
            yield scrapy.Request(url, callback=self.parse_model)

    def parse_model(self, response):
        """Extract title and detail URL of every article listed on a section page."""
        div_list = response.xpath('/html/body/div/div[3]/div[4]/div[1]/div[1]/div/ul/li/div/div')
        for div in div_list:
            title = div.xpath('./div/div[1]/h3/a/text()').extract_first()
            news_detail_url = div.xpath('./div/div[1]/h3/a/@href').extract_first()
            if not news_detail_url:
                # scrapy.Request rejects a missing URL; skip entries without a link.
                continue

            item = News163SpiderItem()
            item['title'] = title
            # Request the article detail page, carrying the item along in meta.
            yield scrapy.Request(url=news_detail_url, callback=self.parse_detail, meta={'item': item})

    def parse_detail(self, response):
        """Attach the article body text to the item passed via ``response.meta``."""
        content = ''.join(response.xpath('//*[@id="content"]/div[2]//text()').extract())
        item = response.meta['item']
        print(item)
        item['content'] = content
        yield item

    def closed(self, spider):
        """Quit the browser."""
        self.bro.quit()
def run():
    """Log in to gokano.com with the stored credentials and collect the daily points.

    Reads the browser name and GPU flag from the first two lines of
    ``browserdetails.txt``. Only the 'edge' browser is handled; any other
    value makes the function return without doing anything. Progress is
    reported with ``print``.
    """
    cemail = str(read_decrypt(efile))
    cpassword = str(read_decrypt(pfile))
    # NOTE: the decrypted credentials are intentionally no longer printed;
    # echoing the password to stdout leaks it into terminals and log files.

    with open("browserdetails.txt", "r") as f:
        data = [line.rstrip('\n') for line in f]
    browser = data[0].lower()
    gpu = data[1].lower()

    if browser != 'edge':
        return

    # Connectivity probe before starting a browser.
    try:
        requests.get("http://www.google.com")
    except requests.ConnectionError:
        print('Could not connect')
        return
    print('Connection Found')

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("--start-maximized")
    if gpu == 'no':
        options.add_argument("window-size=1920,1080")
        options.add_argument("--headless")
        options.add_argument("disable-gpu")
        options.add_argument("-inprivate")
    driver = Edge(executable_path='msedgedriver.exe', options=options)

    try:
        driver.get('https://gokano.com/')
        try:
            email_field = WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.NAME, 'email')))
        except TimeoutException:
            print("Error logining in")
            time.sleep(3)
            # Stop here: previously the code quit the driver and then kept
            # using the dead session.
            return

        print("Page is ready!")
        email_field.send_keys(cemail)
        driver.find_element_by_name('password').send_keys(cpassword)
        time.sleep(3)
        driver.find_element_by_class_name('submit').click()
        print('Login sucessful')

        time.sleep(3)
        try:
            driver.find_element_by_link_text('Collect daily points').click()
        except NoSuchElementException:
            print('Already collected')
        else:
            write_time()
        time.sleep(3)

        print('Automation completed')
        time.sleep(3)
    finally:
        # Single exit point for the browser on every path.
        driver.quit()
class HeaderText(unittest.TestCase):
    """Check the page header of the app served at http://localhost:4200."""

    def setUp(self):
        """Start Edge Dev with the bundled msedgedriver and open the app."""
        options = EdgeOptions()
        options.use_chromium = True
        options.binary_location = "C:\\Program Files (x86)\\Microsoft\\Edge Dev\\Application\\msedge.exe"

        # The driver binary lives next to this file, in edgedriver_win64.
        base_dir = os.path.dirname(os.path.realpath(__file__))
        edge_driver_path = os.path.join(base_dir, "edgedriver_win64", "msedgedriver.exe")

        self.driver = Edge(options=options, executable_path=edge_driver_path)
        try:
            self.driver.implicitly_wait(30)
            self.driver.maximize_window()
            self.driver.get("http://localhost:4200")
        except Exception:
            # tearDown does not run when setUp fails, so close the browser here.
            self.driver.quit()
            raise

    def test_HeaderText(self):
        """The first ``h1`` on the page must read 'todos'."""
        headerText = self.driver.find_element_by_css_selector("h1").get_attribute("innerText")
        self.assertEqual("todos", headerText)

    def tearDown(self):
        """Close the browser."""
        self.driver.quit()
class AddAToDoText(unittest.TestCase):
    # Test case that starts Edge Dev through the msedgedriver located next to
    # this file.
    # NOTE(review): this block looks damaged. The URL passed to driver.get()
    # contains masked text fused with the tail of an XPath, there is no
    # visible test method, and `addedToDoText` is never assigned. Presumably
    # part of setUp and a whole test method were lost - restore from the
    # original source before relying on this class.
    def setUp(self):
        options = EdgeOptions()
        options.use_chromium = True
        options.binary_location = "C:\\Program Files (x86)\\Microsoft\\Edge Dev\\Application\\msedge.exe"
        # Directory containing this file; the driver path below is built from it.
        dir = os.path.dirname(os.path.realpath(__file__))
        edge_driver_path = dir + "\\edgedriver_win64\\msedgedriver.exe"
        self.driver = Edge(options=options, executable_path=edge_driver_path)
        self.driver.implicitly_wait(30)
        self.driver.maximize_window()
        # NOTE(review): garbled call - see the note at the top of the class.
        self.driver.get("http://*****:*****@class='toggle']/following-sibling::label").get_attribute("innerText")
        # NOTE(review): `addedToDoText` is not defined anywhere in the visible code.
        self.assertEqual("The test is adding this todo", addedToDoText)

    def tearDown(self):
        # Close the browser.
        self.driver.quit()
from time import sleep

from msedge.selenium_tools import Edge, EdgeOptions

# Headless Edge browser (PhantomJS also works but is no longer maintained).
options = EdgeOptions()
options.use_chromium = True
options.add_argument("headless")
options.add_argument("disable-gpu")
# Suppress noisy log output; excluding 'enable-automation' is meant to avoid
# automation detection (reported as no longer effective on the latest browser).
options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

# Headless Chrome alternative (reported to work with Chrome 88.0):
# from selenium.webdriver import Chrome
# from selenium.webdriver import ChromeOptions
# options = ChromeOptions()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')
# options.add_experimental_option("excludeSwitches", ["enable-automation",'enable-logging'])
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_experimental_option('useAutomationExtension', False)
# wd = Chrome(options=options)

wd = Edge(options=options)
try:
    wd.get('https://www.baidu.com')
    print(wd.page_source)
    sleep(2)
finally:
    # Always stop the browser and driver process, even if the page load fails.
    wd.quit()
class Chat:
    """Send Messenger messages and pictures through an Edge WebDriver session.

    The session id and executor URL are stored in ``SessionExecutor.txt`` so a
    later instance can try to attach to the same browser.
    """

    # Absolute XPaths of the Messenger page. They are assembled from adjacent
    # literals so they contain no stray whitespace; the previous
    # backslash-continued literals embedded the source indentation in the path.
    _MESSAGE_BOX_XPATH = (
        '/html/body/div[1]/div/div[1]/div/div[2]/div/div/div[1]/div[1]/'
        'div[2]/div/div/div/div/div/div[1]/div[2]/div/div/div/div[2]/div/form/div/div[3]/div[2]/div[1]/div/'
        'div/div/div/div[2]/div/div/div/div'
    )
    _UPLOAD_INPUT_XPATH = (
        '/html/body/div[1]'
        '/div/div[1]/div/div[2]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div/div/div[2]/div/div/div/'
        'div[2]/div/form/div/div[3]/div[1]/input'
    )

    def __init__(self):
        """Reset all per-call state and attach to (or start) the browser."""
        self.login = None
        self.password = None
        self.message = None
        self.thread = None
        self.picture = None
        self._driver = None
        self._session = None
        self._executor_url = None
        self._base_url = 'https://www.messenger.com/'
        self._initiate()

    def _initiate(self):
        """Attach to the session recorded in SessionExecutor.txt, else start a new one."""
        try:
            with open('SessionExecutor.txt') as f:
                data = [line.strip() for line in f.readlines()]
            self._session = data[0]
            self._executor_url = data[1]
            self._driver = webdriver.Remote(command_executor=self._executor_url,
                                            desired_capabilities={}, options=options)
            # Point the new Remote object at the previously recorded session.
            self._driver.session_id = self._session
            print('same browser')
        except (FileNotFoundError, IndexError, MaxRetryError):
            self._driver = Edge("./msedgedriver.exe", options=options)
            with open('SessionExecutor.txt', 'w+', encoding='utf-8') as f:
                f.write(self._driver.session_id)
                f.write('\n')
                f.write(self._driver.command_executor._url)
            print("new browser")

    def _log_in(self):
        """Open the base URL, click through the first dialog and submit the credentials."""
        self._driver.get(self._base_url)
        self._driver.find_element_by_xpath(
            '/html/body/div[2]/div[2]/div/div/div/div/div[3]/button[2]').click()
        self._driver.find_element_by_id("email").send_keys(self.login)
        self._driver.find_element_by_id("pass").send_keys(self.password, Keys.RETURN)

    def _open_thread(self):
        """Log in if no thread URL is open, then navigate to ``self.thread`` if needed."""
        if "/t" not in self._driver.current_url:
            self._log_in()
        url = self._base_url + 't/' + self.thread
        if self._driver.current_url != url:
            self._driver.get(url)

    def send_message(self, thread, message, login, password):
        """Send ``message`` to ``thread``.

        Returns 'Lacking key parameters' when ``thread`` or ``message`` is
        None, otherwise None.
        """
        self.message = message
        self.login = login
        self.password = password
        self.thread = thread
        # The former ``except ValueError`` around these assignments could never
        # fire; validate explicitly so the documented error value is reachable.
        if thread is None or message is None:
            return 'Lacking key parameters'

        self._open_thread()
        WebDriverWait(self._driver, 10).until(
            ec.presence_of_element_located((By.CLASS_NAME, "_5rp7")))
        self._driver.find_element_by_xpath(self._MESSAGE_BOX_XPATH).send_keys(
            self.message, Keys.RETURN)

    def send_picture(self, thread, picture, login, password):
        """Upload ``picture`` (a file path) to ``thread`` and press RETURN to send it.

        Returns 'Lacking key parameters' when ``thread`` or ``picture`` is
        None, otherwise None.
        """
        self.picture = picture
        self.login = login
        self.password = password
        self.thread = thread
        if thread is None or picture is None:
            return 'Lacking key parameters'

        self._open_thread()
        WebDriverWait(self._driver, 10).until(
            ec.presence_of_element_located((By.XPATH, self._UPLOAD_INPUT_XPATH)))
        upload_picture = self._driver.find_element_by_xpath(self._UPLOAD_INPUT_XPATH)
        upload_picture.send_keys(self.picture)
        # Fixed pause after choosing the file, before pressing RETURN.
        time.sleep(2)
        self._driver.find_element_by_xpath(self._MESSAGE_BOX_XPATH).send_keys(Keys.RETURN)

    def exit(self):
        """Quit the browser and force-kill any remaining msedgedriver.exe processes."""
        self._driver.quit()
        os.system('cmd /c "taskkill /IM msedgedriver.exe /F /T"')
# Click each navigation entry in order, pausing three seconds after every
# click, then close the window and quit. On any failure the browser is shut
# down and the script exits with status 5.
try:
    for element_id in ("Home", "Orders", "Portfolio", "Securities", "Analysis", "Logout"):
        element = driver.find_element_by_id(element_id)
        element.click()
        time.sleep(3)
    driver.close()
    driver.quit()
except:
    driver.close()
    driver.quit()
    exit(5)
ping = browser.find_element_by_css_selector( '#verlauf-detail > tbody > tr:nth-child(3) > td:nth-child(3)' ).text #timestamp timestamp = browser.find_element_by_css_selector( '#testresult-detail > tbody > tr:nth-child(1) > td:nth-child(2) > span' ).text print("DL: " + downspeed + " ; UL: " + upspeed + " ; ping: " + ping) fields = [timestamp, downspeed, upspeed, ping, browser.current_url] with open(os.path.join(os.path.dirname(__file__), 'results.csv'), 'a', newline="") as f: writer = csv.writer(f) writer.writerow(fields) # save cookies pickle.dump( browser.get_cookies(), open(os.path.join(os.path.dirname(__file__), 'cookies.pkl'), "wb")) except TimeoutException: print("Loading took too much time!") print(browser.current_url) except TimeoutException: print("Didn't find accept button") print(browser.current_url) finally: browser.close() browser.quit()
def main():
    """Download Google image-search results for a query read from stdin.

    Reads the search text and the number of requested images from stdin,
    scrolls the result page in Edge, extracts candidate URLs from the page
    source, downloads direct image URLs into ``download_path + <query>`` and
    passes the remaining links to ``img_scp.img_download``.
    """
    from urllib.parse import quote_plus

    searchtext = input()
    num_requested = int(input())
    # number_of_scrolls * 400 images will be opened in the browser
    number_of_scrolls = num_requested // 400 + 1

    target_dir = download_path + searchtext.replace(" ", "_")
    os.makedirs(target_dir, exist_ok=True)

    # Encode the query so spaces, '&' and '#' cannot break the URL.
    url = "https://www.google.co.in/search?q=" + quote_plus(searchtext) + "&source=lnms&tbm=isch"
    chrome_driver_path = "msedgedriver.exe"
    browser_path = "C:\\Program Files (x86)\\Microsoft\\Edge Beta\\Application\\msedge.exe"
    option = EdgeOptions()
    option.binary_location = browser_path
    driver = Edge(executable_path=chrome_driver_path, options=option)

    try:
        driver.get(url)
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        }
        extensions = {"jpg", "jpeg", "png", "gif"}
        img_count = 0
        downloaded_img_count = 0

        for _ in range(number_of_scrolls):
            for __ in range(15):
                driver.execute_script("window.scrollBy(0, 1000000)")
                time.sleep(0.2)
            time.sleep(0.5)
            try:
                driver.find_element_by_xpath(
                    "//input[@value='Show more results']").click()
            except Exception as e:
                print("Less images found: {}".format(e))
                break

        # Splitting on '"' turns every quoted attribute value into its own token.
        tokens = driver.page_source.split('"')
        imges = []
        links = []
        for token in tokens:
            if token.startswith('https:') and 'gstatic' not in token and 'google' not in token:
                links.append(token)
            if token.startswith('http'):
                suffix = token.rsplit('.', 1)[-1]
                if 'usqp=CAU' in suffix or suffix in extensions:
                    imges.append(token)

        # De-duplicate while keeping first-seen order (deterministic, unlike set()).
        links = list(dict.fromkeys(links))
        imges = list(dict.fromkeys(imges))
        print(imges)

        links_left = Diff(links, imges)
        # removing duplicates
        urls_new = list(dict.fromkeys(links_left))

        # One URL per line; previously the URLs were written back-to-back and
        # the file handle was never closed.
        with open("page_source.txt", "w", encoding='utf8') as url_file:
            url_file.writelines(link + "\n" for link in urls_new)

        print("Total images: {}\n".format(len(imges)))
        for img in imges:
            img_count += 1
            print("Downloading image {}:{}".format(img_count, img))
            try:
                req = Request(img, headers=headers)
                with urlopen(req) as response:
                    raw_img = response.read()
                # Files are numbered by successful downloads and always saved as .jpeg.
                with open(target_dir + "/" + str(downloaded_img_count) + "." + "jpeg", "wb") as f:
                    f.write(raw_img)
                downloaded_img_count += 1
            except Exception as e:
                print("Download failed: {}".format(e))
            if downloaded_img_count >= num_requested:
                break

        print("Total downloaded: {}/{}".format(downloaded_img_count, img_count))
        print("Total images: {}\n".format(len(urls_new)))
        for url in urls_new:
            img_count = img_scp.img_download(url, target_dir + "/", img_count)
    finally:
        # Release the browser on every path, including failures.
        driver.quit()
def getRightStufAnimeData(memberStatus, title, bookType, currPageNum):
    """Scrape one RightStufAnime result page and recurse into following pages.

    Matching rows of ``[title, price, stock status]`` are appended to the
    module-level ``dataFile`` list, which is then written (naturally sorted)
    to ``RightStufAnimeData.csv``.

    Args:
        memberStatus: when truthy, a 5% discount is applied to each price.
        title: series title to look for; compared through ``deParseString``.
        bookType: passed through to ``getPageURL``.
        currPageNum: result page to load.

    Returns:
        The CSV file name, or None when the page lists no titles at all.
    """
    # Start the webdriver used to scrape (Edge Chromium)
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("-inprivate")
    options.add_argument("--headless")
    driver = Edge(options=options)

    try:
        # Get the URL for the page we are going to scrape for data
        driver.get(getPageURL(bookType, currPageNum, title))
        # Need to wait so the website can finish loading
        time.sleep(5)
        # Parse the HTML to start scraping for data
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # The page source is all that is needed; closing here also stops the
        # recursion below from keeping one browser open per page, and covers
        # the early return and any exception.
        driver.quit()

    # Get the Title, Price, and Stock Status Data of each Manga Volume and
    # whether or not next page button exists
    titleList = soup.find_all("span", {"itemprop": "name"})
    priceList = soup.find_all("span", {"itemprop": "price"})
    stockStatusList = soup.find_all("div", {"class": "product-line-stock-container"})
    nextPageButton = soup.find("li", {"class": "global-views-pagination-next"})

    # Check to see if the title given by the user generates a valid URL for RightStufAnime
    if not titleList:
        print("Error!!! Invalid Title, Use English Title Variant w/ Appropriate Spacing & Capitalization")
        return None

    websiteName = "RightStufAnime"
    gotAnimeDiscount = 0.05  # 5% Manga discount

    # Format data into a single list
    for fullTitle, price, stockStatus in zip(titleList, priceList, stockStatusList):
        # Keep only entries of the requested series; deParseString normalises
        # both sides to sidestep capitalisation differences.
        if deParseString(title) not in deParseString(fullTitle.text):
            continue

        if memberStatus:
            # Member: drop the leading currency symbol and apply the discount
            priceVal = float(price.text[1:])
            priceText = "$" + str(round((priceVal - (priceVal * gotAnimeDiscount)), 2))
        else:
            priceText = price.text

        stockText = stockStatus.text
        if "Out of Stock" in stockText:
            stockCheck = "Out of Stock"
        elif "No Longer Available" in stockText:
            stockCheck = "Out of Print"
        elif "Pre-Order" in stockText:
            stockCheck = "Pre-Order"
        else:
            stockCheck = "Available"
        dataFile.append([fullTitle.text, priceText, stockCheck])

    # Check to see if there is another page
    if nextPageButton is not None:
        currPageNum += 1
        print(title)
        getRightStufAnimeData(memberStatus, title, bookType, currPageNum)

    # Initialize the CSV to write into w/ appropriate headers
    csvFile = websiteName + "Data.csv"
    with open(csvFile, "w", newline="", encoding="utf-8") as file:
        writeToFile = csv.writer(file)
        writeToFile.writerow(["Title", "Price", "Stock Status"])
        writeToFile.writerows(natsorted(dataFile))  # Sort data by title and write to the file
    return csvFile
def _show_status(text):
    """Place a bold status label with ``text`` on the canvas at (200, 410)."""
    label = tk.Label(top, text=text, bg='white')
    label.config(font=('helvetica', 15, "bold"))
    canvas.create_window(200, 410, window=label)


def run():
    """Log in to gokano.com, collect the daily points and report progress in the GUI.

    Reads the browser name and GPU flag from the first two lines of
    ``browserdetails.txt``. Only the 'edge' browser is handled; any other
    value makes the function return without doing anything. Each step is
    reported both with ``print`` and with a status label on the canvas.
    """
    cemail = str(read_decrypt(efile))
    cpassword = str(read_decrypt(pfile))

    with open("browserdetails.txt", "r") as f:
        data = [line.rstrip('\n') for line in f]
    browser = data[0].lower()
    gpu = data[1].lower()

    if browser != 'edge':
        return

    # Connectivity probe before starting a browser. No driver exists yet, so
    # the failure path must not call driver.quit() (it used to, on an unbound
    # name).
    try:
        requests.get("http://www.google.com")
    except requests.ConnectionError:
        print('Could not connect')
        _show_status(" Couldn't Connect. ")
        return
    print('Connection Established.')
    _show_status(" Connection Established. ")

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("--start-maximized")
    if gpu == 'no':
        options.add_argument("window-size=1920,1080")
        options.add_argument("--headless")
        options.add_argument("disable-gpu")
        options.add_argument("-inprivate")
    driver = Edge(resource_path('msedgedriver.exe'), options=options)

    try:
        driver.get('https://gokano.com/')
        try:
            email_field = WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.NAME, 'email')))
        except TimeoutException:
            print("Login Error!")
            _show_status(" Login Error! ")
            time.sleep(3)
            # Stop here instead of continuing with a closed browser.
            return

        print("Page is ready!")
        _show_status(" Page is Ready. ")
        email_field.send_keys(cemail)
        driver.find_element_by_name('password').send_keys(cpassword)
        time.sleep(3)
        driver.find_element_by_class_name('submit').click()

        try:
            driver.find_element_by_class_name('gokan-alert-error')
        except NoSuchElementException:
            # No error banner: the login went through.
            print('Login sucessful')
            _show_status(" Login Successful. ")
        else:
            print("Invalid Credintials")
            _show_status(" Invalid Credintials. ")
            time.sleep(3)
            return

        time.sleep(3)
        try:
            driver.find_element_by_link_text('Collect daily points').click()
        except NoSuchElementException:
            print('Already collected')
            _show_status(" Points Already Collected. ")
        else:
            write_time()
        time.sleep(3)

        print('Automation completed')
        _show_status(" Automation Completed. ")
        time.sleep(3)
        write_time()
    finally:
        # Single exit point for the browser on every path.
        driver.quit()
class Sei: __area_inicial = None __windows_before = 0 __windows_after = 0 def __init__(self, headless=False, executable_path='chromedriver'): if 'chromedriver' in executable_path: chrome_options = Options() chrome_options.add_argument('--enable-javascript') chrome_options.add_argument('--window-size=1440,900') chrome_options.add_argument("--disable-extensions") chrome_options.add_argument("--proxy-server='direct://'") chrome_options.add_argument("--proxy-bypass-list=*") chrome_options.add_argument("--start-maximized") chrome_options.add_argument('--disable-dev-shm-usage') chrome_options.add_argument('--no-sandbox') chrome_options.add_argument('--ignore-certificate-errors') if headless: chrome_options.add_argument('--headless') chrome_options.add_argument('--disable-gpu') self.driver = webdriver.Chrome(executable_path=executable_path, options=chrome_options) elif 'msedgedriver' in executable_path: edge_options = EdgeOptions() edge_options.use_chromium = True edge_options.add_argument('enable-javascript') edge_options.add_argument('window-size=1440,900') edge_options.add_argument("disable-extensions") edge_options.add_argument("proxy-server='direct://'") edge_options.add_argument("proxy-bypass-list=*") edge_options.add_argument("start-maximized") edge_options.add_argument('disable-dev-shm-usage') edge_options.add_argument('no-sandbox') edge_options.add_argument('ignore-certificate-errors') if headless: edge_options.add_argument('headless') edge_options.add_argument('disable-gpu') self.driver = Edge(executable_path=executable_path, options=edge_options) def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() def start_driver(self, url, usuario=None, senha=None): if usuario == None: usuario = input('Digite o usuário: ') if senha == None: senha = getpass('Digite a senha: ') self.driver.get(url) usuario_field = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "txtUsuario"))) senha_field = 
self.driver.find_element_by_id('pwdSenha') botao_acessar = self.driver.find_element_by_id('sbmLogin') usuario_field.clear() usuario_field.send_keys(usuario) senha_field.clear() senha_field.send_keys(senha) botao_acessar.click() alerta = self.fechar_alerta() if alerta: raise Exception(alerta) # usuário ou senha inválido self.__area_incial = self.get_area() def go_to(self, numero_sei): if self.__windows_after > self.__windows_before: self.driver.close() self.driver.switch_to.window( self.driver.window_handles[self.__windows_before - 1]) self.driver.switch_to.default_content() pesquisa = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "txtPesquisaRapida"))) pesquisa.clear() pesquisa.send_keys(str(numero_sei)) formPesquisaRapida = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located( (By.ID, "frmProtocoloPesquisaRapida"))) self.__windows_before = len(self.driver.window_handles) formPesquisaRapida.submit() self.__windows_after = len(self.driver.window_handles) if self.__windows_after > self.__windows_before: self.driver.switch_to.window( self.driver.window_handles[self.__windows_after - 1]) def is_processo_aberto(self, area=None, processo=None): if processo: self.go_to(processo) else: self.driver.switch_to.default_content() try: ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) informacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "divInformacao"))) mensagem = informacao.text aberto = 'aberto' in mensagem if area: regex = '(?im)^(.*)(' + area + ')[^0-9a-z](.*)$' matches = search(regex, mensagem) if matches: aberto = True else: aberto = False self.driver.switch_to.default_content() except: aberto = None mensagem = 'Impossível abrir mensagem do processo' return aberto, mensagem def get_processo_anexador(self, processo=None): if processo: self.go_to(processo) else: 
self.driver.switch_to.default_content() ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) informacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "divInformacao"))) procAnex = None if 'Processo anexado ao processo' in informacao.text: processoAnexador = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located( (By.XPATH, "//*[@id=\"divInformacao\"]/div/a"))) procAnex = processoAnexador.text self.driver.switch_to.default_content() return procAnex def get_area(self): self.driver.switch_to.default_content() select = Select(self.driver.find_element_by_id('selInfraUnidades')) return select.all_selected_options[0].text def seleciona_area(self, area): self.driver.switch_to.default_content() select = Select(self.driver.find_element_by_id('selInfraUnidades')) all_selected_options = select.all_selected_options for option in all_selected_options: if area == option.text: return True select = Select(self.driver.find_element_by_id('selInfraUnidades')) options = select.options for option in options: if area == option.text: select.select_by_visible_text(area) Select( WebDriverWait(self.driver, 3).until( EC.presence_of_element_located( (By.ID, 'selInfraUnidades')))) return True return False def clicar_botao(self, botao): self.driver.switch_to.default_content() ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) arvore = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "divArvoreAcoes"))) botoes = arvore.find_elements(By.XPATH, '//*[@id=\"divArvoreAcoes\"]/a') for b in botoes: img = b.find_element(By.XPATH, 'img') if botao in img.get_attribute('title'): b.click() try: WebDriverWait(self.driver, 1).until( EC.alert_is_present(), 'Timed out waiting for PA creation ' + 'confirmation popup to appear.') 
except: try: self.driver.switch_to.default_content() except: None return True return False def fechar_alerta(self): alerta = None try: WebDriverWait(self.driver, 3).until( EC.alert_is_present(), 'Timed out waiting for PA creation ' + 'confirmation popup to appear.') alert = self.driver.switch_to.alert alerta = alert.text alert.accept() self.driver.switch_to.default_content() except TimeoutException: None return alerta def is_sobrestado(self, area=None, processo=None): if processo: self.go_to(processo) else: self.driver.switch_to.default_content() ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) informacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "divInformacao"))) sobrestado = 'sobrestado' in informacao.text mensagem = informacao.text self.driver.switch_to.default_content() if area: regex = '(?im)^(.*)(' + area + ')[^0-9a-z](.*)$' matches = search(regex, informacao.text) return sobrestado, matches != None else: return sobrestado, mensagem def sobrestar_processo(self, motivo, processo=None): if processo: self.go_to(processo) else: self.driver.switch_to.default_content() if self.clicar_botao('Sobrestar Processo'): ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) self.driver.find_element(By.ID, 'divOptSomenteSobrestar').click() motivoField = self.driver.find_element(By.ID, 'txaMotivo') motivoField.clear() motivoField.send_keys(motivo) self.driver.find_element(By.ID, 'sbmSalvar').click() self.driver.switch_to.default_content() return True return False def remover_sobrestamento(self, processo=None): if processo: self.go_to(processo) if self.clicar_botao('Remover Sobrestamento do Processo'): self.fechar_alerta() return True return False def publicar(self, resumo_ementa, data_disponibilizacao, documento=None, 
dou=False, secao=None, pagina=None): if documento: self.go_to(documento) else: self.driver.switch_to.default_content() if self.clicar_botao('Agendar Publicação'): ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) resumo_ementa_text_field = self.driver.find_element( By.ID, 'txaResumo') resumo_ementa_text_field.clear() resumo_ementa_text_field.send_keys(resumo_ementa) disponibilizacao = self.driver.find_element( By.ID, 'txtDisponibilizacao') disponibilizacao.clear() disponibilizacao.send_keys(data_disponibilizacao) if dou: select = Select(self.driver.find_element_by_id('selVeiculoIO')) select.select_by_visible_text('DOU') select = Select( WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "selSecaoIO")))) WebDriverWait(self.driver, 3).until( EC.presence_of_element_located( (By.CSS_SELECTOR, "option[value='" + secao if secao else '3' + "']"))) select.select_by_visible_text(secao if secao else '3') pagina_text_field = self.driver.find_element( By.ID, 'txtPaginaIO') pagina_text_field.clear() pagina_text_field.send_keys(pagina if pagina else '') disponibilizacao = self.driver.find_element(By.ID, 'txtDataIO') disponibilizacao.clear() disponibilizacao.send_keys(data_disponibilizacao) self.driver.find_element_by_id('btnSalvar').click() self.driver.switch_to.default_content() return True return False def get_conteudo_documento(self, documento=None): if documento: self.go_to(documento) else: self.driver.switch_to.default_content() try: ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) ifrArvoreHtml = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrArvoreHtml"))) self.driver.switch_to.frame(ifrArvoreHtml) documento_conteudo = self.driver.find_element_by_xpath( '/html/body').get_attribute('innerHTML') 
documento_conteudo = sub( r'\\n', '', documento_conteudo) # retirar quebra de páginas documento_conteudo = sub(r'\s\s+?', ' ', documento_conteudo) # tira espaços duplos documento_conteudo = sub(r' ', ' ', documento_conteudo) # tira espaços duplos documento_conteudo = documento_conteudo.strip( ) # retirar quebras de páginas que tenham restado return documento_conteudo except: raise Exception('Conteúdo do documento %s não encontrado.' % documento) finally: self.driver.switch_to.default_content() def get_documento_element_by_id(self, id, documento=None): if documento: self.go_to(documento) else: self.driver.switch_to.default_content() try: if (self.__windows_after == self.__windows_before): ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) ifrArvoreHtml = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrArvoreHtml"))) self.driver.switch_to.frame(ifrArvoreHtml) return self.driver.find_element_by_id(id).text except: raise Exception('Conteúdo do documento %s não encontrado.' % documento) finally: self.driver.switch_to.default_content() def get_documento_elements_by_id(self, id, documento=None): if documento: self.go_to(documento) else: self.driver.switch_to.default_content() try: if (self.__windows_after == self.__windows_before): ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) ifrArvoreHtml = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrArvoreHtml"))) self.driver.switch_to.frame(ifrArvoreHtml) elements = self.driver.find_elements_by_id(id) return [element.text for element in elements] except: raise Exception('Conteúdo do documento %s não encontrado.' 
% documento) finally: self.driver.switch_to.default_content() def get_documento_element_by_xpath(self, xpath, documento=None): if documento: self.go_to(documento) else: self.driver.switch_to.default_content() try: if (self.__windows_after == self.__windows_before): ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) ifrArvoreHtml = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrArvoreHtml"))) self.driver.switch_to.frame(ifrArvoreHtml) return self.driver.find_element_by_xpath(xpath).text except: raise Exception('Conteúdo do documento %s não encontrado.' % documento) finally: self.driver.switch_to.default_content() def get_documento_elements_by_xpath(self, xpath, documento=None): if documento: self.go_to(documento) else: self.driver.switch_to.default_content() try: if (self.__windows_after == self.__windows_before): ifrVisualizacao = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrVisualizacao"))) self.driver.switch_to.frame(ifrVisualizacao) ifrArvoreHtml = WebDriverWait(self.driver, 3).until( EC.presence_of_element_located((By.ID, "ifrArvoreHtml"))) self.driver.switch_to.frame(ifrArvoreHtml) elements = self.driver.find_elements_by_xpath(xpath) return [element.text for element in elements] except: raise Exception('Conteúdo do documento %s não encontrado.' % documento) finally: self.driver.switch_to.default_content() def close(self, voltar=True): if voltar: self.seleciona_area(self.__area_incial) self.driver.close() self.driver.quit()
def info(self, departamento="Paysandú"):
    """Scrape forecast text lines for ``departamento`` from accuweather.com.

    Opens Edge, dismisses the cookie banner, searches for the location,
    picks the first suggestion, opens the daily view, closes the
    interstitial ad and splits the forecast text after the module title.

    Args:
        departamento: location typed into the search box.

    Returns:
        Lines 1..35 of the forecast text that follows the module title.

    The browser is always shut down, even when a wait times out.
    """
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument('--disable-extensions')
    driver_path = 'Driver\\msedgedriver.exe'

    driver = Edge(executable_path=driver_path, options=options)

    def wait_clickable(xpath, timeout=10):
        # Block until the element at ``xpath`` is clickable and return it.
        return WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.XPATH, xpath)))

    try:
        driver.get('https://www.accuweather.com/')

        # Cookie banner
        wait_clickable('/html/body/div/div[9]/div/div').click()
        # Search box
        wait_clickable(
            '/html/body/div/div[1]/div[2]/div[1]/form/input'
        ).send_keys(departamento)
        # First suggested city
        wait_clickable('/html/body/div/div[1]/div[2]/div[2]/div[2]/div').click()
        # Daily forecast tab
        wait_clickable('/html/body/div/div[3]/div/div[3]/a[3]').click()

        # Interstitial ad: switch into its iframe and close it.
        card = WebDriverWait(driver, 20).until(
            EC.frame_to_be_available_and_switch_to_it(
                (By.NAME,
                 "google_ads_iframe_/6581/web/sam/interstitial/weather/local_home_0")))
        if card:
            wait_clickable("/html/body/div/div/div[1]/div[1]").click()

        # Wait for the forecast content before reading it.
        wait_clickable('/html/body/div/div/div[1]/div[1]/div')
        info_clima = driver.find_element(
            By.XPATH, '/html/body/div/div[5]/div[1]/div[1]').text
        titulo = driver.find_element(By.CSS_SELECTOR, 'p.module-title').text

        # Keep only what follows the module title, one entry per line.
        return info_clima.split(titulo)[1].split('\n')[1:36]
    finally:
        driver.quit()
class ChannelScrape:
    """Collect the ``ytInitialData`` JSON of a YouTube channel page via Edge.

    The constructor loads the channel page, stores the page's
    ``ytInitialData`` object in ``jsonData`` and closes the browser.

    Methods:
        toFile(): dump the collected JSON to a file.
        getUpcomingId(): video ids of upcoming livestreams.
        getLiveId(): video ids of livestreams that are currently live.
    """

    # Shared Edge options; ``headless`` is set on this object by __init__.
    options_edge = EdgeOptions()
    options_edge.use_chromium = True
    options_edge.add_argument('--ignore-certificate-errors')
    options_edge.add_argument('--ignore-ssl-errors')
    options_edge.add_argument('--mute-audio')

    def __init__(self, channelId: str, headless=True, executable_path=None):
        """Load the channel page and keep its ``ytInitialData``.

        Args:
            channelId: channel id appended to the YouTube channel URL.
            headless: whether Edge runs without a window.
            executable_path: path to the Edge webdriver; when None the
                directories of the PATH environment variable are searched.

        Raises:
            FileNotFoundError: no webdriver was given or found on PATH.
        """
        if executable_path is None:
            executable_path = self._find_driver_on_path()
        self.path_dir = executable_path

        # Setup driver
        self.options_edge.headless = headless
        self.driver = Edge(options=self.options_edge,
                           executable_path=self.path_dir)

        # JSON collecting process; always release the browser afterwards.
        try:
            url = 'https://www.youtube.com/channel/' + channelId
            self.driver.get(url)
            self.jsonData = self.driver.execute_script('return ytInitialData')
        finally:
            self.driver.quit()

    @staticmethod
    def _find_driver_on_path():
        """Return the first msedgedriver executable found on PATH."""
        for directory in os.environ.get('PATH', '').split(os.pathsep):
            if not directory:
                continue
            for name in ('msedgedriver.exe', 'msedgedriver'):
                candidate = os.path.join(directory, name)
                if os.path.isfile(candidate):
                    return candidate
        raise FileNotFoundError(
            'msedgedriver was not found in any PATH directory')

    @staticmethod
    def _is_upcoming_before(renderer, threshold):
        """Tell whether a video renderer is an UPCOMING stream before ``threshold``."""
        event = renderer.get('upcomingEventData')
        if not event:
            return False
        # startTime is a UNIX timestamp; fromtimestamp() yields naive local
        # time, which is directly comparable with datetime.now().
        liveDate = datetime.fromtimestamp(int(event['startTime']))
        style = renderer['thumbnailOverlays'][0][
            'thumbnailOverlayTimeStatusRenderer']['style']
        return style == "UPCOMING" and liveDate < threshold

    def toFile(self, output_file: str):
        """Write the collected JSON data to ``output_file`` + '.json'.

        Args:
            output_file: output file name without extension.
        """
        with codecs.open(output_file + '.json', 'w',
                         encoding='utf-8') as jsonFile:
            json.dump(self.jsonData, jsonFile, ensure_ascii=False, indent=1)

    def getUpcomingId(self, dayDelta=14):
        """Return the video ids of upcoming livestreams.

        Args:
            dayDelta: streams starting more than this many days from now
                are left out.

        Returns:
            A list of video ids; empty when the expected shelf is missing.
        """
        # The start time is counted in seconds from the UNIX epoch
        # (1970-01-01 00:00 UTC).
        dateThreshold = datetime.now() + timedelta(days=dayDelta)

        try:
            content = self.jsonData['contents'][
                'twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer'][
                    'content']['sectionListRenderer']['contents'][1][
                        'itemSectionRenderer']['contents'][0]['shelfRenderer'][
                            'content']
        except (KeyError, IndexError, TypeError):
            print(
                'Index out of range (Most likely channel only have horizontal grid renderer)'
            )
            return []

        if "expandedShelfContentsRenderer" in content:
            # Layout used when there is a single upcoming livestream.
            items = content['expandedShelfContentsRenderer']['items']
            key = 'videoRenderer'
        elif "horizontalListRenderer" in content:
            # Layout used when there are several upcoming livestreams.
            items = content['horizontalListRenderer']['items']
            key = 'gridVideoRenderer'
        else:
            return []

        collectedContents = []
        for item in items:
            renderer = item.get(key)
            if renderer and self._is_upcoming_before(renderer, dateThreshold):
                collectedContents.append(renderer['videoId'])
        return collectedContents

    def getLiveId(self):
        """Return the video ids of the livestreams that are live right now.

        A list is returned because a channel may, in principle, run more
        than one livestream at once. It is empty when the featured-content
        section is missing.
        """
        try:
            content = self.jsonData['contents'][
                'twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer'][
                    'content']['sectionListRenderer']['contents'][0][
                        'itemSectionRenderer']['contents'][0]
        except (KeyError, IndexError, TypeError):
            return []

        if "channelFeaturedContentRenderer" not in content:
            return []

        collectedContents = []
        for videoItem in content['channelFeaturedContentRenderer']['items']:
            renderer = videoItem.get('videoRenderer')
            if not renderer:
                continue
            style = renderer['thumbnailOverlays'][0][
                'thumbnailOverlayTimeStatusRenderer']['style']
            if style == "LIVE":
                collectedContents.append(renderer['videoId'])
        return collectedContents
def download(url):
    """Save every page canvas of the document at ``url`` and build a PDF.

    Each page canvas is exported as ``./temp/<title>/<index>.png`` and the
    images are then handed to ``conpdf`` to produce ``output/<title>.pdf``.
    The browser is always shut down, even when a step fails.

    Args:
        url: address of the document viewer page.
    """
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument('log-level=3')
    driver = Edge(options=options)

    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: the viewer is often usable before the load ends.
            print("Timeout - start download anyway.")

        print(f'道客巴巴: 《{title}》')

        # The title becomes a directory and file name: replace characters
        # that are not allowed in paths and never fall back to an empty
        # name (that would make the cleanup below target ./temp itself).
        title = ''.join(
            '_' if ch in '\\/:*?"<>|' else ch for ch in title).strip()
        title = title or "output"

        time.sleep(5)

        try:
            # Expand the whole document
            elem_cont_button = driver.find_element_by_id("continueButton")
            driver.execute_script(
                "arguments[0].scrollIntoView(true);", elem_cont_button)
            ActionChains(driver).move_to_element(elem_cont_button).perform()
            time.sleep(0.5)
            elem_cont_button.click()
        except NoSuchElementException:
            pass

        # Read the page count (last space-separated token of the label)
        page_label = driver.find_element_by_id('readshop') \
            .find_element_by_class_name('mainpart') \
            .find_element_by_class_name('shop3') \
            .find_element_by_class_name('text') \
            .get_attribute('innerHTML')
        num_of_pages = int(page_label.split(' ')[-1])

        for _ in range(5):
            # Zoom in
            driver.find_element_by_id('zoomInButton').click()
            time.sleep(0.5)

        page_dir = f'./temp/{title}'
        if os.path.exists(page_dir):
            shutil.rmtree(page_dir)
        os.makedirs(page_dir)

        for pages in trange(num_of_pages):
            time.sleep(0.5)

            canvas_id = "page_" + str(pages + 1)
            pagepb_id = "pagepb_" + str(pages + 1)

            element = driver.find_element_by_id(canvas_id)
            driver.execute_script(
                "arguments[0].scrollIntoView(true);", element)
            ActionChains(driver).move_to_element(element).perform()
            time.sleep(0.5)

            # Wait until the page's progress placeholder is empty
            while len(driver.find_element_by_id(pagepb_id)
                      .get_attribute('innerHTML')) != 0:
                time.sleep(1)

            js_cmd = "var canvas = document.getElementById('{}');".format(canvas_id) + \
                "return canvas.toDataURL();"
            img_data = driver.execute_script(js_cmd)

            # Drop the "data:image/png;base64," prefix of the data URL.
            encoded = img_data.split(',', 1)[1]
            with open(f"./temp/{title}/{pages}.png", "wb") as fh:
                fh.write(base64.b64decode(encoded))
    finally:
        driver.quit()

    print('下载完毕,正在转码')
    os.makedirs('output', exist_ok=True)
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')