def get_link(self, query, s_date, e_date):
    """Collect Naver blog post URLs for a query and append them to ``self.ab_url``.

    Opens the Naver mobile blog search in headless Chrome, scrolls to the
    bottom until the page height stops growing, then parses the page and keeps
    every ``.total_dsc`` link that starts with ``https://m.blog.naver.com`` or
    contains ``blog.me``.

    :param query: search text, concatenated into the URL as-is
    :param s_date: start date, inserted after ``p:from`` in the ``nso`` parameter
    :param e_date: end date, inserted after ``to`` in the ``nso`` parameter
    """
    self.query = query
    self.s_date = s_date
    self.e_date = e_date

    options = webdriver.ChromeOptions()
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36")
    options.add_argument('headless')
    options.add_argument('--disable-gpu')
    options.add_argument('lang=ko_KR')

    # Raw string: same path value, but no reliance on invalid escape
    # sequences such as "\p" and "\s".
    browser = WebDriver(
        executable_path=r'D:\python_workspace\pyTextMiner\selenium_server\chromedriver.exe',
        options=options)
    # Linux alternative:
    # browser = WebDriver(executable_path='/usr/lib/chromium-browser/chromedriver', options=options)

    try:
        url = ("https://m.search.naver.com/search.naver?where=m_blog&sm=mtb_opt&query=" + query
               + "&display=15&st=sim&nso=p%3Afrom" + s_date + "to" + e_date)
        browser.get(url)
        browser.implicitly_wait(random.randrange(5, 10))

        SCROLL_PAUSE_TIME = 1.5

        # Get scroll height
        last_height = browser.execute_script("return document.body.scrollHeight")

        while True:
            # Scroll down to bottom
            browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # Wait to load page
            time.sleep(SCROLL_PAUSE_TIME)

            # Calculate new scroll height and compare with last scroll height
            new_height = browser.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                # Nothing more was loaded: harvest the links and stop.
                soup = BeautifulSoup(browser.page_source, 'html.parser')
                for link in soup.select(".total_dsc"):
                    href = link.get("href")
                    # Skip matches that carry no href instead of raising KeyError.
                    if href and (href.startswith("https://m.blog.naver.com") or 'blog.me' in href):
                        self.ab_url.append(href)
                break
            last_height = new_height

        time.sleep(random.randrange(5, 15))
    finally:
        # Always shut the browser down so chromedriver/Chrome processes do not leak.
        browser.quit()
def completeMorePromotionABC(browser: WebDriver, cardNumber: int):
    """Open a "more activities" card and click through its questions with random picks.

    The card opens in a second window. The number of questions is taken as the
    largest integer found in the text of ``QuestionPane0``'s second div. For
    each question one of the first three answer links is clicked at random,
    then the "next" input of the matching answer pane. Finally the window is
    closed and focus returns to the first window.

    :param browser: driver positioned on the page that holds the cards
    :param cardNumber: 1-based index used in the ``mee-card[...]`` XPath
    """
    card_link_xpath = (
        '//*[@id="more-activities"]/div/mee-card[' + str(cardNumber) +
        ']/div/card-content/mee-rewards-more-activities-card-item/div/div[3]/a'
    )
    browser.find_element_by_xpath(card_link_xpath).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    counter_element = browser.find_element_by_xpath('//*[@id="QuestionPane0"]/div[2]')
    # Drop the first and last character of the markup before looking for numbers.
    counter_text = str(counter_element.get_attribute('innerHTML'))[1:-1]
    numbers_found = [int(token) for token in counter_text.split() if token.isdigit()]
    total_questions = max(numbers_found)

    for index in range(total_questions):
        pane = str(index)
        browser.execute_script(
            'document.evaluate("//*[@id=\'QuestionPane' + pane +
            '\']/div[1]/div[2]/a[' + str(random.randint(1, 3)) +
            ']/div", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.click()'
        )
        time.sleep(5)
        next_input_xpath = '//*[@id="AnswerPane' + pane + '"]/div[1]/div[2]/div[4]/a/div/span/input'
        browser.find_element_by_xpath(next_input_xpath).click()
        time.sleep(3)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
def completeDailySetVariableActivity(browser: WebDriver, cardNumber: int) -> None:
    """Complete a daily-set card whose activity type is only known after opening it.

    Clicks the card's link, switches to the second window and then handles one
    of three cases:

    * a "start quiz" button exists and the question container becomes visible:
      answer the single two-option question using the page's ``correctAnswer``;
    * otherwise a ``QuestionPane0`` exists: click through every question with a
      random answer;
    * otherwise: wait a few seconds and close the window.

    In every case the opened window is closed and focus returns to the first
    window before the function returns.

    :param browser: driver positioned on the page that holds the daily set
    :param cardNumber: 1-based index used in the ``mee-card[...]`` XPath
    """
    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' + str(cardNumber) + ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    # The activity opens in a second window.
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)
    try:
        browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
        waitUntilVisible(browser, By.XPATH, '//*[@id="currentQuestionContainer"]/div/div[1]', 3)
    except (NoSuchElementException, TimeoutException):
        # No start button (or the quiz never showed up): try the pane-based layout.
        try:
            # Strip the first and last character, then take the largest
            # integer in the remaining text as the question count.
            counter = str(
                browser.find_element_by_xpath(
                    '//*[@id="QuestionPane0"]/div[2]').get_attribute(
                        'innerHTML'))[:-1][1:]
            numberOfQuestions = max(
                [int(s) for s in counter.split() if s.isdigit()])
            for question in range(numberOfQuestions):
                # Click one of the first three answer links at random.
                browser.execute_script(
                    'document.evaluate("//*[@id=\'QuestionPane' + str(question) + '\']/div[1]/div[2]/a[' + str(random.randint(1, 3)) + ']/div", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.click()'
                )
                time.sleep(5)
                # Advance via the input inside the matching answer pane.
                browser.find_element_by_xpath(
                    '//*[@id="AnswerPane' + str(question) + '"]/div[1]/div[2]/div[4]/a/div/span/input').click()
                time.sleep(3)
            time.sleep(5)
            browser.close()
            time.sleep(2)
            browser.switch_to.window(window_name=browser.window_handles[0])
            time.sleep(2)
            return
        except NoSuchElementException:
            # Neither layout is present: linger a little, then close the window.
            time.sleep(random.randint(5, 9))
            browser.close()
            time.sleep(2)
            browser.switch_to.window(window_name=browser.window_handles[0])
            time.sleep(2)
            return
    # Quiz layout: pick whichever of the two options matches the page's answer.
    time.sleep(3)
    correctAnswer = browser.execute_script(
        "return _w.rewardsQuizRenderInfo.correctAnswer")
    if browser.find_element_by_id("rqAnswerOption0").get_attribute(
            "data-option") == correctAnswer:
        browser.find_element_by_id("rqAnswerOption0").click()
    else:
        browser.find_element_by_id("rqAnswerOption1").click()
    time.sleep(10)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
def login(driver: WebDriver):
    """Open the back-office site, enter the username and retry login until it succeeds.

    The window is resized to the document's scroll size, the configured
    username is typed in, and ``inputPwdCaptchaAndLogin`` is called repeatedly
    until it returns ``None``. Afterwards the function waits up to 5 seconds
    for the element with class ``sider___g53Yu`` to be present.

    :param driver: browser driver used for the whole login flow
    """
    captcha_image_path = veryeast_config.SCREEN_IMG_DIR + "/img.png"

    # Open the back-office URL.
    driver.get(veryeast_config.BG_SYSTEM_URL)

    # Read the screen scaling factor.
    pixel_ratio = driver.execute_script("return window.devicePixelRatio")
    print("devicePixelRatio=%s" % pixel_ratio)

    page_width = driver.execute_script(
        "return document.documentElement.scrollWidth")
    page_height = driver.execute_script(
        "return document.documentElement.scrollHeight")
    print("width=%s, height=%s" % (page_width, page_height))
    driver.set_window_size(page_width, page_height)

    # Type the username.
    driver.find_element_by_id("username").send_keys(
        base.config.account.veryeast_username)

    # Keep submitting password + captcha until the helper reports nothing is pending.
    pending_login_button = driver.find_element_by_id("butn")
    while pending_login_button is not None:
        pending_login_button = inputPwdCaptchaAndLogin(
            driver, captcha_image_path, pixel_ratio)

    # After entering the main page.
    sidebar_locator = (By.CLASS_NAME, "sider___g53Yu")
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located(sidebar_locator))
def _scroll_to_elem(driver: WebDriver,
                    elem: WebElement,
                    y_delta=-70,
                    step=70,
                    verbose=False,
                    stop_if_visible=True):
    """Scroll the window in small randomized steps until ``elem`` is near the top.

    Each iteration compares the current ``window.pageYOffset`` with the target
    (``elem.location['y'] + y_delta``). When they are less than 50 pixels
    apart the window jumps exactly to the target and the loop ends. The loop
    also ends when the offset did not change since the previous step and the
    element is displayed and enabled, or when ``should_stop()`` is true.

    :param driver: driver whose window is scrolled
    :param elem: element to bring into view
    :param y_delta: offset added to the element's y position to get the target
    :param step: base step size in pixels, scaled by a log-normal random factor
    :param verbose: print target/current positions on every iteration
    :param stop_if_visible: accepted for compatibility; not used by this function
    """
    prev_y = -1
    while True:
        # Re-read the position once per iteration (the page may shift while scrolling).
        target_y = elem.location['y'] + y_delta
        cur_y = driver.execute_script('return window.pageYOffset')
        if verbose:
            print(
                f'scroll_to_elem: target_y: {target_y} elem.displayed: {elem.is_displayed()} '
                f'cur_y: {cur_y}')

        if abs(target_y - cur_y) < 50:
            # Close enough: finish with one exact jump.
            driver.execute_script(f"window.scrollTo(0, {target_y})")
            break
        if (cur_y == prev_y) and elem.is_displayed() and elem.is_enabled():
            # The last step did not move the page and the element is usable.
            break

        direction = +1.0 if (target_y - cur_y) >= 0 else -1.0
        next_y = int(cur_y + direction * step * random.lognormvariate(0, 0.2))
        driver.execute_script(f"window.scrollTo(0, {next_y})")
        prev_y = cur_y

        if should_stop():
            break
        _human_wait(0.05)
def assertLogin(d: WebDriver) -> None:
    """Make sure the browser is signed in to Google as the ``GEMAIL`` account.

    First checks the account page: if the text of the ``.gb_sb`` element
    equals ``GEMAIL`` the function returns immediately; if another account is
    shown it is logged out. Then the OAuth playground consent URL is opened,
    "Use another account" is clicked when the account chooser is shown, and
    the e-mail and password from the ``GEMAIL`` / ``GPASS`` environment
    variables are submitted. An alternate form (ids ``Email`` / ``password``)
    is used when the primary inputs are not found.

    :param d: browser driver to sign in with
    :raises AssertionError: if the browser does not end up on the OAuth
        playground URL
    """
    d.get("https://myaccount.google.com/")
    sleep(0.4)
    # Still on myaccount.google.com (no redirect away), so some account may be signed in.
    if match(r"^(http|https):\/\/(myaccount\.google\.com).*$", d.current_url):
        d.find_element_by_xpath(
            "/html/body/div[2]/header/div[2]/div[3]/div[1]/div/div/a").click()
        sleep(0.1)
        # Compare the text of the .gb_sb element with the expected e-mail.
        if d.execute_script(
                "return (document.querySelector('.gb_sb').innerText == \"%s\" ? true : false)" % getenv('GEMAIL')):
            return
        else:
            # A different account is signed in: log it out first.
            d.get("https://accounts.google.com/Logout")
            sleep(1)
            pass
    else:
        pass
    d.get(
        'https://accounts.google.com/o/oauth2/v2/auth/oauthchooseaccount?redirect_uri=https%3A%2F%2Fdevelopers.google.com%2Foauthplayground&prompt=consent&response_type=code&client_id=407408718192.apps.googleusercontent.com&scope=email&access_type=offline&flowName=GeneralOAuthFlow'
    )
    sleep(1)
    # If the account chooser (.OVnw0d) is shown, click "Use another account".
    d.execute_script(""" if (document.querySelector('.OVnw0d') != null) { for (li of document.querySelector('.OVnw0d').children) { if (li.innerText == "Use another account") { li.children[0].click(); } } return true; } else { return false; } """)
    sleep(1)
    try:
        # Primary sign-in form: e-mail input, then password input.
        d.find_element_by_xpath(
            "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div/div[1]/div/div[1]/input"
        ).send_keys(getenv('GEMAIL'))
        d.find_element_by_xpath(
            "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div/div[1]/div/div[1]/input"
        ).send_keys(Keys.RETURN)
        sleep(2)
        d.find_element_by_xpath(
            "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div[1]/div/div/div/div/div[1]/div/div[1]/input"
        ).send_keys(getenv('GPASS'))
        d.find_element_by_xpath(
            "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div[1]/div/div/div/div/div[1]/div/div[1]/input"
        ).send_keys(Keys.RETURN)
    except NoSuchElementException:
        # Alternate form that exposes the inputs by id.
        d.find_element_by_id("Email").send_keys(getenv('GEMAIL'))
        d.find_element_by_id("Email").send_keys(Keys.RETURN)
        sleep(2)
        d.find_element_by_id("password").send_keys(getenv('GPASS'))
        d.find_element_by_id("password").send_keys(Keys.RETURN)
    sleep(2)
    # NOTE(review): assert is stripped under -O; confirm callers do not rely on it.
    assert "developers.google.com/oauthplayground" in d.current_url
def __play(self, task_driver: WebDriver):
    """Click the first ``<div>`` whose ``className`` equals ``VIDEO_PLAY_CLASS_NAME2``.

    :param task_driver: driver showing the page to run the script in
    """
    script_parts = (
        ' var d = document.getElementsByTagName("div"); for (var i=0;i<d.length;i++){ if(d[i].className == \'',
        VIDEO_PLAY_CLASS_NAME2,
        '\'){ d[i].click(); break; } }',
    )
    task_driver.execute_script(script=script_parts[0] + script_parts[1] + script_parts[2])
def setup_proxy(driver: WebDriver, ip, port):
    """Set the browser's SOCKS proxy preferences from the ``about:config`` page.

    Opens ``about:config`` and runs a script that writes the
    ``network.proxy.*`` preferences through ``Components.classes``.

    :param driver: browser driver to configure
    :param ip: value written to ``network.proxy.socks``
    :param port: value written to ``network.proxy.socks_port``; ``0`` selects
        proxy type 5, any other value selects type 1
    """
    driver.get("about:config")

    if port == 0:
        proxy_type = 5
    else:
        proxy_type = 1

    statements = [
        'var prefs = Components.classes["@mozilla.org/preferences-service;1"]'
        ' .getService(Components.interfaces.nsIPrefBranch);',
        f'prefs.setIntPref("network.proxy.type", {proxy_type});',
        f'prefs.setCharPref("network.proxy.socks", "{ip}");',
        f'prefs.setIntPref("network.proxy.socks_port", {port});',
        'prefs.setBoolPref("network.proxy.socks_remote_dns",false);',
    ]
    driver.execute_script(" ".join(statements))
def collect_moments(driver: WebDriver, max_idle_rounds: int = 30):
    """Scroll the moments page until enough ``card`` elements are loaded.

    Loads ``moments_url`` and scrolls down by 10000 pixels once per second,
    re-collecting the elements with class ``card`` each time, until at least
    ``max_video`` of them exist.

    The original loop never ended when the page held fewer than ``max_video``
    cards. Scrolling now also stops after ``max_idle_rounds`` consecutive
    rounds in which the card count did not grow.

    :param driver: browser driver used to load and scroll the page
    :param max_idle_rounds: consecutive no-growth rounds tolerated before
        giving up; ``None`` disables the limit and scrolls until ``max_video``
        is reached
    :return: the list of card elements found on the last round (may hold fewer
        than ``max_video`` items when the idle limit was hit)
    """
    driver.get(moments_url)
    best_count = 0
    idle_rounds = 0
    while True:
        driver.execute_script('window.scrollBy(0,10000)')
        videos = driver.find_elements_by_class_name("card")
        if len(videos) >= max_video:
            break

        if len(videos) > best_count:
            best_count = len(videos)
            idle_rounds = 0
        else:
            idle_rounds += 1
            if max_idle_rounds is not None and idle_rounds >= max_idle_rounds:
                # The page stopped producing new cards: return what exists.
                break
        time.sleep(1)
    return videos
def completeDailySetQuiz(browser: WebDriver, cardNumber: int) -> None:
    """Open a daily-set quiz card and answer every question correctly.

    The quiz opens in a second window. Question and option counts are read
    from ``_w.rewardsQuizRenderInfo``. With 8 options every option flagged
    ``iscorrectoption`` is clicked; with 4 options the one whose
    ``data-option`` equals ``correctAnswer`` is clicked. On normal completion
    the window is closed and focus returns to the first window.

    :param browser: driver positioned on the page that holds the daily set
    :param cardNumber: 1-based index used in the ``mee-card[...]`` XPath
    """
    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' + str(cardNumber) + ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)
    if not waitUntilQuizLoads(browser):
        # The quiz never loaded: restore the tabs and give up.
        resetTabs(browser)
        return
    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH, '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)
    numberOfQuestions = browser.execute_script(
        "return _w.rewardsQuizRenderInfo.maxQuestions")
    numberOfOptions = browser.execute_script(
        "return _w.rewardsQuizRenderInfo.numberOfOptions")
    for question in range(numberOfQuestions):
        if numberOfOptions == 8:
            # Multi-select question: collect the ids of all correct options first.
            answers = []
            for i in range(8):
                if browser.find_element_by_id("rqAnswerOption" + str(
                        i)).get_attribute("iscorrectoption").lower() == "true":
                    answers.append("rqAnswerOption" + str(i))
            for answer in answers:
                browser.find_element_by_id(answer).click()
                time.sleep(5)
                # NOTE(review): this early return leaves the quiz window open
                # and focused; confirm that is intended.
                if not waitUntilQuestionRefresh(browser):
                    return
            time.sleep(5)
        elif numberOfOptions == 4:
            # Single-answer question: click the option matching correctAnswer.
            correctOption = browser.execute_script(
                "return _w.rewardsQuizRenderInfo.correctAnswer")
            for i in range(4):
                if browser.find_element_by_id(
                        "rqAnswerOption" + str(i)).get_attribute("data-option") == correctOption:
                    browser.find_element_by_id("rqAnswerOption" + str(i)).click()
                    time.sleep(5)
                    if not waitUntilQuestionRefresh(browser):
                        return
                    break
            time.sleep(5)
    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
def email_login(driver: WebDriver, user_account, user_password):
    """Log in to a 126.com mailbox in a new tab and search it for "facebook".

    Opens the 126 passport page with ``window.open``, switches to the second
    window handle, types the account name (the part before ``@``) and the
    password, submits with ENTER, dismisses the message box by clicking the
    third ``btn-inner`` span, and finally types "facebook" into the mailbox
    search input.

    :param driver: browser driver; a first window must already be open
    :param user_account: e-mail address; only the part before ``@`` is typed
    :param user_password: password typed into the password field
    :return: None
    """
    url = "https://passport.126.com/ydzj/maildl?product=mail126&pdconf=yddl_mail126_conf&mc=146E1F&curl=https%3A%2F%2Fmail.126.com%2Fentry%2Fcgi%2Fntesdoor%3Ffrom%3Dsmart%26language%3D0%26style%3D11%26destip%3D192.168.202.48%26df%3Dsmart_ios"
    js = 'window.open("{}");'.format(url)
    driver.execute_script(js)

    handles = driver.window_handles
    print(handles)
    # switch_to.window replaces the deprecated switch_to_window alias.
    driver.switch_to.window(handles[1])  # focus the newly opened login tab
    time.sleep(10)

    email_account = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'input[name="account"]')))
    super_sendkeys(email_account, user_account.split("@")[0])
    time.sleep(5)

    email_password = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'input[type="password"]')))
    super_sendkeys(email_password, user_password)
    time.sleep(5)

    # Submit by pressing ENTER inside the password field.
    login_email = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'input[type="password"]')))
    login_email.send_keys(Keys.ENTER)
    time.sleep(5)

    # Handle the pop-up message box.
    time.sleep(30)
    alter_info = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'div[class="msgbox-simpleText "]')))
    if alter_info:
        alter_button = driver.find_elements(
            By.CSS_SELECTOR, 'span[class="btn-inner"]')
        alter_button[2].click()

    handles = driver.window_handles
    print(handles)
    driver.switch_to.window(handles[1])  # re-select the mail tab

    search_button = driver.find_element(
        By.CSS_SELECTOR, 'div[class="toolbar-optItem "]')
    search_button.click()
    send_info = driver.find_element(By.CSS_SELECTOR, 'input[class="ipt-input"]')
    send_info.send_keys("facebook")
    send_info.send_keys(Keys.ENTER)
def find_more_butto_and_click(driver: WebDriver):
    """Click the first "continue reading" button found on the page, if any.

    Tries each class name in ``MORE_BUTTON_CLASS`` in order. The first element
    found is clicked through JavaScript, followed by a 2 second pause. When no
    class name matches, a message is printed and nothing is clicked.

    :param driver: browser driver showing the document page
    """
    for more_btn_class in MORE_BUTTON_CLASS:
        try:
            more_button = driver.find_element_by_class_name(more_btn_class)
        except Exception:
            # Best effort: this class name is not on the page, try the next.
            # (Exception rather than a bare except, so Ctrl-C still works.)
            continue

        print("点击[继续阅读]按钮,加载所有子页面")
        driver.execute_script("arguments[0].click();", more_button)
        time.sleep(2)
        return
    print("没有找到[继续阅读]按钮")
def _scroll_down_like_human(driver: WebDriver, step=70, wait=0.03):
    """Scroll down in small randomized steps until the page stops moving.

    Starts at y=100 and advances the target by ``step`` times a log-normal
    random factor after every scroll. Stops when ``window.pageYOffset`` is
    unchanged from the previous iteration or ``should_stop()`` is true.

    :param driver: driver whose window is scrolled
    :param step: base step size in pixels
    :param wait: value passed to ``_human_wait`` after every step
    """
    target = 100
    last_offset = 0
    while True:
        driver.execute_script(f"window.scrollTo(0, {target})")
        target += step * random.lognormvariate(0, 0.1)
        _human_wait(wait)

        offset = driver.execute_script('return window.pageYOffset;')
        if offset == last_offset:
            # The page did not move any further.
            return
        if should_stop():
            return
        last_offset = offset
def get_job_items_per_tab(driver: WebDriver, main_container: dict):
    """Fill ``main_container`` with the jobs listed in each ``list-container``.

    Every ``list-container`` element except the last is paired by position
    with a key of ``main_container``. Each job link inside the container's
    fourth ``div`` is opened in a new window, its form fields are read into a
    ``Job``, and the job's dict is appended to that key's list. Any exception
    is printed and the process exits with status 2.

    :param driver: browser driver showing the board page
    :param main_container: dict whose keys, in order, name the containers; its
        values are replaced with lists of job dicts
    """
    try:
        tab_containers = driver.find_elements(By.CLASS_NAME,
                                              'list-container')[:-1]
        tab_keys = list(main_container)
        print("Fetching data...")

        for position, tab in enumerate(tab_containers):
            collected = []
            links_holder = tab.find_elements(By.TAG_NAME, 'div')[3]
            links = links_holder.find_elements(By.TAG_NAME, 'a')

            for link in tqdm(links):
                driver.execute_script(
                    f"window.open('{link.get_attribute('href')}', '_blank');"
                )
                open_windows = driver.window_handles
                sleep(3)
                driver.switch_to.window(open_windows[1])
                driver.implicitly_wait(5)

                # Build a Job object from the form's input fields.
                company = driver.find_element(
                    By.XPATH,
                    "//input[@placeholder='Company']").get_attribute('value')
                job_title = driver.find_element(
                    By.XPATH,
                    "//input[@placeholder='+ add title']").get_attribute('value')
                location = driver.find_element(
                    By.XPATH,
                    "//input[@placeholder='+ add location']").get_attribute('value')
                description = driver.find_element(By.CLASS_NAME,
                                                  'ql-editor').text
                url_holder = driver.find_element(
                    By.XPATH, "//p[@title='Post URL']/following-sibling::div")
                post_url = url_holder.find_element(
                    By.TAG_NAME, 'a').get_attribute('href')

                job = Job(company, job_title, post_url, location, description)
                collected.append(job.as_dict())

                driver.close()
                driver.switch_to.window(open_windows[0])

            main_container[tab_keys[position]] = collected
    except Exception as err:
        print("Error getting job_container:" + str(err))
        sys.exit(2)
def GetExampleAndSchema(driver: WebDriver) -> Tuple[str, str]:
    """Extract the JSON example and schema from an endpoint page.

    Two textarea pairs are tried in order: the bottom table (POST upload
    payload), then the table on the right side (GET response). The first pair
    whose example is not ``None`` is returned; when neither has an example,
    two empty strings are returned.
    """
    textarea_pairs = (
        # Bottom table: schema for a POST request payload for upload.
        ('textarea.payload_text', 'textarea.payload_text_schema'),
        # Right-hand table: schema for the GET's response.
        ('textarea#response_body_example', 'textarea#response_body_schema'),
    )
    for example_selector, schema_selector in textarea_pairs:
        example = driver.execute_script(f'return jQuery("{example_selector}").val();')
        schema = driver.execute_script(f'return jQuery("{schema_selector}").val();')
        if example is not None:
            return example, schema

    # Give up, there's probably no table.
    return '', ''
def completeDailySetThisOrThat(browser: WebDriver, cardNumber: int):
    """Open a daily-set "This or That" card and answer its 10 questions.

    The quiz opens in a second window. The function polls every 0.5 seconds
    until the start button exists, starts the quiz, and for each of 10 rounds
    computes the code of both options with ``getAnswerCode`` and clicks the
    one matching ``_w.rewardsQuizRenderInfo.correctAnswer``. Afterwards the
    window is closed and focus returns to the first window.

    :param browser: driver positioned on the page that holds the daily set
    :param cardNumber: 1-based index used in the ``mee-card[...]`` XPath
    """
    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' + str(cardNumber) +
        ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    # Poll until the start button exists. Exception (not a bare except) keeps
    # KeyboardInterrupt/SystemExit able to break out of the polling loop.
    while True:
        try:
            browser.find_element_by_xpath('//*[@id="rqStartQuiz"]')
            break
        except Exception:
            time.sleep(0.5)

    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)

    for question in range(10):
        answerEncodeKey = browser.execute_script("return _G.IG")

        answer1 = browser.find_element_by_id("rqAnswerOption0")
        answer1Title = answer1.get_attribute('data-option')
        answer1Code = getAnswerCode(answerEncodeKey, answer1Title)

        answer2 = browser.find_element_by_id("rqAnswerOption1")
        answer2Title = answer2.get_attribute('data-option')
        answer2Code = getAnswerCode(answerEncodeKey, answer2Title)

        correctAnswerCode = browser.execute_script(
            "return _w.rewardsQuizRenderInfo.correctAnswer")

        if answer1Code == correctAnswerCode:
            answer1.click()
            time.sleep(8)
        elif answer2Code == correctAnswerCode:
            answer2.click()
            time.sleep(8)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
def inputPasswordAndCapture(driver: WebDriver, password):
    """Type the password if needed, wait for a 4-character captcha, then submit.

    The password is typed only when the password field is empty. The captcha
    input is focused and, after a 5 second pause, the login button is clicked
    only if the captcha field holds exactly 4 characters.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm the indentation against the original file.

    :param driver: browser driver showing the login form
    :param password: text typed into the ``password`` input
    :return: the login button as looked up last; after a submit it is looked
        up again through ``ElementUtils.findElement``
    """
    captcha_element = ElementUtils.findElement(driver, By.ID, "login_checkcode")
    login_btn = ElementUtils.findElement(driver, By.ID, "login_submit_btn")
    inputPasswordElement = driver.find_element_by_id("password")
    # Only type the password when the field is still empty.
    if len(inputPasswordElement.get_attribute('value').strip()) == 0:
        inputPasswordElement.send_keys(password)
        time.sleep(0.2)
    captcha_element = ElementUtils.findElement(driver, By.ID, "login_checkcode")
    if captcha_element is not None:
        driver.execute_script("arguments[0].focus();", captcha_element)
        # presumably leaves time for the captcha to be filled in — TODO confirm
        time.sleep(5)
        if len(captcha_element.get_attribute('value').strip()) == 4:
            # A loading spinner is shown: id="spin" class="spinner"
            login_btn.click()
            time.sleep(2)
            login_btn = ElementUtils.findElement(driver, By.ID, "login_submit_btn")
    return login_btn
def completeDailySetThisOrThat(browser: WebDriver, cardNumber: int):
    """Open a daily-set "This or That" card and answer its 10 questions.

    The quiz opens in a second window. For each of 10 rounds the code of both
    options is computed in the page (sum of the title's character codes plus
    the last two hex digits of ``_G.IG``) and the option whose code equals
    ``_w.rewardsQuizRenderInfo.correctAnswer`` is clicked. Afterwards the
    window is closed and focus returns to the first window.

    :param browser: driver positioned on the page that holds the daily set
    :param cardNumber: 1-based index used in the ``mee-card[...]`` XPath
    """
    # The key and the title are passed as script arguments rather than being
    # spliced into the JavaScript source, so a quote or backslash in an
    # answer title can no longer break the script.
    answer_code_js = (
        "var IG = arguments[0]; "
        "function getAnswerCode(n){for (var r, t = 0, i = 0; i < n.length; i++) "
        "t += n.charCodeAt(i); return r = parseInt(IG.substr(IG.length - 2), 16), "
        "t += r, t.toString();} "
        "return getAnswerCode(arguments[1]);"
    )

    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' + str(cardNumber) +
        ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)

    for question in range(10):
        answerEncodeKey = browser.execute_script("return _G.IG")

        answer1 = browser.find_element_by_id("rqAnswerOption0")
        answer1Title = answer1.get_attribute('data-option')
        answer1Code = browser.execute_script(
            answer_code_js, answerEncodeKey, answer1Title)

        answer2 = browser.find_element_by_id("rqAnswerOption1")
        answer2Title = answer2.get_attribute('data-option')
        answer2Code = browser.execute_script(
            answer_code_js, answerEncodeKey, answer2Title)

        correctAnswerCode = browser.execute_script(
            "return _w.rewardsQuizRenderInfo.correctAnswer")

        if answer1Code == correctAnswerCode:
            answer1.click()
            time.sleep(8)
        elif answer2Code == correctAnswerCode:
            answer2.click()
            time.sleep(8)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
def login(browser: WebDriver, email: str, pwd: str, isMobile: bool = False):
    """Sign in at login.live.com and then verify the login on Bing.

    Types the e-mail, sets the password field's value through JavaScript,
    clicks through the optional security-check and "stay signed in" screens
    (each is skipped when absent), and finally calls ``checkBingLogin``.

    :param browser: browser driver to sign in with
    :param email: account e-mail typed into the ``loginfmt`` field
    :param pwd: account password
    :param isMobile: forwarded unchanged to ``checkBingLogin``
    """
    # Open the Microsoft account login page
    browser.get('https://login.live.com/')
    # Wait complete loading
    waitUntilVisible(browser, By.ID, 'loginHeader', 10)
    # Enter email
    print('[LOGIN]', 'Writing email...')
    browser.find_element_by_name("loginfmt").send_keys(email)
    # Click next
    browser.find_element_by_id('idSIButton9').click()
    # Wait 2 seconds
    time.sleep(2)
    # Wait complete loading
    waitUntilVisible(browser, By.ID, 'loginHeader', 10)
    # Enter password. It is passed as a script argument rather than being
    # concatenated into the JavaScript source, so passwords containing quotes
    # or backslashes are set correctly.
    browser.execute_script(
        "document.getElementById('i0118').value = arguments[0];", pwd)
    print('[LOGIN]', 'Writing password...')
    # Click next
    browser.find_element_by_id('idSIButton9').click()
    # Wait 5 seconds
    time.sleep(5)
    # Click Security Check
    print('[LOGIN]', 'Passing security checks...')
    try:
        browser.find_element_by_id('iLandingViewAction').click()
    except (NoSuchElementException, ElementNotInteractableException):
        # The security-check screen is optional.
        pass
    # Wait complete loading
    try:
        waitUntilVisible(browser, By.ID, 'KmsiCheckboxField', 10)
    except TimeoutException:
        # The "stay signed in" screen is optional.
        pass
    # Click next
    try:
        browser.find_element_by_id('idSIButton9').click()
        # Wait 5 seconds
        time.sleep(5)
    except (NoSuchElementException, ElementNotInteractableException):
        pass
    print('[LOGIN]', 'Logged-in !')
    # Check Login
    print('[LOGIN]', 'Ensuring login on Bing...')
    checkBingLogin(browser, isMobile)
def user_agent_to_session(self,
                          driver: WebDriver = None,
                          session: Session = None) -> None:
    """Copy the driver's user agent into the session's ``User-Agent`` header.

    :param driver: browser to read ``navigator.userAgent`` from; falls back to
        ``self.driver`` when falsy
    :param session: session whose headers are updated; falls back to
        ``self.session`` when falsy
    """
    if not driver:
        driver = self.driver
    if not session:
        session = self.session

    browser_user_agent = driver.execute_script("return navigator.userAgent;")
    new_headers = {"User-Agent": browser_user_agent}
    session.headers.update(new_headers)
def download_file(browser: WebDriver, doc_id: str):
    """Request a document download, solving the captcha dialog with OCR.

    Calls the page's ``_Layout_DownloadAuthority`` for ``doc_id``, then
    repeatedly screenshots the captcha image, reads it with Tesseract and
    submits the text. The loop has no explicit exit: it ends when a browser
    call raises (for example once the dialog's elements can no longer be
    found), which the outer handler swallows. The function then waits until
    ``download_temp_path`` holds at least one entry.

    :param browser: browser driver showing the page that owns the download
    :param doc_id: document id passed to ``_Layout_DownloadAuthority``
    :return: ``download_temp_path`` joined with the first directory entry
    """
    try:
        # doc_id is passed as a script argument instead of being spliced into
        # the JavaScript source.
        browser.execute_script(
            "_Layout_DownloadAuthority(arguments[0], 'P001', 'P001', 0)", doc_id)
        time.sleep(0.5)
        while True:
            code_input = browser.find_element_by_css_selector(
                'input[name="ValidateCode"]')
            submit = browser.find_elements_by_css_selector(
                '.ui-dialog-buttonset button')[0]
            time.sleep(0.3)

            with open(captcha_temp, 'wb') as file:
                file.write(
                    browser.find_element_by_css_selector(
                        'img[alt="驗證碼圖片"]').screenshot_as_png)
            img = Image.open(captcha_temp)
            img = convert_img(img, 192)
            img.save(captcha2_temp)

            text = pytesseract.image_to_string(img, lang='eng')
            text = str(text).replace(" ", "").replace("\n", "").replace("\f", "")
            print('##' + text + '##')
            if not text:
                # OCR produced nothing: click the link in the dialog and retry
                # (presumably it refreshes the captcha — TODO confirm).
                browser.find_element_by_xpath(
                    '/html/body/div[10]/div[2]/p/a').click()
                time.sleep(0.1)
                continue

            code_input.send_keys(text)
            time.sleep(0.1)
            submit.click()
            time.sleep(0.1)
            try:
                # An alert after submitting is dismissed and the captcha retried.
                browser.switch_to.alert.accept()
                continue
            except NoAlertPresentException:
                pass
    except Exception:
        # Deliberate best effort: any browser error ends the captcha loop.
        # Exception instead of BaseException so Ctrl-C / SystemExit still work.
        pass

    print(str(os.listdir(download_temp_path)))
    # Wait for the download to show up in the temp directory.
    while len(os.listdir(download_temp_path)) == 0:
        time.sleep(1)
    time.sleep(1)
    return download_temp_path + os.path.sep + os.listdir(download_temp_path)[0]
def _scroll_up_like_human(driver: WebDriver, step=50, wait=0.03, verbose=False):
    """Scroll up in small randomized steps until the page stops moving.

    Starts from the current ``window.pageYOffset`` and lowers the target by
    ``step`` times a log-normal random factor after every scroll. Stops when
    the offset is unchanged from the previous iteration or ``should_stop()``
    is true.

    :param driver: driver whose window is scrolled
    :param step: base step size in pixels
    :param wait: value passed to ``_human_wait`` after every step
    :param verbose: print the starting offset
    """
    target = driver.execute_script('return window.pageYOffset;')
    if verbose:
        print("pos0: ", target)

    last_offset = -1
    while True:
        driver.execute_script(f"window.scrollTo(0,{target})")
        target -= step * random.lognormvariate(0, 0.1)
        _human_wait(wait)

        offset = driver.execute_script('return window.pageYOffset;')
        if offset == last_offset:
            # The page did not move any further.
            return
        if should_stop():
            return
        last_offset = offset
def _scroll_to_y(driver: WebDriver, target_y: int, step=70, verbose=False):
    """Scroll the window toward ``target_y`` in small randomized steps.

    When the current offset is less than 50 pixels from the target, the window
    jumps exactly to the target and the function returns. It also returns as
    soon as ``should_stop()`` is true.

    :param driver: driver whose window is scrolled
    :param target_y: vertical offset to reach
    :param step: base step size in pixels, scaled by a log-normal random factor
    :param verbose: print the current offset on every iteration
    """
    while True:
        current = driver.execute_script('return window.pageYOffset')
        if verbose:
            print(f'scroll_to_y: cur_y: {current}')

        remaining = target_y - current
        if abs(remaining) < 50:
            # Close enough: finish with one exact jump.
            driver.execute_script(f"window.scrollTo(0, {target_y})")
            return

        sign = +1.0 if remaining >= 0 else -1.0
        next_y = int(current + sign * step * random.lognormvariate(0, 0.2))
        driver.execute_script(f"window.scrollTo(0, {next_y})")

        if should_stop():
            return
        _human_wait(0.05)
def completeMorePromotionThisOrThat(browser: WebDriver, cardNumber: int):
    """Open a "more activities" This-or-That card and answer its 10 questions.

    The quiz opens in a second window. If ``waitUntilQuizLoads`` reports
    failure, the tabs are reset and nothing else happens. Otherwise, for each
    of 10 rounds, the code of both options is computed with ``getAnswerCode``
    and the option matching ``_w.rewardsQuizRenderInfo.correctAnswer`` is
    clicked. Afterwards the window is closed and focus returns to the first
    window.

    :param browser: driver positioned on the page that holds the cards
    :param cardNumber: 1-based index used in the ``mee-card[...]`` XPath
    """
    card_link_xpath = (
        '//*[@id="more-activities"]/div/mee-card[' + str(cardNumber) +
        ']/div/card-content/mee-rewards-more-activities-card-item/div/div[3]/a'
    )
    browser.find_element_by_xpath(card_link_xpath).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    if not waitUntilQuizLoads(browser):
        resetTabs(browser)
        return

    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)

    for _ in range(10):
        encode_key = browser.execute_script("return _G.IG")

        first_option = browser.find_element_by_id("rqAnswerOption0")
        first_code = getAnswerCode(
            encode_key, first_option.get_attribute('data-option'))

        second_option = browser.find_element_by_id("rqAnswerOption1")
        second_code = getAnswerCode(
            encode_key, second_option.get_attribute('data-option'))

        expected_code = browser.execute_script(
            "return _w.rewardsQuizRenderInfo.correctAnswer")

        # Click the first option whose code matches; skip the round if none does.
        for option, code in ((first_option, first_code),
                             (second_option, second_code)):
            if code == expected_code:
                option.click()
                time.sleep(8)
                break

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
async def _get_request_metadata_from_web_driver(
        cls, driver: WebDriver) -> Tuple[int, int, int, int]:
    """Run ``comment_metadata_javascript`` in the page and parse its result.

    The script's result is indexed at positions 0-3. Position 0 is returned
    unchanged, positions 1 and 2 are converted with ``int``, and position 3 is
    converted after ``non_number_replacement_regex`` matches are stripped,
    with ``0`` used when nothing is left.

    :param driver: browser driver showing the page to inspect
    :return: ``(request_id, type_id, item_id, max_comments)``
    """
    metadata: Tuple[int, str, str, str] = driver.execute_script(
        comment_metadata_javascript)
    return (
        metadata[0],
        int(metadata[1]),
        int(metadata[2]),
        int(non_number_replacement_regex.sub(repl='', string=metadata[3])
            or '0'),
    )
def do(self, task_driver: WebDriver, task_url: str, timeout: int):
    """Open the task page and scroll it once per second for ``timeout`` seconds.

    Returns ``None`` right after loading when ``check_task.check_wrap``
    reports true. Otherwise each second the page is scrolled with
    ``PAGE_ROLL_JS`` formatted with ``i*10`` plus a random float from
    ``get_random.get_random_float(a=0, b=10)``, while a progress bar advances.
    The success flag is set once the loop finishes.

    :param task_driver: browser driver used for the task
    :param task_url: URL of the task page
    :param timeout: number of one-second scroll steps
    """
    task_driver.get(url=task_url)
    if check_task.check_wrap(task_driver=task_driver):
        return None

    progress = tqdm(desc=TASK_IDE[TASK_ID[1]],
                    total=timeout,
                    leave=False,
                    ncols=BAR_LENGTH)
    for elapsed in range(timeout):
        second = elapsed + 1
        progress.update(1)
        jitter: float = get_random.get_random_float(a=0, b=10)
        scroll_js: str = PAGE_ROLL_JS.format(second * 10 + jitter)
        task_driver.execute_script(script=scroll_js)
        time.sleep(1)
    progress.close()
    self.__success = True
def search_webku(driver: WebDriver):
    """Load the document page, read the text of every page and save it to a file.

    Loads ``WENKU_URL``, clicks the "continue reading" button if present,
    scrolls each ``reader-page`` element into view, collects its text, cleans
    the combined text with ``remove_needless_newline``, prints it and writes
    it to ``WENKU_FILE``.

    :param driver: browser driver used to load and read the document
    """
    print("开始加载网页..")
    try:
        driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
        driver.set_script_timeout(PAGE_LOAD_TIMEOUT)
        driver.get(WENKU_URL)
        driver.set_page_load_timeout(NORMAL_FIND_ELEMENT_TIMEOUT)
        driver.set_script_timeout(NORMAL_FIND_ELEMENT_TIMEOUT)
    except Exception:
        # Deliberate best effort: do not stop the page load here, because the
        # content of the sub-pages is lazy-loaded.
        # (Exception rather than a bare except, so Ctrl-C still works.)
        # NOTE(review): when get() fails, the timeouts are not reset to
        # NORMAL_FIND_ELEMENT_TIMEOUT — confirm that is acceptable.
        pass

    # Click "continue reading" to load all sub-pages.
    print("页面加载成功,尝试寻找[继续阅读]按钮")
    find_more_butto_and_click(driver)

    page_div_list: List[WebElement] = driver.find_elements_by_class_name(
        "reader-page")
    print(f"共找到{len(page_div_list)}页, 开始循环解析..")

    page_texts = []
    for index, page_div in enumerate(page_div_list):
        # Scroll the browser to this page.
        driver.execute_script("arguments[0].scrollIntoView();", page_div)
        # Wait for the page's lazy-loaded content.
        wait_until_found_p_element(driver)
        # Collect the text of the current page.
        page_texts.append(get_page_text(page_div))
        print(f"第{index + 1}页数据解析成功")

    # Remove redundant line breaks.
    text = remove_needless_newline("".join(page_texts))
    print("所有页面解析完成, 准备写入文件")
    print(f"\n---------start--------\n{text}\n---------end--------")
    with open(WENKU_FILE, "w") as wf:
        wf.write(text)
    print(f"所有页面数据已写入 [{WENKU_FILE}] 中")
def completePunchCard(browser: WebDriver, url: str, childPromotions: dict):
    """Work through the incomplete child promotions of a punch card.

    Loads ``url`` and, for every child whose ``complete`` equals ``False``:
    a ``urlreward`` is opened in a second window and closed after 13-17
    seconds; a ``quiz`` is opened and each question is answered with a random
    pick before the window is closed.

    :param browser: browser driver used for the punch card
    :param url: punch-card page to load
    :param childPromotions: iterable of promotion records, each read through
        the ``complete`` and ``promotionType`` keys
    """
    browser.get(url)
    for promotion in childPromotions:
        if promotion['complete'] != False:
            continue

        promotion_type = promotion['promotionType']
        if promotion_type == "urlreward":
            browser.execute_script("document.getElementsByClassName('offer-cta')[0].click()")
            time.sleep(1)
            browser.switch_to.window(window_name=browser.window_handles[1])
            time.sleep(random.randint(13, 17))
            browser.close()
            time.sleep(2)
            browser.switch_to.window(window_name=browser.window_handles[0])
            time.sleep(2)
        elif promotion_type == "quiz":
            browser.execute_script("document.getElementsByClassName('offer-cta')[0].click()")
            time.sleep(1)
            browser.switch_to.window(window_name=browser.window_handles[1])
            time.sleep(8)

            counter_element = browser.find_element_by_xpath('//*[@id="QuestionPane0"]/div[2]')
            # Drop the first and last character of the markup, then take the
            # largest integer in what remains as the question count.
            counter_text = str(counter_element.get_attribute('innerHTML'))[1:-1]
            numbers_found = [int(token) for token in counter_text.split() if token.isdigit()]
            total_questions = max(numbers_found)

            for index in range(total_questions):
                pane = str(index)
                browser.execute_script(
                    'document.evaluate("//*[@id=\'QuestionPane' + pane +
                    '\']/div[1]/div[2]/a[' + str(random.randint(1, 3)) +
                    ']/div", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.click()')
                time.sleep(5)
                browser.find_element_by_xpath(
                    '//*[@id="AnswerPane' + pane + '"]/div[1]/div[2]/div[4]/a/div/span/input').click()
                time.sleep(3)

            time.sleep(5)
            browser.close()
            time.sleep(2)
            browser.switch_to.window(window_name=browser.window_handles[0])
            time.sleep(2)
def next_search_page(self, driver: WebDriver) -> int:
    """Click the "next page" button and wait for the next page's results.

    The button is located with the ``next_page_btn`` selector from
    ``self.config["extras"]``. If it is enabled it is clicked through
    JavaScript, and the method waits for an element with the
    ``search_page_data`` class to be present.

    :param driver: browser driver showing a search results page
    :return: ``self.NEXT_PAGE_EXISTS`` after a successful click and wait;
        ``self.NEXT_PAGE_DEAD`` when the button is disabled or missing, or
        when the wait times out
    """
    c = self.config
    try:
        driver.implicitly_wait(3)
        next_button = driver.find_element_by_css_selector(c["extras"]["next_page_btn"])
        if not next_button.is_enabled():
            return self.NEXT_PAGE_DEAD

        print("Next page")
        # Click the element that was already located instead of re-querying it
        # with a selector interpolated into single-quoted JavaScript, which
        # broke for selectors containing a quote (e.g. a[rel='next']).
        driver.execute_script("arguments[0].click()", next_button)
        self.wait.until(
            ec.presence_of_element_located((By.CLASS_NAME, c["extras"]["search_page_data"])),
            'Items not found in this page')
        return self.NEXT_PAGE_EXISTS
    except (TimeoutException, NoSuchElementException) as err:
        # As before, only the timeout itself is printed unconditionally.
        if isinstance(err, TimeoutException):
            print(err)
        if self.args["debug"]:
            print("*******************************************\n")
            print("Last Four Exceptions")
            import traceback
            traceback.print_exc()
            print("\n*******************************************")
        return self.NEXT_PAGE_DEAD
def parse_page(driver: WebDriver) -> lxml.html.HtmlElement:
    """Return the cleaned ``<body>`` of the driver's current page.

    The page is scrolled to the bottom before its source is read and back to
    the top afterwards.

    Parameters
    ----------
    driver : WebDriver
        The Selenium driver holding the web page to parse

    Returns
    -------
    lxml.html.HtmlElement
        The ``body`` element of the parsed and cleaned document
    """
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # document_fromstring builds a full HTML document around the page source.
    document = lxml.html.document_fromstring(driver.page_source)

    # Keep the default safe attributes plus "aria-label".
    allowed_attributes = lxml.html.defs.safe_attrs | {"aria-label"}
    sanitizer = lxml.html.clean.Cleaner(
        page_structure=False,
        frames=False,
        forms=False,
        annoying_tags=False,
        safe_attrs=allowed_attributes,
        remove_unknown_tags=False,
    )
    # Called for its effect on the tree; the return value is ignored.
    sanitizer(document)

    body = document.find("body")

    driver.execute_script("window.scrollTo(0, 0);")
    return body