def testEle(self):
    """Scrape up to ~100 comment texts from each Bilibili video page.

    For every url in the module-level ``urls`` list: scroll the page to
    trigger lazy loading, collect comment paragraphs, append unseen
    texts to the global ``text_list`` and persist them via
    ``handle_text``, then page forward with the "next" button.
    """
    global text_list, urls
    for url in urls:
        my_selenium = Selenium(url)
        driver = my_selenium.driver
        driver.switch_to.default_content()
        # Keep paging until slightly more than 100 comments are gathered.
        while len(text_list) <= 100:
            # Scroll far down so the browser loads the dynamically
            # inserted content; the short delay gives the network time
            # to respond (may need several passes depending on speed).
            driver.execute_script("window.scrollBy(0,40000)")
            time.sleep(1)
            elements = my_selenium.wait_until_exists(
                '//div[@class="comment-list"]//p[@class="text"]')
            for element in elements:
                try:
                    text = element.text
                    if text not in text_list:
                        print(text)
                        handle_text(text, 'BilibiliComments.txt')
                        text_list.append(text)
                except Exception as err:
                    # Named ``err`` (not ``e``): the original shadowed the
                    # element variable, which Python 3 deletes after the
                    # handler.
                    print(err)
            elements = my_selenium.wait_until_exists(
                '//a[@class="next"]')  # and @href="javascript:;"]')
            driver.execute_script("arguments[0].click();", elements[0])
        text_list = []
        driver.close()
def testEle(self):
    """Log in to mobile QQZone and continuously scrape comment texts.

    Fills in the stored credentials, waits for the operator to confirm
    that the login succeeded (captchas must be solved by hand), then
    repeatedly harvests reply/comment nodes, recording unseen texts
    into the module-level ``text_list`` and ``QQZoneComments.txt``, and
    clicks the "load more" button for the next batch. Runs until killed.
    """
    my_selenium = Selenium("https://h5.qzone.qq.com/mqzone/index")
    driver = my_selenium.driver
    # Fill in the known QQ account and password, then click "login".
    # NOTE(review): find_element_by_id / find_elements_by_xpath were
    # removed in Selenium 4 — confirm the wrapped driver still provides
    # them before upgrading.
    driver.find_element_by_id("u").send_keys(self.user)
    driver.find_element_by_id("p").send_keys(self.pw)
    driver.find_element_by_id("go").click()
    # A single simulated click may fail when logins are frequent or the
    # server is busy, and repeated attempts trigger a captcha that is
    # hard to automate — so wait for manual confirmation instead: if
    # the automatic login failed, log in by hand, then answer "y".
    r = ''
    while r != 'y':
        print("Login successful?[y]")  # typo "seccessful" fixed
        r = input()
    # Let the webdriver operate on the current page.
    driver.switch_to.default_content()
    while True:
        elements = []
        elements += list(
            driver.find_elements_by_xpath('//*[@class="reply-txt"]'))
        elements += list(
            driver.find_elements_by_xpath('//*[@class="comment-text"]'))
        for e in elements:
            text = e.text
            if text not in text_list:
                print(text)
                text_list.append(text)
                handle_text(text, 'QQZoneComments.txt')
        # Bug fix: wait_until_exists returns a list of elements (the
        # other scrapers index [0]); click the first match rather than
        # passing the whole list to execute_script.
        matches = my_selenium.wait_until_exists(
            '//*[@class="btn js_morebtn"]')
        driver.execute_script("arguments[0].click();", matches[0])
        my_selenium.wait_until_exists('//*[@class="comment-text"]')
        time.sleep(1)
def testEle(self):
    """Scrape joke texts from every listing page in the global ``urls``.

    Each listing page is scrolled to load dynamic content, then every
    article link is opened in a second browser; paragraph texts not yet
    seen are printed, appended to the global ``text_list`` and saved to
    ``DuanZi.txt``.
    """
    global urls, text_list
    for url in urls:
        my_selenium = Selenium(url)
        driver = my_selenium.driver
        # Let the webdriver operate on the current page.
        driver.switch_to.default_content()
        # Scroll far down so dynamically loaded content appears; the
        # delay gives the network time to respond.
        driver.execute_script("window.scrollBy(0,40000)")
        time.sleep(1)
        anchors = list(
            my_selenium.wait_until_exists(
                '//div[contains(@id, "log")]/h3[@class="title"]/a'))
        for anchor in anchors:
            # ``driver2`` may never be bound if opening the article
            # fails — guard the close() below instead of risking a
            # NameError like the original did.
            driver2 = None
            try:
                # Local name (the original reassigned the outer loop
                # variable ``url`` here).
                article_url = anchor.get_attribute("href")
                my_selenium2 = Selenium(article_url)
                driver2 = my_selenium2.driver
                paragraphs = list(
                    my_selenium2.wait_until_exists(
                        '//div[@class="cont"]/p'))
                for paragraph in paragraphs:
                    try:
                        text = paragraph.text
                        if text not in text_list:
                            print(text)
                            # Bug fix: the original appended the listing
                            # anchor's text (``e.text``) instead of the
                            # paragraph text that was just checked.
                            text_list.append(text)
                            handle_text(text, 'DuanZi.txt')
                    except Exception as err:
                        print(err)
            except Exception as err:
                print(err)
            if driver2 is not None:
                driver2.close()
        driver.close()
predicted = model.predict(features)[0] predicted = np.array(predicted) probab = predicted.max() predition = sentiment_tag[predicted.argmax()] return predition, probab loadModel() from auto_everything.web import Selenium while 1: url = input("请输入你想要分析的网站url: ") if url.strip('\n ') == "": continue my_selenium = Selenium(url) driver = my_selenium.driver driver.execute_script("window.scrollBy(0,40000)") time.sleep(1) elements = my_selenium.wait_until_exists('//p') for e in elements: try: text = e.text.strip('\n ') if text != "": r = predictResult(text) print(text, '\n', r, '\n' * 2) except Exception as e: pass
"""Open Google in a real Chrome profile, search for "yingshaoxo", then quit."""
from time import sleep

from auto_everything.web import Selenium
from auto_everything.terminal import Terminal

t = Terminal()
# Chrome refuses to reuse a profile directory that is already locked by
# a running instance, so kill any existing Chrome processes first.
t.kill("chrome")

my_selenium = Selenium(
    "https://www.google.com",
    headless=False,
    user_data_dir="/home/yingshaoxo/.config/google-chrome/")
d = my_selenium.driver

# Locate the search input box (brittle absolute XPath — presumably tied
# to the current Google layout; verify if the page markup changes).
xpath = '/html/body/div[1]/div[3]/form/div[2]/div[1]/div[1]/div/div[2]/input'
elements = my_selenium.wait_until_exists(xpath)

# Clear any pre-filled text with backspaces, then type the query.
elements[0].send_keys('\b' * 20, "yingshaoxo")

# Click the search button; truthiness check replaces ``if len(elements):``.
elements = my_selenium.wait_until_exists('//input[@value="Google Search"]')
if elements:
    elements[0].click()
    # d.execute_script("arguments[0].click();", elements[0])

# Leave the browser open briefly so the result can be inspected, then quit.
sleep(30)
d.quit()
—————————————— """ f.write(text) ''' urls = """ https://www.bilibili.com/video/av1415480 https://www.bilibili.com/video/av25255526 """ urls = [url.strip(' \n') for url in urls.split('\n') if url.strip(' \n') != ""] ''' top_url = "https://www.bilibili.com/ranking/all/155/0/3" my_selenium = Selenium(top_url) driver = my_selenium.driver elements = my_selenium.wait_until_exists( '//a[@class="title" and @target="_blank"]') urls = [e.get_attribute("href") for e in elements] driver.close() text_list = [] class seleniumTest(unittest.TestCase): def setUp(self): pass def testEle(self):
if model is None: print("Please run \"loadModel\" first.") return None features = findFeatures(text) predicted = model.predict(features)[0] # we have only one sentence to predict, so take index 0 predicted = np.array(predicted) probab = predicted.max() predition = sentiment_tag[predicted.argmax()] return predition, probab model = loadModel() import time from auto_everything.web import Selenium my_selenium = Selenium("https://baidu.com") driver = my_selenium.driver print('\n'*12) while True: url = input("请输入你想要分析的网站url: ") if url.strip('\n ') == "": continue driver.get(url) driver.execute_script("window.scrollBy(0,40000)") time.sleep(1) elements = my_selenium.wait_until_exists('//div') # //p for e in elements: try: