Пример #1
0
    def testEle(self):
        """Collect comment text from every page listed in the global ``urls``.

        For each URL a new browser is opened, the page is scrolled so that
        dynamically loaded comments are rendered, and each comment text not
        already in the global ``text_list`` is printed, passed to
        ``handle_text`` (target file 'BilibiliComments.txt') and appended to
        ``text_list``.  Paging continues through the "next" link until more
        than 100 texts have been collected or no "next" link is found.
        ``text_list`` is reset to an empty list after each URL.
        """
        global text_list, urls
        for url in urls:
            my_selenium = Selenium(url)
            driver = my_selenium.driver
            try:
                driver.switch_to.default_content()

                while len(text_list) <= 100:
                    # Scroll down so the browser loads the dynamic content.
                    # Several scrolls with a short delay may be needed,
                    # depending on network speed.
                    driver.execute_script("window.scrollBy(0,40000)")
                    time.sleep(1)

                    comments = my_selenium.wait_until_exists(
                        '//div[@class="comment-list"]//p[@class="text"]')
                    for comment in comments:
                        try:
                            text = comment.text
                            if text not in text_list:
                                print(text)
                                handle_text(text, 'BilibiliComments.txt')
                                text_list.append(text)
                        except Exception as error:
                            # Use a name distinct from the loop variable so
                            # the element reference is never clobbered.
                            print(error)

                    # Alternative locator tried earlier:
                    # '//a[@class="next" and @href="javascript:;"]'
                    next_links = my_selenium.wait_until_exists(
                        '//a[@class="next"]')
                    if not next_links:
                        # No further page: stop instead of failing on
                        # next_links[0] or looping forever.
                        # NOTE(review): assumes wait_until_exists returns an
                        # empty sequence when nothing matches - confirm.
                        break
                    driver.execute_script(
                        "arguments[0].click();", next_links[0])
            finally:
                # Always release the browser window, even when scraping fails.
                driver.close()

            text_list = []
Пример #2
0
    def testEle(self):
        """Log in at h5.qzone.qq.com and collect reply/comment texts.

        Fills the login form with ``self.user`` / ``self.pw``, waits for the
        operator to confirm the login on stdin, then repeatedly reads every
        element with class "reply-txt" or "comment-text".  Each text not
        already in the module-level ``text_list`` is printed, appended to
        ``text_list`` and passed to ``handle_text`` (target file
        'QQZoneComments.txt').  More entries are requested by clicking the
        "btn js_morebtn" element; the loop ends when that button is no longer
        found.  The browser window is closed on exit.
        """
        my_selenium = Selenium("https://h5.qzone.qq.com/mqzone/index")
        driver = my_selenium.driver
        try:
            # Type the known QQ account into the account input box.
            driver.find_element_by_id("u").send_keys(self.user)
            # Type the known password into the password box.
            driver.find_element_by_id("p").send_keys(self.pw)
            # Click the login button automatically.
            driver.find_element_by_id("go").click()

            # When logins are frequent or the server is busy, one simulated
            # click may fail, so retrying comes to mind.  Well-known sites
            # such as QZone show a CAPTCHA after repeated logins, and solving
            # it automatically is a separate problem not covered here.  This
            # example therefore uses manual confirmation: if the automatic
            # login is seen to fail, log in by hand and then confirm.
            r = ''
            while r != 'y':
                print("Login seccessful?[y]")
                r = input()

            # Make webdriver operate on the current page.
            driver.switch_to.default_content()
            while True:
                elements = []
                elements += list(
                    driver.find_elements_by_xpath('//*[@class="reply-txt"]'))
                elements += list(
                    driver.find_elements_by_xpath(
                        '//*[@class="comment-text"]'))

                for element in elements:
                    text = element.text
                    if text not in text_list:
                        print(text)
                        text_list.append(text)
                        handle_text(text, 'QQZoneComments.txt')

                more_buttons = my_selenium.wait_until_exists(
                    '//*[@class="btn js_morebtn"]')
                if not more_buttons:
                    # Nothing left to load: leave the otherwise endless loop.
                    # NOTE(review): assumes wait_until_exists returns an
                    # empty sequence when nothing matches - confirm.
                    break
                # Click a single element; passing the whole result to
                # "arguments[0].click()" would hand JavaScript an array.
                driver.execute_script(
                    "arguments[0].click();", more_buttons[0])

                my_selenium.wait_until_exists('//*[@class="comment-text"]')
                time.sleep(1)
        finally:
            # Always release the browser window, even when scraping fails.
            driver.close()
Пример #3
0
    def testEle(self):
        """Follow every post link on each page in ``urls`` and collect text.

        For each URL a browser is opened and scrolled, and the links matching
        '//div[contains(@id, "log")]/h3[@class="title"]/a' are gathered.
        Every link is opened in a second browser; each '//div[@class="cont"]/p'
        paragraph text not already in the global ``text_list`` is printed,
        appended to ``text_list`` and passed to ``handle_text`` (target file
        'DuanZi.txt').  Errors on a single link or paragraph are printed and
        skipped.  Both browsers are always closed.
        """
        global urls, text_list
        for url in urls:
            my_selenium = Selenium(url)
            driver = my_selenium.driver
            try:
                # Make webdriver operate on the current page.
                driver.switch_to.default_content()
                # Scroll down so the browser loads the dynamic content.
                # Several scrolls with a short delay may be needed, depending
                # on network speed.
                driver.execute_script("window.scrollBy(0,40000)")
                time.sleep(1)

                links = list(
                    my_selenium.wait_until_exists(
                        '//div[contains(@id, "log")]/h3[@class="title"]/a'))

                for link in links:
                    # Stays None when opening the second browser fails, so
                    # the cleanup below never touches an unbound or stale
                    # driver.
                    driver2 = None
                    try:
                        post_url = link.get_attribute("href")
                        my_selenium2 = Selenium(post_url)
                        driver2 = my_selenium2.driver

                        paragraphs = list(
                            my_selenium2.wait_until_exists(
                                '//div[@class="cont"]/p'))
                        for paragraph in paragraphs:
                            try:
                                text = paragraph.text
                                if text not in text_list:
                                    print(text)
                                    # Remember the paragraph itself (not the
                                    # link title) so the duplicate check works.
                                    text_list.append(text)
                                    handle_text(text, 'DuanZi.txt')
                            except Exception as error:
                                print(error)
                    except Exception as error:
                        print(error)
                    finally:
                        if driver2 is not None:
                            driver2.close()
            finally:
                # Always release the listing browser, even on failure.
                driver.close()
Пример #4
0
    predicted = model.predict(features)[0]
    predicted = np.array(predicted)
    probab = predicted.max()
    predition = sentiment_tag[predicted.argmax()]
    return predition, probab


loadModel()

from auto_everything.web import Selenium

# Interactive loop: ask for a URL, open it, and print the prediction for
# every non-empty <p> text on the page.
while 1:
    url = input("请输入你想要分析的网站url: ")
    if url.strip('\n ') == "":
        continue

    my_selenium = Selenium(url)
    driver = my_selenium.driver
    try:
        # Scroll down so lazily loaded content is rendered before reading.
        driver.execute_script("window.scrollBy(0,40000)")
        time.sleep(1)
        elements = my_selenium.wait_until_exists('//p')

        for element in elements:
            try:
                text = element.text.strip('\n ')
                if text != "":
                    r = predictResult(text)
                    print(text, '\n', r, '\n' * 2)
            except Exception as error:
                # Best effort per paragraph: report the problem and go on
                # instead of hiding it silently.
                print(error)
    finally:
        # A new browser is started for every URL; shut it down so the
        # sessions do not pile up across iterations.
        driver.quit()
Пример #5
0
from auto_everything.web import Selenium
from time import sleep

from auto_everything.terminal import Terminal
t = Terminal()

# NOTE(review): presumably running Chrome instances are killed so the
# profile directory below is free to use - confirm.
t.kill("chrome")

my_selenium = Selenium("https://www.google.com",
                       headless=False,
                       user_data_dir="/home/yingshaoxo/.config/google-chrome/")
d = my_selenium.driver

try:
    # Locate the search input box.
    xpath = '/html/body/div[1]/div[3]/form/div[2]/div[1]/div[1]/div/div[2]/input'
    elements = my_selenium.wait_until_exists(xpath)

    if len(elements):
        # Send backspaces first to clear any existing text, then the query.
        elements[0].send_keys('\b' * 20, "yingshaoxo")

        # Click the search button.
        elements = my_selenium.wait_until_exists(
            '//input[@value="Google Search"]')
        if len(elements):
            # Alternative: d.execute_script("arguments[0].click();", elements[0])
            elements[0].click()
    else:
        # Guarded like the button lookup, instead of failing on elements[0].
        print("Search input box not found.")

    # Keep the window open for a while before exiting.
    sleep(30)
finally:
    # Always end the browser session, even if a step above raised.
    d.quit()
Пример #6
0
——————————————

"""
        f.write(text)


'''
urls = """
https://www.bilibili.com/video/av1415480
https://www.bilibili.com/video/av25255526
"""
urls = [url.strip(' \n') for url in urls.split('\n') if url.strip(' \n') != ""]
'''

# Open the ranking page and collect the link target of every title anchor;
# the resulting `urls` list is what the test below iterates over.
top_url = "https://www.bilibili.com/ranking/all/155/0/3"
my_selenium = Selenium(top_url)
driver = my_selenium.driver

try:
    elements = my_selenium.wait_until_exists(
        '//a[@class="title" and @target="_blank"]')
    # get_attribute returns None for an anchor without href; drop those so
    # no empty URL is opened later.
    urls = [
        href
        for href in (e.get_attribute("href") for e in elements)
        if href
    ]
finally:
    # Always release the browser window, even if the lookup raised.
    driver.close()

# Texts seen so far, shared with the test method for de-duplication.
text_list = []


class seleniumTest(unittest.TestCase):
    def setUp(self):
        pass

    def testEle(self):
Пример #7
0
    if model is None:
        print("Please run \"loadModel\" first.")
        return None
    features = findFeatures(text)
    predicted = model.predict(features)[0] # we have only one sentence to predict, so take index 0
    predicted = np.array(predicted)
    probab = predicted.max()
    predition = sentiment_tag[predicted.argmax()]
    return predition, probab


# Load the model once at start-up; the prediction function above refuses to
# run while the module-level `model` is None.
model = loadModel()

import time
from auto_everything.web import Selenium
# Start a single browser session up front; the loop below reuses this driver
# and navigates it with driver.get(url) for every URL entered.
my_selenium = Selenium("https://baidu.com")
driver = my_selenium.driver

# Emit a run of blank lines before the first prompt.
# NOTE(review): presumably to push start-up output out of view - confirm.
print('\n'*12)
while True:
    url = input("请输入你想要分析的网站url: ")
    if url.strip('\n ') == "":
        continue

    driver.get(url)
    driver.execute_script("window.scrollBy(0,40000)")
    time.sleep(1)
    elements = my_selenium.wait_until_exists('//div') # //p

    for e in elements:
        try: