Пример #1
0
    def get_link(self, query, s_date, e_date):
        """Collect Naver mobile blog-search result links for ``query``.

        Opens the Naver mobile blog search for the query and date range in a
        headless Chrome, keeps scrolling to the bottom until the page height
        stops growing, then appends every matching result link to
        ``self.ab_url``. The browser is always shut down afterwards.

        Args:
            query: Search text; concatenated into the URL exactly as given.
            s_date: Range start, placed after ``from`` in the ``nso`` filter.
            e_date: Range end, placed after ``to`` in the ``nso`` filter.
        """
        self.query = query
        self.s_date = s_date
        self.e_date = e_date

        options = webdriver.ChromeOptions()
        options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36")
        options.add_argument('headless')
        options.add_argument('--disable-gpu')
        options.add_argument('lang=ko_KR')

        # Raw string: the Windows path must not rely on invalid escape
        # sequences being passed through unchanged.
        # NOTE: on Linux the driver usually lives at
        # '/usr/lib/chromium-browser/chromedriver'.
        browser = WebDriver(
            executable_path=r'D:\python_workspace\pyTextMiner\selenium_server\chromedriver.exe',
            options=options)
        try:
            url = ("https://m.search.naver.com/search.naver?where=m_blog&sm=mtb_opt&query="
                   + query + "&display=15&st=sim&nso=p%3Afrom" + s_date + "to" + e_date)
            browser.get(url)
            browser.implicitly_wait(random.randrange(5, 10))

            scroll_pause_time = 1.5
            last_height = browser.execute_script("return document.body.scrollHeight")
            while True:
                # Scroll to the bottom and give lazy-loaded results time to appear.
                browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(scroll_pause_time)
                new_height = browser.execute_script("return document.body.scrollHeight")
                if new_height == last_height:
                    # Height no longer grows: everything has been loaded.
                    break
                last_height = new_height

            soup = BeautifulSoup(browser.page_source, 'html.parser')
            for anchor in soup.select(".total_dsc"):
                # Skip nodes without an href instead of raising KeyError.
                href = anchor.get("href", "")
                if href.startswith("https://m.blog.naver.com") or 'blog.me' in href:
                    self.ab_url.append(href)
        finally:
            # Always release the Chrome process, even if scraping failed.
            browser.quit()
        time.sleep(random.randrange(5, 15))
Пример #2
0
def completeMorePromotionABC(browser: WebDriver, cardNumber: int):
    """Answer an "ABC" style more-activities promotion with random choices.

    Clicks the link of the ``cardNumber``-th card under ``more-activities``,
    switches to the second window handle, reads the question counter from
    ``QuestionPane0``, clicks a random option (1-3) for each question followed
    by the button inside the matching ``AnswerPane``, then closes the window
    and switches back to the first handle.
    """
    card_link_xpath = (
        f'//*[@id="more-activities"]/div/mee-card[{cardNumber}]'
        '/div/card-content/mee-rewards-more-activities-card-item/div/div[3]/a')
    browser.find_element_by_xpath(card_link_xpath).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    counter_html = str(
        browser.find_element_by_xpath(
            '//*[@id="QuestionPane0"]/div[2]').get_attribute('innerHTML'))
    # Drop the first and last character, then take the largest number found.
    counter_text = counter_html[1:-1]
    question_total = max(
        [int(token) for token in counter_text.split() if token.isdigit()])

    for index in range(question_total):
        choice = random.randint(1, 3)
        option_xpath = (
            f"//*[@id='QuestionPane{index}']/div[1]/div[2]/a[{choice}]/div")
        browser.execute_script(
            f'document.evaluate("{option_xpath}", document, null, '
            'XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.click()')
        time.sleep(5)
        next_button_xpath = (
            f'//*[@id="AnswerPane{index}"]'
            '/div[1]/div[2]/div[4]/a/div/span/input')
        browser.find_element_by_xpath(next_button_xpath).click()
        time.sleep(3)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
Пример #3
0
def completeDailySetVariableActivity(browser: WebDriver, cardNumber: int):
    """Complete a daily-set card whose activity type is only known at runtime.

    Clicks the link of the ``cardNumber``-th card in the first daily-set
    group and switches to the second window handle. Then:

    * If the ``rqStartQuiz`` button exists and the question container becomes
      visible, reads ``_w.rewardsQuizRenderInfo.correctAnswer`` and clicks
      option 0 when its ``data-option`` matches, otherwise option 1.
    * If starting the quiz raises ``NoSuchElementException`` or
      ``TimeoutException``, treats the page as a multi-question pane layout
      and clicks a random option (1-3) per question.
    * If that layout is missing too, just waits a few seconds.

    Every path closes the current window and switches back to the first
    handle before returning.
    """
    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' +
        str(cardNumber) +
        ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)
    try:
        # Quiz layout: start it and wait (3 is passed as the last argument,
        # presumably a timeout in seconds) for the first question.
        browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
        waitUntilVisible(browser, By.XPATH,
                         '//*[@id="currentQuestionContainer"]/div/div[1]', 3)
    except (NoSuchElementException, TimeoutException):
        try:
            # Pane layout: strip the first/last character of the counter's
            # innerHTML and use the largest number in it as question count.
            counter = str(
                browser.find_element_by_xpath(
                    '//*[@id="QuestionPane0"]/div[2]').get_attribute(
                        'innerHTML'))[:-1][1:]
            numberOfQuestions = max(
                [int(s) for s in counter.split() if s.isdigit()])
            for question in range(numberOfQuestions):
                # Click a random option (1-3) via JS, then the button inside
                # the matching answer pane.
                browser.execute_script(
                    'document.evaluate("//*[@id=\'QuestionPane' +
                    str(question) + '\']/div[1]/div[2]/a[' +
                    str(random.randint(1, 3)) +
                    ']/div", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.click()'
                )
                time.sleep(5)
                browser.find_element_by_xpath(
                    '//*[@id="AnswerPane' + str(question) +
                    '"]/div[1]/div[2]/div[4]/a/div/span/input').click()
                time.sleep(3)
            time.sleep(5)
            browser.close()
            time.sleep(2)
            browser.switch_to.window(window_name=browser.window_handles[0])
            time.sleep(2)
            return
        except NoSuchElementException:
            # Neither layout found: linger for a random 5-9 s, then close.
            time.sleep(random.randint(5, 9))
            browser.close()
            time.sleep(2)
            browser.switch_to.window(window_name=browser.window_handles[0])
            time.sleep(2)
            return
    time.sleep(3)
    # Two-option quiz: the page exposes the expected answer directly.
    correctAnswer = browser.execute_script(
        "return _w.rewardsQuizRenderInfo.correctAnswer")
    if browser.find_element_by_id("rqAnswerOption0").get_attribute(
            "data-option") == correctAnswer:
        browser.find_element_by_id("rqAnswerOption0").click()
    else:
        browser.find_element_by_id("rqAnswerOption1").click()
    time.sleep(10)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
def login(driver: WebDriver):
    """Log in to the back-office system, retrying until login succeeds.

    Opens ``veryeast_config.BG_SYSTEM_URL``, resizes the window to the
    document's scroll size, types the configured user name, and then calls
    ``inputPwdCaptchaAndLogin`` repeatedly until it returns ``None``. Finally
    waits up to 5 seconds for the main page's sidebar element.
    """
    captcha_image_path = veryeast_config.SCREEN_IMG_DIR + "/img.png"

    # Open the back-office URL.
    driver.get(veryeast_config.BG_SYSTEM_URL)

    # Screen scaling factor, passed on to the captcha handling.
    pixel_ratio = driver.execute_script("return window.devicePixelRatio")
    print(f"devicePixelRatio={pixel_ratio}")

    page_width = driver.execute_script(
        "return document.documentElement.scrollWidth")
    page_height = driver.execute_script(
        "return document.documentElement.scrollHeight")
    print(f"width={page_width}, height={page_height}")
    driver.set_window_size(page_width, page_height)

    # Type the user name.
    driver.find_element_by_id("username").send_keys(
        base.config.account.veryeast_username)

    # Keep submitting password + captcha while a login button is returned.
    pending_login_button = driver.find_element_by_id("butn")
    while pending_login_button is not None:
        pending_login_button = inputPwdCaptchaAndLogin(
            driver, captcha_image_path, pixel_ratio)

    # After entering the main page.
    sidebar_present = EC.presence_of_element_located(
        (By.CLASS_NAME, "sider___g53Yu"))
    WebDriverWait(driver, 5).until(sidebar_present)
Пример #5
0
def _scroll_to_elem(driver: WebDriver,
                    elem: WebElement,
                    y_delta=-70,
                    step=70,
                    verbose=False,
                    stop_if_visible=True):
    """Scroll the window step by step until ``elem`` is near the viewport top.

    Each iteration re-reads the element position and the current page
    offset, then moves towards ``elem.location['y'] + y_delta`` by roughly
    ``step`` pixels (scaled by a log-normal random factor).

    The loop stops when:
      * the target is less than 50 px away (a final exact scroll is issued),
      * the offset did not change since the previous step and the element is
        displayed and enabled, or
      * ``should_stop()`` returns true.

    Args:
        driver: WebDriver used to run the scroll scripts.
        elem: Element to scroll to.
        y_delta: Offset added to the element's y position to get the target.
        step: Nominal scroll distance per iteration, in pixels.
        verbose: Print the target, visibility and current offset each step.
        stop_if_visible: Accepted for compatibility; not used by this
            function.
    """
    prev_y = -1
    while True:
        # Read the position once per iteration (each read is a WebDriver call).
        target_y = elem.location['y'] + y_delta

        cur_y = driver.execute_script('return window.pageYOffset')
        if verbose:
            print(
                f'scroll_to_elem: target_y: {target_y} elem.displayed: {elem.is_displayed()} '
                f'cur_y: {cur_y}')

        if abs(target_y - cur_y) < 50:
            # Close enough: land exactly on the target and finish.
            driver.execute_script(f"window.scrollTo(0, {target_y})")
            break
        if (cur_y == prev_y) and elem.is_displayed() and elem.is_enabled():
            # Scrolling has stalled but the element is usable.
            break

        direction = +1.0 if (target_y - cur_y) >= 0 else -1.0
        next_y = int(cur_y +
                     direction * step * random.lognormvariate(0, 0.2))
        driver.execute_script(f"window.scrollTo(0, {next_y})")
        prev_y = cur_y

        if should_stop():
            break
        _human_wait(0.05)
Пример #6
0
def assertLogin(d: WebDriver):
    """Ensure the browser is signed in to Google as ``GEMAIL``.

    If the account page already shows ``GEMAIL`` the function returns at
    once. Otherwise it logs out (when signed in as someone else), opens the
    OAuth playground consent URL, picks "Use another account" when an account
    chooser is shown, and submits ``GEMAIL`` / ``GPASS`` from the environment.
    Asserts that the browser ends up on the OAuth playground.
    """
    email_input_xpath = "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div/div[1]/div/div[1]/input"
    password_input_xpath = "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div[1]/div/div/div/div/div[1]/div/div[1]/input"

    d.get("https://myaccount.google.com/")
    sleep(0.4)
    still_on_account_page = match(
        r"^(http|https):\/\/(myaccount\.google\.com).*$", d.current_url)
    if still_on_account_page:
        # Open the account menu and compare the displayed e-mail address.
        d.find_element_by_xpath(
            "/html/body/div[2]/header/div[2]/div[3]/div[1]/div/div/a").click()
        sleep(0.1)
        same_account_script = (
            "return (document.querySelector('.gb_sb').innerText == \"%s\" ? true : false)"
            % getenv('GEMAIL'))
        if d.execute_script(same_account_script):
            return
        # Signed in as a different user: log out before continuing.
        d.get("https://accounts.google.com/Logout")
        sleep(1)

    d.get(
        'https://accounts.google.com/o/oauth2/v2/auth/oauthchooseaccount?redirect_uri=https%3A%2F%2Fdevelopers.google.com%2Foauthplayground&prompt=consent&response_type=code&client_id=407408718192.apps.googleusercontent.com&scope=email&access_type=offline&flowName=GeneralOAuthFlow'
    )
    sleep(1)
    # If an account chooser is shown, pick "Use another account".
    d.execute_script("""
if (document.querySelector('.OVnw0d') != null) {
    for (li of document.querySelector('.OVnw0d').children) {
        if (li.innerText == "Use another account") {
            li.children[0].click();
        }
    }
    return true;
} else {
    return false;
}
""")
    sleep(1)
    try:
        # Primary form layout, addressed by absolute XPath; each field is
        # looked up again before pressing RETURN.
        d.find_element_by_xpath(email_input_xpath).send_keys(getenv('GEMAIL'))
        d.find_element_by_xpath(email_input_xpath).send_keys(Keys.RETURN)
        sleep(2)
        d.find_element_by_xpath(password_input_xpath).send_keys(
            getenv('GPASS'))
        d.find_element_by_xpath(password_input_xpath).send_keys(Keys.RETURN)
    except NoSuchElementException:
        # Alternative form layout addressed by element ids.
        d.find_element_by_id("Email").send_keys(getenv('GEMAIL'))
        d.find_element_by_id("Email").send_keys(Keys.RETURN)
        sleep(2)
        d.find_element_by_id("password").send_keys(getenv('GPASS'))
        d.find_element_by_id("password").send_keys(Keys.RETURN)
    sleep(2)
    assert "developers.google.com/oauthplayground" in d.current_url
Пример #7
0
 def __play(self, task_driver: WebDriver):
     """Click the first ``<div>`` whose class equals ``VIDEO_PLAY_CLASS_NAME2``.

     Builds a small JavaScript snippet that scans every ``div`` on the page,
     clicks the first one whose ``className`` matches exactly, and runs it in
     ``task_driver``. Nothing happens if no div matches.
     """
     # The class name is spliced into the script between single quotes.
     js = '''
     var d = document.getElementsByTagName("div");
         for (var i=0;i<d.length;i++){
             if(d[i].className == \'''' + VIDEO_PLAY_CLASS_NAME2 + '''\'){
                 d[i].click();
                 break;
             }
         }'''
     task_driver.execute_script(script=js)
Пример #8
0
def setup_proxy(driver: WebDriver, ip, port):
    """Set the SOCKS proxy preferences from the ``about:config`` page.

    Navigates to ``about:config`` and runs a script that writes
    ``network.proxy.type``, ``network.proxy.socks``,
    ``network.proxy.socks_port`` and disables ``socks_remote_dns``.

    Args:
        driver: WebDriver to configure.
        ip: SOCKS host written into the preference as a string.
        port: SOCKS port; ``0`` selects proxy type 5, anything else type 1.
            NOTE(review): presumably 5 = system proxy settings and
            1 = manual configuration in Firefox - confirm.
    """
    driver.get("about:config")

    if port == 0:
        proxy_type = 5
    else:
        proxy_type = 1

    prefs_script = f"""var
                    prefs = Components.classes["@mozilla.org/preferences-service;1"]
                    .getService(Components.interfaces.nsIPrefBranch);
                    prefs.setIntPref("network.proxy.type", {proxy_type});
                    prefs.setCharPref("network.proxy.socks", "{ip}");
                    prefs.setIntPref("network.proxy.socks_port", {port});
                    prefs.setBoolPref("network.proxy.socks_remote_dns",false);"""
    driver.execute_script(prefs_script)
Пример #9
0
def collect_moments(driver: WebDriver, max_stalled_scrolls: int = 30):
    """Scroll the moments page until enough ``card`` elements are loaded.

    Opens ``moments_url`` and keeps scrolling down by 10000 px, re-reading
    the elements with class ``card`` after each scroll, until at least
    ``max_video`` of them exist.

    Args:
        driver: WebDriver used to load and scroll the page.
        max_stalled_scrolls: Give up after this many consecutive scrolls
            (about one second apart) during which the number of cards did not
            change. Without this the loop would never end on a page that has
            fewer than ``max_video`` cards.

    Returns:
        The list of card elements found by the last lookup; it may hold
        fewer than ``max_video`` items if loading stalled.
    """
    driver.get(moments_url)
    previous_count = -1
    stalled_scrolls = 0
    while True:
        driver.execute_script('window.scrollBy(0,10000)')
        videos = driver.find_elements_by_class_name("card")
        if len(videos) >= max_video:
            break

        if len(videos) == previous_count:
            stalled_scrolls += 1
            if stalled_scrolls >= max_stalled_scrolls:
                # No new cards for a long time: the page is exhausted.
                break
        else:
            stalled_scrolls = 0
            previous_count = len(videos)
        time.sleep(1)

    return videos
def completeDailySetQuiz(browser: WebDriver, cardNumber: int):
    """Complete a daily-set quiz card by clicking the correct options.

    Clicks the link of the ``cardNumber``-th daily-set card, switches to the
    second window handle and starts the quiz. The number of questions and
    options is read from ``_w.rewardsQuizRenderInfo``:

    * 8 options: every option whose ``iscorrectoption`` attribute is "true"
      is clicked in turn.
    * 4 options: the option whose ``data-option`` equals ``correctAnswer`` is
      clicked.

    Returns early (without closing the window) when
    ``waitUntilQuestionRefresh`` reports failure; calls ``resetTabs`` and
    returns when the quiz does not load.
    """
    time.sleep(2)
    card_link_xpath = (
        f'//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[{cardNumber}]'
        '/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a')
    browser.find_element_by_xpath(card_link_xpath).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    if not waitUntilQuizLoads(browser):
        resetTabs(browser)
        return

    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)

    question_count = browser.execute_script(
        "return _w.rewardsQuizRenderInfo.maxQuestions")
    option_count = browser.execute_script(
        "return _w.rewardsQuizRenderInfo.numberOfOptions")

    for _ in range(question_count):
        if option_count == 8:
            # Collect all correct options first, then click them one by one.
            correct_ids = [
                option_id
                for option_id in (f"rqAnswerOption{i}" for i in range(8))
                if browser.find_element_by_id(option_id).get_attribute(
                    "iscorrectoption").lower() == "true"
            ]
            for option_id in correct_ids:
                browser.find_element_by_id(option_id).click()
                time.sleep(5)
                if not waitUntilQuestionRefresh(browser):
                    return
            time.sleep(5)
        elif option_count == 4:
            expected_option = browser.execute_script(
                "return _w.rewardsQuizRenderInfo.correctAnswer")
            for i in range(4):
                option_id = f"rqAnswerOption{i}"
                shown = browser.find_element_by_id(option_id).get_attribute(
                    "data-option")
                if shown != expected_option:
                    continue
                browser.find_element_by_id(option_id).click()
                time.sleep(5)
                if not waitUntilQuestionRefresh(browser):
                    return
                break
            time.sleep(5)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
Пример #11
0
def email_login(driver: WebDriver, user_account, user_password):
    """Log in to a 126.com mailbox in a new window and search for "facebook".

    Opens the 126 passport login URL with ``window.open``, switches to the
    second window handle, types the account name (the part of
    ``user_account`` before ``@``) and the password, and submits with ENTER.
    If a message box appears after login, its third ``btn-inner`` button is
    clicked; if none appears within the wait, the step is skipped. Finally
    the toolbar item is clicked and "facebook" is entered into the search
    input.

    :param driver: browser driver
    :param user_account: full e-mail address of the account
    :param user_password: password of the account
    :return: None
    """
    # Imported locally so this function does not depend on the file-level
    # import list.
    from selenium.common.exceptions import TimeoutException

    url = "https://passport.126.com/ydzj/maildl?product=mail126&pdconf=yddl_mail126_conf&mc=146E1F&curl=https%3A%2F%2Fmail.126.com%2Fentry%2Fcgi%2Fntesdoor%3Ffrom%3Dsmart%26language%3D0%26style%3D11%26destip%3D192.168.202.48%26df%3Dsmart_ios"
    js = 'window.open("{}");'.format(url)
    driver.execute_script(js)
    handles = driver.window_handles
    print(handles)
    # Switch to the newly opened login window.
    driver.switch_to.window(handles[1])
    time.sleep(10)
    email_account = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'input[name="account"]')))

    super_sendkeys(email_account, user_account.split("@")[0])
    time.sleep(5)
    email_password = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'input[type="password"]')))

    super_sendkeys(email_password, user_password)
    time.sleep(5)

    # Submit the form by pressing ENTER in the password field.
    login_email = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'input[type="password"]')))
    login_email.send_keys(Keys.ENTER)
    time.sleep(5)

    # Handle the pop-up, if any. WebDriverWait.until raises TimeoutException
    # rather than returning a falsy value, so "no pop-up" must be caught.
    time.sleep(30)
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'div[class="msgbox-simpleText "]')))
    except TimeoutException:
        pass
    else:
        alter_button = driver.find_elements_by_css_selector(
            'span[class="btn-inner"]')
        alter_button[2].click()

    handles = driver.window_handles
    print(handles)
    # Make sure the mailbox window is still the active one.
    driver.switch_to.window(handles[1])

    search_button = driver.find_element_by_css_selector(
        'div[class="toolbar-optItem "]')
    search_button.click()

    send_info = driver.find_element_by_css_selector('input[class="ipt-input"]')
    send_info.send_keys("facebook")
    send_info.send_keys(Keys.ENTER)
Пример #12
0
def find_more_butto_and_click(driver: WebDriver):
    """Click the first "continue reading" button that can be found.

    Tries each class name in ``MORE_BUTTON_CLASS`` in order. The first
    element found is clicked through JavaScript, followed by a 2 second
    pause. Prints a message when no candidate class matches.
    """
    for more_btn_class in MORE_BUTTON_CLASS:
        try:
            more_button = driver.find_element_by_class_name(more_btn_class)
        except Exception:
            # This class is not on the page; try the next one. A bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            continue
        print("点击[继续阅读]按钮,加载所有子页面")
        driver.execute_script("arguments[0].click();", more_button)
        time.sleep(2)
        return
    print("没有找到[继续阅读]按钮")
Пример #13
0
def _scroll_down_like_human(driver: WebDriver, step=70, wait=0.03):
    """Scroll down in small randomised steps until the page stops moving.

    Starts at y=100 and advances the target by ``step`` times a log-normal
    factor after every scroll, pausing via ``_human_wait(wait)``. Ends when
    the page offset equals the previous one or ``should_stop()`` is true.
    """
    target = 100
    last_offset = 0
    while True:
        driver.execute_script(f"window.scrollTo(0, {target})")
        target = target + step * random.lognormvariate(0, 0.1)
        _human_wait(wait)

        offset = driver.execute_script('return window.pageYOffset;')
        reached_bottom = offset == last_offset
        if reached_bottom or should_stop():
            return

        last_offset = offset
Пример #14
0
def get_job_items_per_tab(driver: WebDriver, main_container: dict):
    """Fill ``main_container`` with the jobs listed in each list container.

    For every ``list-container`` element except the last, opens each job
    link in a new window, reads company, title, location, description and
    post URL from the page, and stores the resulting ``Job`` dicts under the
    key of ``main_container`` at the same position. Any exception is printed
    and the process exits with status 2.
    """

    def input_value(placeholder):
        # Value of the <input> identified by its placeholder text.
        return driver.find_element(
            By.XPATH,
            f"//input[@placeholder='{placeholder}']").get_attribute('value')

    try:
        containers = driver.find_elements(By.CLASS_NAME,
                                          'list-container')[:-1]
        container_keys = list(main_container.keys())
        print("Fetching data...")
        for position, container in enumerate(containers):
            collected = []
            # The fourth <div> inside the container holds the job links.
            links_holder = container.find_elements(By.TAG_NAME, 'div')[3]
            job_links = links_holder.find_elements(By.TAG_NAME, 'a')

            for link in tqdm(job_links):
                driver.execute_script(
                    f"window.open('{link.get_attribute('href')}', '_blank');"
                )
                handles = driver.window_handles
                sleep(3)
                driver.switch_to.window(handles[1])
                driver.implicitly_wait(5)

                # Build a Job object from the form's input fields.
                company = input_value('Company')
                job_title = input_value('+ add title')
                location = input_value('+ add location')
                description = driver.find_element(By.CLASS_NAME,
                                                  'ql-editor').text
                url_holder = driver.find_element(
                    By.XPATH, "//p[@title='Post URL']/following-sibling::div")
                post_url = url_holder.find_element(
                    By.TAG_NAME, 'a').get_attribute('href')

                job = Job(company, job_title, post_url, location, description)
                collected.append(job.as_dict())
                driver.close()
                driver.switch_to.window(handles[0])

            main_container[container_keys[position]] = collected

    except Exception as err:
        print("Error getting job_container:" + str(err))
        sys.exit(2)
Пример #15
0
def GetExampleAndSchema(driver: WebDriver) -> Tuple[str, str]:
    """Extract the JSON example and schema from an endpoint page.

    The payload text areas (POST request payload for upload) are read first.
    If they yield no example, the response-body text areas (the GET
    response) are read instead. Two empty strings are returned when neither
    provides an example.
    """
    # Bottom table: schema for a POST request payload.
    example = driver.execute_script('return jQuery("textarea.payload_text").val();')
    schema = driver.execute_script('return jQuery("textarea.payload_text_schema").val();')
    if example is not None:
        return example, schema

    # Right-hand table: schema for the GET response.
    example = driver.execute_script('return jQuery("textarea#response_body_example").val();')
    schema = driver.execute_script('return jQuery("textarea#response_body_schema").val();')
    if example is not None:
        return example, schema

    # Give up, there's probably no table.
    return '', ''
Пример #16
0
def completeDailySetThisOrThat(browser: WebDriver, cardNumber: int):
    """Complete a daily-set "This or That" quiz (10 two-option questions).

    Clicks the link of the ``cardNumber``-th daily-set card, switches to the
    second window handle, polls until the ``rqStartQuiz`` button exists and
    starts the quiz. For each of the 10 questions the code of both options is
    computed with ``getAnswerCode`` (keyed by ``_G.IG``) and compared with
    ``_w.rewardsQuizRenderInfo.correctAnswer``; the matching option is
    clicked. Finally the window is closed and the first handle re-selected.

    NOTE: the start-button polling has no timeout; it waits for as long as
    the button is missing.
    """
    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' +
        str(cardNumber) +
        ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    # Poll for the start button. Catch Exception rather than using a bare
    # ``except`` so Ctrl-C / SystemExit can still break out of the wait.
    while True:
        try:
            browser.find_element_by_xpath('//*[@id="rqStartQuiz"]')
            break
        except Exception:
            time.sleep(0.5)

    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)
    for question in range(10):
        answerEncodeKey = browser.execute_script("return _G.IG")

        answer1 = browser.find_element_by_id("rqAnswerOption0")
        answer1Title = answer1.get_attribute('data-option')
        answer1Code = getAnswerCode(answerEncodeKey, answer1Title)

        answer2 = browser.find_element_by_id("rqAnswerOption1")
        answer2Title = answer2.get_attribute('data-option')
        answer2Code = getAnswerCode(answerEncodeKey, answer2Title)

        correctAnswerCode = browser.execute_script(
            "return _w.rewardsQuizRenderInfo.correctAnswer")

        # Click whichever option encodes to the expected answer code.
        if answer1Code == correctAnswerCode:
            answer1.click()
            time.sleep(8)
        elif answer2Code == correctAnswerCode:
            answer2.click()
            time.sleep(8)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
Пример #17
0
def inputPasswordAndCapture(driver: WebDriver, password):
    """Type the password, wait for the captcha, and submit when it is filled.

    If the password field is empty, ``password`` is typed into it and focus
    is moved to the captcha input. After a 5 second pause (presumably so the
    captcha can be entered), the login button is clicked only when the
    captcha field contains exactly 4 characters.

    Args:
        driver: WebDriver showing the login form.
        password: Password to type when the field is still empty.

    Returns:
        The login button as last looked up with ``ElementUtils.findElement``
        (re-queried after a click). NOTE(review): ``findElement`` appears to
        return ``None`` when the element is missing - confirm.
    """
    captcha_element = ElementUtils.findElement(driver, By.ID, "login_checkcode")
    login_btn = ElementUtils.findElement(driver, By.ID, "login_submit_btn")
    inputPasswordElement = driver.find_element_by_id("password")
    if not inputPasswordElement.get_attribute('value').strip():
        inputPasswordElement.send_keys(password)
        time.sleep(0.2)
        captcha_element = ElementUtils.findElement(driver, By.ID, "login_checkcode")
        if captcha_element is not None:
            driver.execute_script("arguments[0].focus();", captcha_element)
    time.sleep(5)
    # The captcha (or the button) may be missing; the lookup above is already
    # None-checked, so guard here as well instead of raising AttributeError.
    captcha_ready = (
        captcha_element is not None
        and len(captcha_element.get_attribute('value').strip()) == 4)
    if captcha_ready and login_btn is not None:
        # A loading spinner (id="spin" class="spinner") is shown after the click.
        login_btn.click()
        time.sleep(2)
        login_btn = ElementUtils.findElement(driver, By.ID, "login_submit_btn")
    return login_btn
Пример #18
0
def completeDailySetThisOrThat(browser: WebDriver, cardNumber: int):
    """Complete a daily-set "This or That" quiz (10 two-option questions).

    Clicks the link of the ``cardNumber``-th daily-set card, switches to the
    second window handle and starts the quiz. For each of the 10 questions
    the answer code of both options is computed in the page (sum of the
    title's character codes plus the last two hex digits of ``_G.IG``) and
    compared with ``_w.rewardsQuizRenderInfo.correctAnswer``; the matching
    option is clicked. Finally the window is closed and the first handle is
    selected again.
    """
    # The key and title are passed as script arguments instead of being
    # concatenated into the JS source, so quotes or backslashes in an option
    # title cannot break (or alter) the script.
    answer_code_js = (
        "var IG = arguments[0]; "
        "function getAnswerCode(n){for (var r, t = 0, i = 0; i < n.length; i++) t += n.charCodeAt(i); "
        "return r = parseInt(IG.substr(IG.length - 2), 16), t += r, t.toString();} "
        "return getAnswerCode(arguments[1]);")

    time.sleep(2)
    browser.find_element_by_xpath(
        '//*[@id="daily-sets"]/mee-card-group[1]/div/mee-card[' +
        str(cardNumber) +
        ']/div/card-content/mee-rewards-daily-set-item-content/div/div[3]/a'
    ).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)
    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)
    for question in range(10):
        answerEncodeKey = browser.execute_script("return _G.IG")

        answer1 = browser.find_element_by_id("rqAnswerOption0")
        answer1Title = answer1.get_attribute('data-option')
        answer1Code = browser.execute_script(
            answer_code_js, answerEncodeKey, answer1Title)

        answer2 = browser.find_element_by_id("rqAnswerOption1")
        answer2Title = answer2.get_attribute('data-option')
        answer2Code = browser.execute_script(
            answer_code_js, answerEncodeKey, answer2Title)

        correctAnswerCode = browser.execute_script(
            "return _w.rewardsQuizRenderInfo.correctAnswer")

        # Click whichever option encodes to the expected answer code.
        if answer1Code == correctAnswerCode:
            answer1.click()
            time.sleep(8)
        elif answer2Code == correctAnswerCode:
            answer2.click()
            time.sleep(8)

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
Пример #19
0
def login(browser: WebDriver, email: str, pwd: str, isMobile: bool = False):
    """Sign in at login.live.com and then verify the Bing login.

    Types the e-mail, sets the password field, clicks through the optional
    security-check and "stay signed in" steps when they are present, and
    finally calls ``checkBingLogin(browser, isMobile)``.

    Args:
        browser: WebDriver to drive.
        email: Account e-mail typed into the ``loginfmt`` field.
        pwd: Account password written into the ``i0118`` field.
        isMobile: Passed through to ``checkBingLogin``.
    """
    # Open the Microsoft account login page.
    browser.get('https://login.live.com/')
    # Wait complete loading
    waitUntilVisible(browser, By.ID, 'loginHeader', 10)
    # Enter email
    print('[LOGIN]', 'Writing email...')
    browser.find_element_by_name("loginfmt").send_keys(email)
    # Click next
    browser.find_element_by_id('idSIButton9').click()
    # Wait 2 seconds
    time.sleep(2)
    # Wait complete loading
    waitUntilVisible(browser, By.ID, 'loginHeader', 10)
    # Enter password. It is handed over as a script argument rather than
    # spliced into the JS source, so quotes or backslashes in the password
    # cannot break or alter the script.
    browser.execute_script(
        "document.getElementById('i0118').value = arguments[0];", pwd)
    print('[LOGIN]', 'Writing password...')
    # Click next
    browser.find_element_by_id('idSIButton9').click()
    # Wait 5 seconds
    time.sleep(5)
    # Click Security Check (only shown sometimes)
    print('[LOGIN]', 'Passing security checks...')
    try:
        browser.find_element_by_id('iLandingViewAction').click()
    except (NoSuchElementException, ElementNotInteractableException):
        pass
    # Wait complete loading
    try:
        waitUntilVisible(browser, By.ID, 'KmsiCheckboxField', 10)
    except TimeoutException:
        pass
    # Click next
    try:
        browser.find_element_by_id('idSIButton9').click()
        # Wait 5 seconds
        time.sleep(5)
    except (NoSuchElementException, ElementNotInteractableException):
        pass
    print('[LOGIN]', 'Logged-in !')
    # Check Login
    print('[LOGIN]', 'Ensuring login on Bing...')
    checkBingLogin(browser, isMobile)
Пример #20
0
 def user_agent_to_session(self,
                           driver: WebDriver = None,
                           session: Session = None) -> None:
     """Copy the driver's user-agent into the session's headers.

     Falls back to ``self.driver`` / ``self.session`` for arguments that are
     omitted (or otherwise falsy).
     """
     if not driver:
         driver = self.driver
     if not session:
         session = self.session
     user_agent = driver.execute_script("return navigator.userAgent;")
     headers_patch = {"User-Agent": user_agent}
     session.headers.update(headers_patch)
Пример #21
0
def download_file(browser: WebDriver, doc_id: str):
    """Trigger a document download, solving the captcha dialog with OCR.

    Calls the page's ``_Layout_DownloadAuthority`` for ``doc_id`` and then
    loops: screenshot the captcha image, pre-process it with
    ``convert_img``, read it with Tesseract, type the text and submit. An
    alert after submitting is accepted and the loop tries again. The loop
    has no explicit exit; it ends when a lookup or action raises, for
    example once the dialog elements are no longer present.

    Afterwards the function waits until ``download_temp_path`` contains at
    least one entry.

    Args:
        browser: WebDriver showing the document page.
        doc_id: Document id spliced into the download call.

    Returns:
        Path of the first entry listed in ``download_temp_path``.
    """
    try:
        browser.execute_script("_Layout_DownloadAuthority('" + doc_id +
                               "', 'P001', 'P001', 0)")
        time.sleep(0.5)
        while True:
            code_input = browser.find_element_by_css_selector(
                'input[name="ValidateCode"]')
            submit = browser.find_elements_by_css_selector(
                '.ui-dialog-buttonset button')[0]
            time.sleep(0.3)
            with open(captcha_temp, 'wb') as file:
                file.write(
                    browser.find_element_by_css_selector(
                        'img[alt="驗證碼圖片"]').screenshot_as_png)
            img = Image.open(captcha_temp)
            img = convert_img(img, 192)
            img.save(captcha2_temp)
            text = pytesseract.image_to_string(img, lang='eng')
            # Strip the whitespace Tesseract adds around/inside the result.
            text = str(text).replace(" ", "").replace("\n",
                                                      "").replace("\f", "")
            print('##' + text + '##')
            if not text:
                # Nothing recognised: click the dialog link (presumably
                # "refresh captcha") and start over.
                browser.find_element_by_xpath(
                    '/html/body/div[10]/div[2]/p/a').click()
                time.sleep(0.1)
                continue
            code_input.send_keys(text)
            time.sleep(0.1)
            submit.click()
            time.sleep(0.1)
            try:
                # An alert after submitting is dismissed before retrying.
                browser.switch_to.alert.accept()
            except NoAlertPresentException:
                pass
    except Exception:
        # Deliberate best effort: any failure above ends the captcha loop.
        # Exception (not BaseException) keeps Ctrl-C / SystemExit working.
        pass
    print(str(os.listdir(download_temp_path)))
    while len(os.listdir(download_temp_path)) == 0:
        time.sleep(1)
    time.sleep(1)
    return download_temp_path + os.path.sep + os.listdir(download_temp_path)[0]
Пример #22
0
def _scroll_up_like_human(driver: WebDriver,
                          step=50,
                          wait=0.03,
                          verbose=False):
    """Scroll up in small randomised steps until the page stops moving.

    Starts from the current page offset and lowers the target by ``step``
    times a log-normal factor after every scroll, pausing via
    ``_human_wait(wait)``. Ends when the page offset equals the previous one
    or ``should_stop()`` is true.
    """
    target = driver.execute_script('return window.pageYOffset;')
    if verbose:
        print("pos0: ", target)

    last_offset = -1
    while True:
        driver.execute_script(f"window.scrollTo(0,{target})")
        target = target - step * random.lognormvariate(0, 0.1)
        _human_wait(wait)

        offset = driver.execute_script('return window.pageYOffset;')
        no_movement = offset == last_offset
        if no_movement or should_stop():
            return

        last_offset = offset
Пример #23
0
def _scroll_to_y(driver: WebDriver, target_y: int, step=70, verbose=False):
    """Scroll step by step until the page offset is close to ``target_y``.

    While the offset is 50 px or more away, moves towards the target by
    ``step`` times a log-normal factor, then pauses via ``_human_wait``.
    Once within 50 px, jumps exactly to ``target_y`` and stops. Also stops
    when ``should_stop()`` is true.
    """
    while True:
        current = driver.execute_script('return window.pageYOffset')
        if verbose:
            print(f'scroll_to_y: cur_y: {current}')

        remaining = target_y - current
        if abs(remaining) < 50:
            driver.execute_script(f"window.scrollTo(0, {target_y})")
            return

        sign = -1.0 if remaining < 0 else +1.0
        next_y = int(current + sign * step * random.lognormvariate(0, 0.2))
        driver.execute_script(f"window.scrollTo(0, {next_y})")

        if should_stop():
            return
        _human_wait(0.05)
def completeMorePromotionThisOrThat(browser: WebDriver, cardNumber: int):
    """Complete a more-activities "This or That" quiz (10 questions).

    Clicks the link of the ``cardNumber``-th more-activities card, switches
    to the second window handle and starts the quiz (calling ``resetTabs``
    and returning if it does not load). For each question both options are
    encoded with ``getAnswerCode`` and the one matching
    ``_w.rewardsQuizRenderInfo.correctAnswer`` is clicked. Finally the window
    is closed and the first handle selected again.
    """
    card_link_xpath = (
        f'//*[@id="more-activities"]/div/mee-card[{cardNumber}]'
        '/div/card-content/mee-rewards-more-activities-card-item/div/div[3]/a')
    browser.find_element_by_xpath(card_link_xpath).click()
    time.sleep(1)
    browser.switch_to.window(window_name=browser.window_handles[1])
    time.sleep(8)

    if not waitUntilQuizLoads(browser):
        resetTabs(browser)
        return

    browser.find_element_by_xpath('//*[@id="rqStartQuiz"]').click()
    waitUntilVisible(browser, By.XPATH,
                     '//*[@id="currentQuestionContainer"]/div/div[1]', 10)
    time.sleep(3)

    for _ in range(10):
        encode_key = browser.execute_script("return _G.IG")

        # (element, answer code) for option 0 and option 1, in that order.
        candidates = []
        for option_id in ("rqAnswerOption0", "rqAnswerOption1"):
            option = browser.find_element_by_id(option_id)
            title = option.get_attribute('data-option')
            candidates.append((option, getAnswerCode(encode_key, title)))

        expected_code = browser.execute_script(
            "return _w.rewardsQuizRenderInfo.correctAnswer")

        for option, code in candidates:
            if code == expected_code:
                option.click()
                time.sleep(8)
                break

    time.sleep(5)
    browser.close()
    time.sleep(2)
    browser.switch_to.window(window_name=browser.window_handles[0])
    time.sleep(2)
Пример #25
0
    async def _get_request_metadata_from_web_driver(
            cls, driver: WebDriver) -> Tuple[int, int, int, int]:
        """Read the comment request metadata from the current page.

        Runs ``comment_metadata_javascript`` in ``driver`` and converts the
        four-item result: the first item is returned as is, the second and
        third are parsed as integers, and the fourth is parsed after
        ``non_number_replacement_regex`` matches are removed (``0`` if
        nothing is left).

        Returns:
            ``(request_id, type_id, item_id, max_comments)``.
        """
        raw: Tuple[int, str, str, str] = driver.execute_script(
            comment_metadata_javascript)

        request_id: int = raw[0]
        type_id: int = int(raw[1])
        item_id: int = int(raw[2])

        digits_only = non_number_replacement_regex.sub(
            repl='', string=raw[3])
        max_comments: int = int(digits_only or '0')

        return request_id, type_id, item_id, max_comments
Пример #26
0
 def do(self, task_driver: WebDriver, task_url: str, timeout: int):
     """Open ``task_url`` and scroll the page once per second.

     Returns ``None`` immediately when ``check_task.check_wrap`` is truthy
     for the loaded page. Otherwise runs ``PAGE_ROLL_JS`` once a second for
     ``timeout`` seconds, each time with a slightly randomised, growing
     value, while a progress bar is shown, and then marks the task as
     successful.
     """
     task_driver.get(url=task_url)
     if check_task.check_wrap(task_driver=task_driver):
         return None

     progress = tqdm(desc=TASK_IDE[TASK_ID[1]],
                     total=timeout,
                     leave=False,
                     ncols=BAR_LENGTH)
     for second in range(1, timeout + 1):
         progress.update(1)
         # Random value between 0 and 10 added to the scroll argument.
         jitter: float = get_random.get_random_float(a=0, b=10)
         roll_script: str = PAGE_ROLL_JS.format(second * 10 + jitter)
         task_driver.execute_script(script=roll_script)
         time.sleep(1)
     progress.close()
     self.__success = True
Пример #27
0
def search_webku(driver: WebDriver):
    """Load ``WENKU_URL``, collect the text of every reader page and save it.

    The function loads the page, clicks the "continue reading" button via
    ``find_more_butto_and_click``, scrolls each ``reader-page`` element into
    view so its content is loaded, gathers the text with ``get_page_text``,
    tidies it with ``remove_needless_newline``, prints it and writes it to
    ``WENKU_FILE``.

    Args:
        driver: Selenium driver used for all page interaction.
    """
    print("开始加载网页..")
    try:
        driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
        driver.set_script_timeout(PAGE_LOAD_TIMEOUT)
        driver.get(WENKU_URL)
        driver.set_page_load_timeout(NORMAL_FIND_ELEMENT_TIMEOUT)
        driver.set_script_timeout(NORMAL_FIND_ELEMENT_TIMEOUT)
    except Exception:
        # Best effort: a load failure (e.g. a timeout) is ignored on purpose.
        # Loading is deliberately NOT stopped with window.stop(), because the
        # content of the sub-pages has to be lazy-loaded afterwards.
        # Only ``Exception`` is caught so Ctrl-C / SystemExit still propagate.
        pass

    # Click "continue reading" so that all sub-pages get loaded.
    print("页面加载成功,尝试寻找[继续阅读]按钮")
    find_more_butto_and_click(driver)

    page_div_list: List[WebElement] = driver.find_elements_by_class_name(
        "reader-page")
    print(f"共找到{len(page_div_list)}页, 开始循环解析..")

    page_texts: List[str] = []
    for page_number, page_div in enumerate(page_div_list, start=1):
        # Scroll the browser to this page.
        driver.execute_script("arguments[0].scrollIntoView();", page_div)
        # Wait for the lazily loaded content of the page.
        wait_until_found_p_element(driver)
        # Collect the text of the current page.
        page_texts.append(get_page_text(page_div))
        print(f"第{page_number}页数据解析成功")

    # Remove superfluous newlines.
    text = remove_needless_newline("".join(page_texts))

    print("所有页面解析完成, 准备写入文件")
    print(f"\n---------start--------\n{text}\n---------end--------")

    # NOTE(review): the file is opened with the platform default encoding;
    # consider an explicit encoding if the output must be portable.
    with open(WENKU_FILE, "w") as wf:
        wf.write(text)
    print(f"所有页面数据已写入 [{WENKU_FILE}] 中")
def completePunchCard(browser: WebDriver, url: str, childPromotions: dict):
    """Work through the unfinished child promotions of a punch card.

    Loads ``url`` and, for every entry of ``childPromotions`` whose
    ``'complete'`` value equals ``False``, clicks the first ``offer-cta``
    element, switches to the second window handle, performs the action that
    matches ``'promotionType'`` and finally closes that window and switches
    back to the first handle.

    * ``"urlreward"``: wait 13-17 seconds in the opened window.
    * ``"quiz"``: read the question count from the first question pane,
      click a randomly chosen option (1-3) for each question and then the
      input inside the matching answer pane.

    Args:
        browser: Selenium driver that performs all actions.
        url: Address of the punch-card page.
        childPromotions: Iterable of mappings with at least the keys
            ``'complete'`` and ``'promotionType'``.
    """
    offer_click_js = "document.getElementsByClassName('offer-cta')[0].click()"

    def enter_offer_tab():
        # Trigger the offer and move to the window it opened.
        browser.execute_script(offer_click_js)
        time.sleep(1)
        browser.switch_to.window(window_name=browser.window_handles[1])

    def leave_offer_tab():
        # Close the offer window and return to the original one.
        browser.close()
        time.sleep(2)
        browser.switch_to.window(window_name=browser.window_handles[0])
        time.sleep(2)

    browser.get(url)
    for promotion in childPromotions:
        if promotion['complete'] != False:
            continue

        if promotion['promotionType'] == "urlreward":
            enter_offer_tab()
            time.sleep(random.randint(13, 17))
            leave_offer_tab()

        if promotion['promotionType'] == "quiz":
            enter_offer_tab()
            time.sleep(8)

            # The counter markup has its first and last character dropped;
            # the largest number in it is taken as the question count.
            counter_element = browser.find_element_by_xpath(
                '//*[@id="QuestionPane0"]/div[2]')
            counter_text = str(counter_element.get_attribute('innerHTML'))[1:-1]
            question_total = max(
                int(token) for token in counter_text.split() if token.isdigit())

            for question in range(question_total):
                option_xpath = (
                    "//*[@id='QuestionPane" + str(question)
                    + "']/div[1]/div[2]/a[" + str(random.randint(1, 3))
                    + "]/div"
                )
                browser.execute_script(
                    'document.evaluate("' + option_xpath
                    + '", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE,'
                    ' null).singleNodeValue.click()')
                time.sleep(5)

                answer_xpath = (
                    f'//*[@id="AnswerPane{question}"]'
                    '/div[1]/div[2]/div[4]/a/div/span/input'
                )
                browser.find_element_by_xpath(answer_xpath).click()
                time.sleep(3)

            time.sleep(5)
            leave_offer_tab()
Пример #29
0
    def next_search_page(self, driver: WebDriver) -> int:
        """Advance the search results to the next page if possible.

        Looks up the "next page" button through the CSS selector stored in
        ``self.config["extras"]["next_page_btn"]``. When the button is
        enabled it is clicked via JavaScript and the method waits until an
        element with the class ``self.config["extras"]["search_page_data"]``
        is present.

        Args:
            driver: Selenium driver showing the current results page.

        Returns:
            ``self.NEXT_PAGE_EXISTS`` after a successful click and wait;
            ``self.NEXT_PAGE_DEAD`` when the button is disabled, cannot be
            found, or the wait times out.
        """
        c = self.config
        try:
            driver.implicitly_wait(3)
            selector = c["extras"]["next_page_btn"]
            next_button = driver.find_element_by_css_selector(selector)

            if not next_button.is_enabled():
                return self.NEXT_PAGE_DEAD

            print("Next page")
            # The click goes through JavaScript instead of next_button.click().
            # The selector is handed over as a script argument rather than
            # spliced into the source, so selectors containing quotes (e.g.
            # a[rel='next']) no longer produce invalid JavaScript.
            driver.execute_script(
                "document.querySelector(arguments[0]).click()", selector)

            self.wait.until(ec.presence_of_element_located((By.CLASS_NAME, c["extras"]["search_page_data"])),
                            'Items not found in this page')

            return self.NEXT_PAGE_EXISTS

        except (TimeoutException, NoSuchElementException) as err:
            # Only a timeout is echoed unconditionally; a missing button is
            # silent unless debugging is enabled.
            if isinstance(err, TimeoutException):
                print(err)
            if self.args["debug"]:
                print("*******************************************\n")
                print("Last Four Exceptions")
                import traceback
                traceback.print_exc()
                print("\n*******************************************")

            return self.NEXT_PAGE_DEAD
def parse_page(driver: WebDriver) -> lxml.html.HtmlElement:
    """Return the cleaned ``body`` of the page currently shown by the driver.

    The window is scrolled to the bottom before the page source is read and
    scrolled back to the top before returning.

    Parameters
    ----------
    driver : WebDriver
        The Selenium driver holding the web page to parse

    Returns
    -------
    lxml.html.HtmlElement
        The ``body`` element found in the parsed and cleaned document
    """
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);"
    scroll_to_top = "window.scrollTo(0, 0);"

    # Move to the end of the page first, then build a full HTML document
    # from whatever source the driver reports.
    driver.execute_script(scroll_to_bottom)
    document = lxml.html.document_fromstring(driver.page_source)

    # Cleaner configuration: keep page structure, frames, forms, "annoying"
    # and unknown tags, and allow ``aria-label`` next to the default safe
    # attributes.
    cleaner_options = dict(
        page_structure=False,
        frames=False,
        forms=False,
        annoying_tags=False,
        safe_attrs=lxml.html.defs.safe_attrs | {"aria-label"},
        remove_unknown_tags=False,
    )
    # Calling the cleaner works on ``document`` itself; its return value is
    # not used.
    lxml.html.clean.Cleaner(**cleaner_options)(document)

    body = document.find("body")

    # Restore the scroll position before handing the body back.
    driver.execute_script(scroll_to_top)
    return body