示例#1
0
def install_driver_if_not_installed(target_directory):
    """Install the driver into *target_directory* unless it is already there.

    The check is a plain existence test for a path named ``chromedriver``
    inside the directory; when that path is missing, ``install_driver`` is
    called with the same directory.
    """
    expected_binary = os.path.join(target_directory, "chromedriver")
    if not os.path.exists(expected_binary):
        install_driver(target_directory)
        return

    console.info("Driver already installed skip...")
示例#2
0
def open_browser(driver_path):
    """Create a Chrome WebDriver from *driver_path* and return it.

    ``download_driver.install_driver_if_not_installed`` is called on the
    ``"driver"`` directory first, so the driver binary is expected to be
    available by the time Chrome is started.
    """
    console.info()
    download_driver.install_driver_if_not_installed("driver")
    return webdriver.Chrome(driver_path)
示例#3
0
def ensure_signin(driver):
    """Make sure the browser session is signed in.

    First tries to restore a session from the cookie file at
    ``consts.COOKIE_PATH``; if the session is still not signed in after
    that, performs a fresh sign-in and stores the resulting cookies.
    """
    console.info()
    if not is_signed_in(driver):
        load_cookie_if_exists(driver, consts.COOKIE_PATH)

    # The state is always probed a second time, even when the first probe
    # already succeeded.
    if is_signed_in(driver):
        return

    signin(driver)
    save_cookie(driver, consts.COOKIE_PATH)
示例#4
0
def is_signed_in(driver):
    """Return True when the site's front page shows no sign-in ID field.

    The page at ``consts.SITE_HOST_WITHOUT_TRAILING_SLASH`` is loaded and
    searched for the element named ``consts.SIGNIN_ID_ELEMENT_NAME``.
    Finding it means the sign-in form is displayed (not signed in); a
    ``NoSuchElementException`` is treated as "signed in".
    """
    console.info()
    selenium_dispatcher.driver_get(
        driver, consts.SITE_HOST_WITHOUT_TRAILING_SLASH)
    try:
        driver.find_element_by_name(consts.SIGNIN_ID_ELEMENT_NAME)
    except NoSuchElementException:
        return True
    return False
示例#5
0
def load_cookie_if_exists(driver, path):
    """Add the cookies pickled at *path* to *driver*; do nothing if absent.

    NOTE(review): the file is read with ``pickle.load``, which must only be
    used on trusted data -- presumably this file is only ever written by
    ``save_cookie``; confirm before pointing it at anything else.
    """
    console.info()
    if os.path.exists(path):
        with open(path, 'rb') as cookie_file:
            for stored_cookie in pickle.load(cookie_file):
                driver.add_cookie(stored_cookie)
示例#6
0
def signin(driver):
    """Fill in the sign-in form on the site's front page and submit it.

    Both form fields are located by name before anything is typed. The
    account and password come from the module-level ``MY_ACCOUNT`` and
    ``MY_PASSWORD``, and the form is submitted by sending ENTER to the
    password field.
    """
    console.info()
    selenium_dispatcher.driver_get(
        driver, consts.SITE_HOST_WITHOUT_TRAILING_SLASH)

    account_field = driver.find_element_by_name(consts.SIGNIN_ID_ELEMENT_NAME)
    password_field = driver.find_element_by_name(
        consts.SIGNIN_PW_ELEMENT_NAME)

    account_field.send_keys(MY_ACCOUNT)
    password_field.send_keys(MY_PASSWORD)
    selenium_dispatcher.element_send_key(password_field, Keys.ENTER)
示例#7
0
def clear_blocked_list(driver):
    """Click the "clear blocked users" button on the member settings page.

    The page's ``butt_red`` buttons are scanned for the first one whose
    ``onclick`` attribute starts with ``ban_clear``. That button is clicked
    and the resulting alert is accepted. If no such button is found, a
    message is logged instead.
    """
    selenium_dispatcher.driver_get(
        driver, f"{SITE_HOST}/bbs/member_modify.php?group_no=1")

    for candidate in driver.find_elements_by_class_name("butt_red"):
        handler = candidate.get_attribute("onclick")
        if not handler or not handler.startswith("ban_clear"):
            continue
        selenium_dispatcher.element_click(candidate)
        selenium_dispatcher.accept_alert(driver)
        break
    else:
        # Runs only when the loop finished without clicking anything.
        console.info(
            "[Clear block user list button] is disabled. It means you have no block users."
        )
示例#8
0
def get_blocked_reply_num_or_none(driver):
    """Return the first comment number missing from the page, or None.

    The ``reply2btn`` elements are expected to be labelled ``댓글 1``,
    ``댓글 2``, ... in order. Labels for highlighted comments (``베스트 댓글 ``
    prefix, ``BEST``, ``논란의 댓글``) are skipped without advancing the
    expected number. The first label that breaks the sequence yields the
    expected number at that position.

    Args:
        driver: WebDriver positioned on the article page to inspect.

    Returns:
        The missing comment number, or ``None`` when the sequence is
        complete or the page could not be inspected.

    Note:
        Inspection is best-effort: ordinary errors are logged and reported
        as ``None``. ``KeyboardInterrupt`` and ``SystemExit`` are no longer
        swallowed, so a long-running search can still be interrupted.
    """
    try:
        expected_num = 1
        for btn in driver.find_elements_by_class_name("reply2btn"):
            # Read the label once instead of re-indexing the list for every
            # comparison.
            label = btn.text
            if label.startswith("베스트 댓글 ") or label in ("BEST", "논란의 댓글"):
                continue

            if label != f"댓글 {expected_num}":
                console.info(f"[댓글 {expected_num}] not exists.")
                return expected_num
            expected_num += 1
    except Exception as error:
        # Keep the original "treat failure as nothing found" contract, but
        # leave a trace instead of hiding the failure completely.
        console.info(f"Failed to inspect reply buttons: {error!r}")

    return None
示例#9
0
def accept_alert(driver):
    """Log the text of the currently open alert and accept it."""
    pending_alert = driver.switch_to.alert
    alert_text = pending_alert.text
    console.info(f"alert=[{alert_text}]")
    pending_alert.accept()
示例#10
0
def install_driver(target_directory):
    """Run ``download_driver`` and then ``extract_driver`` on *target_directory*."""
    console.info()
    # The stages run strictly in this order.
    for stage in (download_driver, extract_driver):
        stage(target_directory)
示例#11
0
def save_cookie(driver, path):
    """Pickle the driver's current cookies to *path*.

    Args:
        driver: WebDriver whose ``get_cookies()`` result is stored.
        path: Destination file. Its parent path is prepared through
            ``fileio.make_parent_path_if_doesnt_exist`` first.
    """
    console.info()
    # Fetch the cookies BEFORE opening the file: mode 'wb' truncates on
    # open, so a failing driver call would otherwise wipe the previously
    # saved cookie file.
    cookies = driver.get_cookies()
    fileio.make_parent_path_if_doesnt_exist(path)
    with open(path, 'wb') as filehandler:
        pickle.dump(cookies, filehandler)
示例#12
0
def save_as_json(path, o):
    """Write *o* to *path* as indented, key-sorted JSON.

    The parent path is prepared via ``make_parent_path_if_doesnt_exist``
    before the file is opened for writing.

    NOTE(review): the file is opened with the platform default text
    encoding while ``ensure_ascii=False`` lets non-ASCII characters through
    unescaped -- confirm the matching loader reads with the same encoding.
    """
    console.info(f"path=[{path}], o=[{o}]")
    make_parent_path_if_doesnt_exist(path)
    with open(path, 'w') as json_file:
        json.dump(
            o,
            json_file,
            ensure_ascii=False,
            indent=4,
            sort_keys=True,
        )
示例#13
0
def search_board(driver, board, no, max_search_post_count=-1):
    """Scan a board's articles, newest first, for the blocked user's traces.

    The block list is cleared and ``block_user(driver, board, no)`` is
    called. Articles are then visited in batches of
    ``EACH_POST_RANDOM_SEARCH_COUNT`` consecutive numbers, starting at the
    latest article and moving downwards; the order inside each batch is
    shuffled. Every visited article is recorded as a JSON file under
    ``data/<board>_<no>/`` in one of three sub-directories:

    * ``found_author``  -- ``is_author_blocked_user`` reported a hit,
    * ``found_replier`` -- a blocked reply number was detected,
    * ``searched``      -- neither of the above.

    Articles that already have a record in any of the three directories
    are skipped and do not count towards the limit.

    Args:
        driver: WebDriver used for all page access.
        board: Board identifier.
        no: Article number, passed to ``block_user`` and used in the data
            directory name.
        max_search_post_count: Maximum number of articles to actually
            visit; ``-1`` means no limit.
    """
    console.info()
    clear_blocked_list(driver)
    block_user(driver, board, no)

    data_dirpath = os.path.join("data", f"{board}_{no}")
    data_searched_dirpath = os.path.join(data_dirpath, "searched")
    data_found_author_dirpath = os.path.join(data_dirpath, "found_author")
    data_found_replier_dirpath = os.path.join(data_dirpath, "found_replier")

    latest_no = get_latest_article_no(driver, board)
    console.info(f"Search from article no [{latest_no}](latest).")

    searched_count = 0
    no_target = latest_no
    while no_target >= 0:
        # Inclusive lower bound of this batch. The log below used to print
        # one less than this, i.e. a number that is never searched.
        batch_lowest_no = no_target - EACH_POST_RANDOM_SEARCH_COUNT + 1
        curr_no_items = list(range(no_target, batch_lowest_no - 1, -1))
        console.info(
            f"Search article no [{no_target}] ~ [{batch_lowest_no}] randomly."
        )
        random.shuffle(curr_no_items)

        for curr_no in curr_no_items:
            if curr_no < 0:
                console.notice(f"Skip negative article number. [{curr_no}]")
                continue
            if (max_search_post_count != -1
                    and searched_count >= max_search_post_count):
                console.notice(f"searched [{max_search_post_count}]")
                console.info("exit")
                return

            record_name = str(curr_no)
            searched_filepath = os.path.join(data_searched_dirpath,
                                             record_name)
            found_author_filepath = os.path.join(data_found_author_dirpath,
                                                 record_name)
            found_replier_filepath = os.path.join(data_found_replier_dirpath,
                                                  record_name)
            link = get_link_with(board, curr_no)

            # An existing record of any kind means the article was already
            # handled; stop probing at the first one found.
            existing_record = None
            for record_path in (searched_filepath, found_author_filepath,
                                found_replier_filepath):
                if load_from_json_or_none(record_path) is not None:
                    existing_record = record_path
                    break
            if existing_record is not None:
                console.info(
                    f"[{existing_record}] file exists. skip [{link}]")
                continue

            get_article(driver, board, curr_no)
            if is_author_blocked_user(driver):
                console.notice(f"Blocked author found. [{link}]")
                save_as_json(found_author_filepath, link)
            else:
                reply_num = get_blocked_reply_num_or_none(driver)
                if reply_num is not None:
                    console.notice(f"Blocked replier found. [{link}]")
                    save_as_json(found_replier_filepath, {
                        "link": link,
                        "reply_num": reply_num,
                    })
                else:
                    save_as_json(searched_filepath, link)

            searched_count += 1

        no_target -= EACH_POST_RANDOM_SEARCH_COUNT

    console.notice("Every article has searched.")