def install_driver_if_not_installed(target_directory):
    """Install the driver into *target_directory* unless it is already there.

    The driver counts as installed when a path named ``chromedriver``
    exists directly inside *target_directory*.
    """
    expected_binary = os.path.join(target_directory, "chromedriver")
    if not os.path.exists(expected_binary):
        install_driver(target_directory)
        return
    console.info("Driver already installed skip...")
def open_browser(driver_path):
    """Make sure a driver is installed, then start Chrome and return it.

    The install check always targets the literal ``"driver"`` directory;
    *driver_path* is only handed to ``webdriver.Chrome``.
    """
    console.info()
    download_driver.install_driver_if_not_installed("driver")
    return webdriver.Chrome(driver_path)
def ensure_signin(driver):
    """Bring *driver* into a signed-in state, doing as little work as possible.

    Order of attempts: the current session, then cookies stored at
    ``consts.COOKIE_PATH``, then an explicit sign-in followed by saving the
    resulting cookies to that same path.
    """
    console.info()

    if is_signed_in(driver):
        return

    # Stored cookies may be enough to restore the session.
    load_cookie_if_exists(driver, consts.COOKIE_PATH)
    if is_signed_in(driver):
        return

    signin(driver)
    save_cookie(driver, consts.COOKIE_PATH)
def is_signed_in(driver):
    """Return True when the site's front page shows no sign-in ID field.

    Loads ``consts.SITE_HOST_WITHOUT_TRAILING_SLASH`` and looks for an
    element named ``consts.SIGNIN_ID_ELEMENT_NAME``; finding it means the
    sign-in form is displayed, so the result is False.
    """
    console.info()
    selenium_dispatcher.driver_get(driver,
                                   consts.SITE_HOST_WITHOUT_TRAILING_SLASH)
    try:
        driver.find_element_by_name(consts.SIGNIN_ID_ELEMENT_NAME)
    except NoSuchElementException:
        return True
    return False
def load_cookie_if_exists(driver, path):
    """Add every cookie pickled at *path* to *driver*; no-op if *path* is missing."""
    console.info()
    if os.path.exists(path):
        with open(path, 'rb') as cookie_file:
            stored_cookies = pickle.load(cookie_file)
        for stored_cookie in stored_cookies:
            driver.add_cookie(stored_cookie)
def signin(driver):
    """Fill in the sign-in form on the site's front page and submit it.

    Types ``MY_ACCOUNT`` and ``MY_PASSWORD`` into the fields named by
    ``consts.SIGNIN_ID_ELEMENT_NAME`` / ``consts.SIGNIN_PW_ELEMENT_NAME``,
    then sends ENTER to the password field.
    """
    console.info()
    selenium_dispatcher.driver_get(driver,
                                   consts.SITE_HOST_WITHOUT_TRAILING_SLASH)

    # Locate both fields before typing anything.
    account_field = driver.find_element_by_name(consts.SIGNIN_ID_ELEMENT_NAME)
    password_field = driver.find_element_by_name(consts.SIGNIN_PW_ELEMENT_NAME)

    account_field.send_keys(MY_ACCOUNT)
    password_field.send_keys(MY_PASSWORD)
    selenium_dispatcher.element_send_key(password_field, Keys.ENTER)
def clear_blocked_list(driver):
    """Click the "clear blocked users" button on the member-modify page.

    The button is the first ``butt_red`` element whose ``onclick`` attribute
    starts with ``ban_clear``. After clicking it, the resulting alert is
    accepted. When no such button is found, only a message is logged.
    """
    selenium_dispatcher.driver_get(
        driver, f"{SITE_HOST}/bbs/member_modify.php?group_no=1")

    red_buttons = driver.find_elements_by_class_name("butt_red")
    # Lazy scan: stops querying attributes at the first matching button.
    clear_button = next(
        (button for button in red_buttons
         if (button.get_attribute("onclick") or "").startswith("ban_clear")),
        None,
    )

    if clear_button is None:
        console.info(
            "[Clear block user list button] is disabled. It means you have no block users."
        )
        return

    selenium_dispatcher.element_click(clear_button)
    selenium_dispatcher.accept_alert(driver)
def get_blocked_reply_num_or_none(driver):
    """Return the first missing comment number on the current page, or None.

    Walks the ``reply2btn`` elements in page order. Buttons labelled
    "베스트 댓글 …", "BEST" or "논란의 댓글" are ignored; every other button is
    expected to read "댓글 1", "댓글 2", … consecutively. The first number
    whose label is not where it is expected is returned.

    Returns:
        The 1-based comment number that is missing, or None when the
        numbering is unbroken (or the page could not be inspected).
    """
    try:
        expected_no = 1
        for btn in driver.find_elements_by_class_name("reply2btn"):
            # Read the label once; on a live element each `.text` access is
            # a separate lookup.
            label = btn.text
            if label.startswith("베스트 댓글 ") or label in ("BEST", "논란의 댓글"):
                continue
            if label != f"댓글 {expected_no}":
                console.info(f"[댓글 {expected_no}] not exists.")
                return expected_no
            expected_no += 1
    except Exception as err:
        # Best effort: a page that cannot be inspected counts as "nothing
        # found". Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still stop the program.
        console.info(f"Failed to inspect reply buttons: {err!r}")
    return None
def accept_alert(driver):
    """Log the text of the alert currently open in *driver*, then accept it."""
    pending_alert = driver.switch_to.alert
    alert_text = pending_alert.text
    console.info(f"alert=[{alert_text}]")
    pending_alert.accept()
def install_driver(target_directory):
    """Run the download step and then the extract step for *target_directory*."""
    console.info()

    download_driver(target_directory)
    extract_driver(target_directory)
def save_cookie(driver, path):
    """Pickle the cookies currently held by *driver* into the file at *path*.

    The parent directory is created through
    ``fileio.make_parent_path_if_doesnt_exist`` before writing.
    """
    console.info()
    # Fetch the cookies BEFORE opening the file: open(..., 'wb') truncates
    # immediately, so a failing get_cookies() would otherwise leave an empty
    # file that a later pickle.load() cannot read.
    cookies = driver.get_cookies()
    fileio.make_parent_path_if_doesnt_exist(path)
    with open(path, 'wb') as filehandler:
        pickle.dump(cookies, filehandler)
def save_as_json(path, o):
    """Write *o* to *path* as indented, key-sorted JSON without ASCII escaping.

    The parent directory is created first via
    ``make_parent_path_if_doesnt_exist``.
    """
    console.info(f"path=[{path}], o=[{o}]")
    make_parent_path_if_doesnt_exist(path)

    dump_options = {"indent": 4, "sort_keys": True, "ensure_ascii": False}
    # NOTE(review): no encoding is passed to open(), so the platform default
    # is used even though non-ASCII characters are written verbatim.
    with open(path, 'w') as json_file:
        json.dump(o, json_file, **dump_options)
def search_board(driver, board, no, max_search_post_count=-1):
    """Scan a board from its latest article downwards and record the findings.

    The blocked-user list is cleared and ``block_user(driver, board, no)`` is
    called first. Articles are then visited in windows of
    ``EACH_POST_RANDOM_SEARCH_COUNT`` consecutive numbers, shuffled within
    each window. For every visited article one JSON marker file is written
    under ``data/<board>_<no>/``:

    * ``found_author/<n>``  - ``is_author_blocked_user`` returned true
    * ``found_replier/<n>`` - ``get_blocked_reply_num_or_none`` returned a number
    * ``searched/<n>``      - neither of the above

    Articles that already have any of these markers are skipped and do not
    count towards the limit.

    Args:
        driver: Browser driver passed through to the page helpers.
        board: Board identifier.
        no: Article number handed to ``block_user``.
        max_search_post_count: Maximum number of articles to visit;
            ``-1`` (the default) means no limit.
    """
    console.info()
    clear_blocked_list(driver)
    block_user(driver, board, no)

    data_dirpath = os.path.join("data", f"{board}_{no}")
    data_searched_dirpath = os.path.join(data_dirpath, "searched")
    data_found_author_dirpath = os.path.join(data_dirpath, "found_author")
    data_found_replier_dirpath = os.path.join(data_dirpath, "found_replier")

    latest_no = get_latest_article_no(driver, board)
    console.info(f"Search from article no [{latest_no}](latest).")

    no_target = latest_no
    idx = 0  # number of articles actually visited so far
    while no_target >= 0:
        curr_no_items = list(
            range(no_target, no_target - EACH_POST_RANDOM_SEARCH_COUNT, -1))
        # The window is inclusive on both ends, hence the "+ 1" on the
        # lower bound shown in the log.
        console.info(
            f"Search article no [{no_target}] ~ "
            f"[{no_target - EACH_POST_RANDOM_SEARCH_COUNT + 1}] randomly."
        )
        random.shuffle(curr_no_items)

        for curr_no in curr_no_items:
            if curr_no < 0:
                console.notice(f"Skip negative article number. [{curr_no}]")
                continue

            if max_search_post_count != -1 and idx >= max_search_post_count:
                console.notice(f"searched [{max_search_post_count}]")
                console.info("exit")
                return

            searched_filepath = os.path.join(data_searched_dirpath,
                                             str(curr_no))
            found_author_filepath = os.path.join(data_found_author_dirpath,
                                                 str(curr_no))
            found_replier_filepath = os.path.join(data_found_replier_dirpath,
                                                  str(curr_no))
            link = get_link_with(board, curr_no)

            # Any existing marker means this article was handled before.
            # The markers are checked lazily, in this fixed order.
            existing_marker = next(
                (marker for marker in (searched_filepath,
                                       found_author_filepath,
                                       found_replier_filepath)
                 if load_from_json_or_none(marker) is not None),
                None,
            )
            if existing_marker is not None:
                console.info(
                    f"[{existing_marker}] file exists. skip [{link}]")
                continue

            get_article(driver, board, curr_no)
            if is_author_blocked_user(driver):
                console.notice(f"Blocked author found. [{link}]")
                save_as_json(found_author_filepath, link)
            else:
                reply_num = get_blocked_reply_num_or_none(driver)
                if reply_num is not None:
                    console.notice(f"Blocked replier found. [{link}]")
                    save_as_json(found_replier_filepath, {
                        "link": link,
                        "reply_num": reply_num,
                    })
                else:
                    save_as_json(searched_filepath, link)
            idx += 1

        no_target -= EACH_POST_RANDOM_SEARCH_COUNT

    console.notice("Every article has searched.")