def browse_nodes(browser, start_nodes):
    """Walk ``start_nodes`` in order, expanding or clicking each one.

    A node written as ``"expand:<text>"`` is handed (without the marker) to
    ``expand_tree_by_text``; every other node is handed unchanged to
    ``click_link_by_text``.

    Args:
        browser: Browser handle forwarded to the helper calls.
        start_nodes: Iterable of node strings, processed in the given order.
    """
    expand_marker = "expand:"
    for node in start_nodes:
        if node.startswith(expand_marker):
            # Strip only the leading marker. str.replace() would also delete
            # any later "expand:" that is part of the element text itself.
            element_text = node[len(expand_marker):]
            expand_tree_by_text(browser=browser, element_text=element_text)
        else:
            click_link_by_text(browser=browser, element_text=node)
def close_path(subunit_path, unit_data):
    """Click the links of a subunit path, from the last level to the first.

    Each link text is the site unit name immediately followed by the subunit
    number (``"%s%d"``). The pairs and every link text are logged at INFO
    level.

    Args:
        subunit_path: Sequence of subunit numbers, one per level.
        unit_data: Mapping whose ``"unitNameListInSite"`` entry is the
            sequence of unit names paired with ``subunit_path``.
    """
    unit_names = unit_data["unitNameListInSite"]
    # Built once: the same list is logged and then iterated.
    reversed_pairs = list(zip(reversed(subunit_path), reversed(unit_names)))
    logging.info(reversed_pairs)
    for subunit, unit_name in reversed_pairs:
        link_text = "%s%d" % (unit_name, subunit)
        logging.info(link_text)
        # NOTE(review): `browser` is neither a parameter nor a local here; it
        # is resolved from the enclosing scope at call time -- confirm that a
        # module-level `browser` exists.
        click_link_by_text(browser=browser, element_text=link_text)
def dump_text(browser, title, out_file_path):
    """Concatenate the text of successive pages and write it to a markdown file.

    Page text is read with ``get_page_text`` and appended to the result; the
    "Next" link is then clicked and the page is read again. Collection stops
    as soon as a read returns the same text as the previous read. The result
    is written through ``md_helper.MdFile`` with ``title`` as metadata.

    Args:
        browser: Browser handle forwarded to the page helpers.
        title: Value stored under the ``"title"`` metadata key.
        out_file_path: Destination path handed to ``md_helper.MdFile``.
    """
    collected = ""
    last_seen = None
    current = get_page_text(browser=browser)
    # An unchanged page after clicking "Next" is the end-of-content signal.
    while not (last_seen == current):
        logging.debug(current)
        collected = collected + current
        # Fixed 2 second pause before asking for the next page.
        time.sleep(2)
        # NOTE(review): this goes through `selenium.click_link_by_text`, while
        # the neighbouring functions call a bare `click_link_by_text` --
        # confirm both names resolve to the intended helper.
        selenium.click_link_by_text(browser=browser, element_text="Next")
        last_seen = current
        current = get_page_text(browser=browser)

    md_helper.MdFile(file_path=out_file_path).dump_to_file(
        metadata={"title": title}, md=collected, dry_run=False)
def open_path(subunit_path, unit_data):
    """Click the links of a subunit path, from the first level to the last.

    Each link text is the site unit name immediately followed by the subunit
    number (``"%s%d"``). The pairs are logged at DEBUG level first.

    Args:
        subunit_path: Iterable of subunit numbers, one per level.
        unit_data: Mapping whose ``"unitNameListInSite"`` entry is the
            iterable of unit names paired with ``subunit_path``.
    """
    unit_names = unit_data["unitNameListInSite"]
    logging.debug(list(zip(subunit_path, unit_names)))
    # Lazy generator: each link text is formatted right before its click.
    link_texts = (
        "%s%d" % (unit_name, subunit)
        for subunit, unit_name in zip(subunit_path, unit_names)
    )
    for link_text in link_texts:
        # NOTE(review): `browser` is neither a parameter nor a local here; it
        # is resolved from the enclosing scope at call time -- confirm that a
        # module-level `browser` exists.
        click_link_by_text(browser=browser, element_text=link_text)
def get_texts(browser, outdir, start_nodes):
    """Browse to ``start_nodes`` and dump text for each page reached via "Next".

    The output directory is created if missing. ``dump_text`` is called once
    for the starting page and once more after every "Next" click for which
    ``click_link_by_text`` returns a truthy value.

    Args:
        browser: Browser handle forwarded to the helpers.
        outdir: Output directory; created with ``exist_ok=True``.
        start_nodes: Node strings handed to ``browse_nodes``.
    """
    browse_nodes(browser=browser, start_nodes=start_nodes)
    os.makedirs(name=outdir, exist_ok=True)
    while True:
        # NOTE(review): the `dump_text` defined in this file takes
        # (browser, title, out_file_path) and has no `outdir` parameter, so
        # this call would raise TypeError against it. That `dump_text` also
        # follows "Next" links itself. Confirm which signature is intended.
        dump_text(browser=browser, outdir=outdir)
        if not click_link_by_text(browser=browser, element_text="Next"):
            break