def get_child_cat_urls(parent_cat_url):
    """Collect a ``(name, url)`` pair for every child category of a parent page.

    Each link matched by ``//ul[@class='child']/li/a`` on the parent page is
    clicked in turn, and the link text is recorded together with the URL the
    browser ends up on.

    Args:
        parent_cat_url: URL of the page that lists the child categories.

    Returns:
        A list of ``(name, url)`` tuples, in page order.
    """
    c = Chrome()
    cat_url_list = []
    link_index = 0
    try:
        while True:
            # The parent page is reloaded and the links re-queried on every
            # pass, since the previous click navigated away from it.
            c.get(parent_cat_url)
            cat_links = c.xpath("//ul[@class='child']/li/a")
            # ">=" rather than "==": if the page returns fewer links on a
            # later reload we stop instead of indexing past the end.
            if link_index >= len(cat_links):
                break
            cat_link = cat_links[link_index]
            name = cat_link.text
            print(name)
            cat_link.click()
            child_url = c.current_url
            print(child_url)
            cat_url_list.append((name, child_url))
            link_index += 1
    finally:
        # Always release the browser, even when a page fails to load.
        c.quit()
    return cat_url_list
def get_raw_meta_texts(url):
    """Return the text content of every ``rg_meta notranslate`` element on a page.

    The page is opened in Chrome, scrolled with
    ``_scroll_to_the_bottom_google_image_search`` and then queried through the
    wrapper's lxml-based xpath.

    Args:
        url: Address of the page to load.

    Returns:
        A list with the ``text_content()`` of each matched element.
    """
    print('running Chrome...')
    c = Chrome()
    try:
        c.get(url)
        _scroll_to_the_bottom_google_image_search(c)
        print('exacting by xpath...')
        elements = c.xpath_lxml('//*[@class = "rg_meta notranslate"]')
        return [element.text_content() for element in elements]
    finally:
        # Quit in ``finally`` so a failed load/scroll/query does not leave a
        # browser process behind.
        c.quit()
def get_cybersyndrome():
    """Scrape the anonymous proxy list from cybersyndrome.net.

    Opens the top page, clicks the first table cell that mentions
    'Anonymous' and has a following sibling cell with the matching Japanese
    description, then reads the text of every ``//ol/li/a`` link on the
    resulting page.

    Returns:
        The link texts after being passed through ``_strip_proxy_list``.

    Raises:
        IndexError: If the anonymous-list cell is not found on the top page.
    """
    chrome = Chrome()
    try:
        chrome.get("http://www.cybersyndrome.net/")
        anonymous_page_link_xpath = "//td[contains(.,'Anonymous') and .//following-sibling::td[contains(.,'ユーザのIPアドレスを含む環境変数を出力しないプロキシ')]]"
        chrome.xpath(anonymous_page_link_xpath)[0].click()
        proxy_elements = chrome.xpath("//ol/li/a")
        # Read the texts while the browser is still alive.
        anonymous_proxy_list = [e.text for e in proxy_elements]
    finally:
        # Release the browser whether or not the scrape succeeded.
        chrome.quit()
    return _strip_proxy_list(anonymous_proxy_list)
def test():
    """Log in and run ``apply_ipo`` once for every IPO button on the list page.

    A visible (non-headless) Chrome without a cookie key is used. After each
    application the list page is reloaded and the buttons are looked up again,
    and the loop ends once the running index reaches the number of buttons.
    """
    browser = Chrome(cookie_key=None, headless=False)
    login(browser)
    ipo_list_url = "https://hometrade.nomura.co.jp/web/rmfTrdStkIpoLstAction.do"
    button_xpath = "//a[@class='btn-secondary ico-r']"
    browser.get(ipo_list_url)
    position = 0
    while True:
        buttons = browser.xpath(button_xpath)
        if position >= len(buttons):
            break
        buttons[position].click()
        apply_ipo(browser)
        position += 1
        # Go back to the list so the next button can be located.
        browser.get(ipo_list_url)
def login_test():
    """Run each enabled broker login function against one shared Chrome.

    Credentials come from ``load_passwords()``; for every enabled entry the
    ``"login_args"`` stored under its key are unpacked into the login call.
    """
    from umihico_commons.chrome_wrapper import Chrome
    from passwords import load_passwords

    credentials = load_passwords()
    browser = Chrome()
    # Only nomura is enabled. Disabled entries, kept for reference:
    # (daiwa_login, "daiwa"), (mizuho_login, "mizuho"), (nikko_login, "nikko"),
    # (mufj_login, "mufj"), (sbi_login, "sbi"), (monex_login, "monex"),
    # (matsui_login, "matsui")
    enabled_logins = [
        (nomura_login, "nomura"),
    ]
    for login_func, key in enabled_logins:
        args = credentials[key]["login_args"]
        login_func(browser, *args)
def get_freeproxylists():
    """Scrape anonymous proxies from freeproxylists.net, following pagination.

    For every result page the first row of the ``DataGrid`` table is checked
    against the expected Japanese column names. Each following row whose
    anonymity column contains "Anonymous" contributes an ``ip:port`` string.
    The per-page list is passed through ``_strip_proxy_list`` before being
    added to the result. Paging continues while a "次へ" link exists.

    Any exception is printed with ``print_exc`` and ends the scrape; whatever
    was collected from earlier pages is still returned.

    Returns:
        A list of proxy strings gathered from all pages that were processed.
    """
    chrome = Chrome()
    all_anonymous_proxy_list = []
    ideal_names = [
        "IPアドレス",
        'ポート',
        'プロトコル',
        '匿名性',
        '国',
        '地域',
        '市',
        '稼働率',
        '応答速度',
        '転送速度',
    ]
    try:
        url = "http://www.freeproxylists.net/ja/?c=&pt=&pr=&a%5B%5D=1&a%5B%5D=2&u=0"
        chrome.get(url)
        while True:
            trs = chrome.xpath("//table[@class='DataGrid']/tbody/tr")
            header_names = [td.text for td in trs[0].xpath("./td")]
            # Abort if the column layout differs from what the parser expects.
            for ideal, actual in zip(ideal_names, header_names):
                if ideal != actual:
                    raise Exception(f"{ideal_names},{header_names}")

            # Start a fresh list for each page. The original appended to this
            # name before it was ever assigned, which raised
            # UnboundLocalError on the first anonymous row.
            anonymous_proxy_list = []
            for tr in trs[1:]:
                td_texts = [td.text for td in tr.xpath('./td')]
                if len(td_texts) < len(ideal_names):
                    # Row without the full set of columns; nothing to parse.
                    continue
                row = dict(zip(ideal_names, td_texts))
                if "Anonymous" in row["匿名性"]:
                    anonymous_proxy_list.append(
                        f'{row["IPアドレス"]}:{row["ポート"]}')

            # Store this page's results BEFORE deciding whether to stop, so
            # the last page is not lost.
            all_anonymous_proxy_list.extend(
                _strip_proxy_list(anonymous_proxy_list))

            nexts = chrome.xpath("//div[@class='page']/a[contains(.,'次へ')]")
            if not nexts:
                break
            nexts[0].click()
    except Exception:
        # Best-effort scrape: report the problem and return what we have.
        print_exc()
    finally:
        chrome.quit()
    return all_anonymous_proxy_list
("//input[@id='passwd1']", login_password), ("//input[@id='passwd1']", Keys.RETURN), ] chrome.do_actions(actions) if __name__ == '__main__': from umihico_commons.chrome_wrapper import Chrome try: from login import * except (Exception, ) as e: from .login import * from passwords import load_passwords password_dict = load_passwords() chrome = Chrome() test_funcs = [ (nomura_login, nomura_apply_ipo, "nomura"), # (daiwa_login,daiwa_apply_ipo "daiwa"), # (mizuho_login,mizuho_apply_ipo "mizuho"), # (nikko_login,nikko_apply_ipo "nikko"), # (mufj_login,mufj_apply_ipo "mufj"), # (sbi_login,sbi_apply_ipo "sbi"), # (monex_login,monex_apply_ipo "monex"), # (matsui_login,matsui_apply_ipo "matsui"), ] for test_login_func, test_apply_func, password_key in test_funcs: login_args = password_dict[password_key]["login_args"] test_login_func(chrome, *login_args) apply_args = password_dict[password_key]["apply_args"] applied_codes = test_apply_func(chrome, *apply_args)
bidhistory_trs = lxml_root.xpath( "//div[@id='modCtgSearchResult']/div[@class='untBody']/table")[0].xpath(".//tr") bidhistory = [] for tr in bidhistory_trs: tds = tr.xpath(".//td") td_texts = [td.text_content() for td in tds] bidhistory.append(td_texts) return bidhistory if __name__ == '__main__': from lxml.html import fromstring from umihico_commons.requests_common import headers_dict_user_agent from umihico_commons.chrome_wrapper import Chrome from pprint import pprint from requests import get ID = "q204067425" url = _gen_url_from_auc_id(ID) print(url) # res = get(url, headers=headers_dict_user_agent) c = Chrome() c.get(url) src = c.page_source try: # lxml_root = fromstring(res.text) lxml_root = fromstring(src) bid_hist = parse(lxml_root) pprint(bid_hist) except (Exception, ) as e: print(res.text)
import codecs

from umihico_commons.chrome_wrapper import Chrome

if __name__ == '__main__':
    # Save the HTML of one auction page to 'src1.txt' (UTF-8) so it can be
    # inspected offline.
    c = Chrome()
    try:
        # Disabled: dump of the closed-search listing page.
        # c.get("https://auctions.yahoo.co.jp/closedsearch/closedsearch?select=06&ei=UTF-8&n=100&auccat=26318&istatus=1")
        # with codecs.open('src0.txt', 'w', 'utf-8') as f:
        #     f.write(c.page_source)
        c.get("https://page.auctions.yahoo.co.jp/jp/auction/w220186143")
        # Fetch the source first so a browser error does not leave behind a
        # truncated, empty output file.
        page_html = c.page_source
        with codecs.open('src1.txt', 'w', 'utf-8') as f:
            # write(), not writelines(): the source is one string, and
            # writelines() would iterate it character by character.
            f.write(page_html)
    finally:
        c.quit()
def test_access():
    """Open each company link in a new tab, print its main area, close the tab.

    Every ``lnkCompanyName`` link on the search result page is Ctrl-clicked.
    The function then switches to the tab at index 1 and retries until at
    least one ``co-detail-tbl-row`` element is present. It prints the text of
    the ``main-area`` div, closes that tab and returns to the original window.

    NOTE(review): the retry loop has no upper bound or delay, as in the
    original; a page that never shows the detail rows will loop forever.
    """
    url = "https://b2b-ch.infomart.co.jp/company/search/list.page?1942&chi=23&cha=13"
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.keys import Keys
    c = Chrome()
    try:
        c.get(url)
        links = c.find_elements_by_xpath("//a[@id='lnkCompanyName']")
        main_tab = c.current_window_handle
        for link in links:
            # Build a new chain per link: a reused ActionChains keeps the
            # actions queued by earlier perform() calls and would replay the
            # previous Ctrl-clicks as well.
            ActionChains(c).key_down(Keys.CONTROL).click(link).key_up(
                Keys.CONTROL).perform()
            while True:
                try:
                    c.switch_tab(index=1)
                    rows = c.find_elements_by_xpath(
                        "//div[@class='co-detail-tbl-row']")
                    if not rows:
                        # Explicit error instead of a bare ``raise`` with no
                        # active exception; it only triggers another retry.
                        raise RuntimeError(
                            "company detail rows not found in the new tab yet")
                    main_area = c.find_element_by_xpath(
                        "//div[@class='main-area']")
                    print(main_area.text)
                    c.close()
                    c.switch_to_window(main_tab)
                except Exception as e:
                    # Report the failure and try the same tab again.
                    print(e)
                else:
                    break
    finally:
        # Shut the browser down even if loading or clicking failed.
        c.quit()