def geocontrib_delete_project(project_name, admin_url):
    get_driver().get(admin_url)
    get_driver().find_element_by_link_text("Projets").click()
    get_driver().find_element_by_link_text(project_name).click()
    get_driver().find_element_by_link_text("Supprimer").click()
    get_driver().find_element_by_xpath(
        u"//input[@value='Oui, je suis sûr']").click()
def geocontrib_delete_feature(admin_url, feature_name):
    get_driver().get(admin_url)
    get_driver().find_element_by_link_text("Signalements").click()
    get_driver().find_element_by_link_text(feature_name).click()
    get_driver().find_element_by_link_text("Supprimer").click()
    get_driver().find_element_by_xpath(
        u"//input[@value='Oui, je suis sûr']").click()
def geocontrib_create_feature(feature_type_name, feature_name):
    get_driver().find_element_by_xpath(
        "//div[2]/div/div/div/div/a[2]/i").click()
    get_driver().find_element_by_id("id_title").click()
    get_driver().find_element_by_id("id_title").clear()
    get_driver().find_element_by_id("id_title").send_keys(feature_name)
    get_driver().find_element_by_link_text("Dessiner un point").click()
def geocontrib_json_export(project_name, feature_type_name, added_text):
    get_driver().find_element_by_xpath("//img").click()
    get_driver().find_element_by_link_text(project_name).click()
    get_driver().find_element_by_link_text("{}{}".format(
        feature_type_name, added_text)).click()
    get_driver().find_element_by_link_text("Exporter").click()
    get_driver().find_element_by_xpath("//div[3]/div/div").click()
def geocontrib_create_featuretype(feature_type_name):
    get_driver().find_element_by_link_text(
        u"Créer un nouveau type de signalement").click()
    get_driver().find_element_by_id("id_title").click()
    get_driver().find_element_by_id("id_title").clear()
    get_driver().find_element_by_id("id_title").send_keys(feature_type_name)
    get_driver().find_element_by_xpath("//button[@type='submit']").click()
def geocontrib_draft_search_map(project_name):
    get_driver().find_element_by_link_text(u"GéoContrib").click()
    get_driver().find_element_by_link_text(project_name).click()
    get_driver().find_element_by_id("map").click()
    get_driver().find_element_by_xpath(
        "//form[@id='form-filters']/div[2]/div/input[2]").click()
    get_driver().find_element_by_xpath(
        "//form[@id='form-filters']/div[2]/div/div[2]/div").click()
def geocontrib_delete_layer(admin_url, layer_title, layer_url, layer_type):
    get_driver().get(admin_url)
    get_driver().find_element_by_link_text("Couches").click()
    get_driver().find_element_by_link_text("{} - {} ({})".format(
        layer_title, layer_url, layer_type.lower())).click()
    get_driver().find_element_by_link_text("Supprimer").click()
    get_driver().find_element_by_xpath(
        u"//input[@value='Oui, je suis sûr']").click()
def geocontrib_json_import():
    get_driver().find_element_by_xpath(
        "//form[@id='form-import-features']/div/label/span").click()
    get_driver().find_element_by_id("json_file").click()
    time.sleep(1)
    # Send the file name to the file input element; the original called
    # send_keys() on the driver itself, which is not a WebDriver method.
    get_driver().find_element_by_id("json_file").send_keys("export_projet.json")
    get_driver().find_element_by_xpath("//button[@type='submit']").click()
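# A minimal usage sketch composing the GeoContrib helpers above into one flow.
# It is not part of the original suite: the project name, feature-type name,
# admin URL, and the added_text suffix are hypothetical placeholders, and it
# assumes get_driver() already holds an authenticated session positioned on
# the relevant GeoContrib pages, as the helpers above expect.
def geocontrib_smoke_test_sketch():
    admin_url = "https://example.org/geocontrib/admin/"  # placeholder
    geocontrib_create_featuretype("Type de test")
    geocontrib_create_feature("Type de test", "Signalement de test")
    geocontrib_json_export("Projet de test", "Type de test", " 1")  # suffix is a guess
    geocontrib_json_import()
    geocontrib_delete_feature(admin_url, "Signalement de test")
    geocontrib_delete_project("Projet de test", admin_url)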
def brfore_func(self):
    self.driver = get_driver()
    self.add_page = AddPage(self.driver)
    self.setting_page = SettingPage(self.driver)
    yield
    time.sleep(3)
    self.driver.quit()
def get_imdb_list_page_link():
    class PageLink(tool.MovieMetadata):
        def __init__(self, link):
            self.link = link

    driver = utils.get_driver()
    # Default start pages; the second assignment overrides the first.
    pre_link = "https://www.imdb.com/search/title/?release_date=2010-01-01,2020-12-31&runtime=60,300&start=9951&ref_=adv_nxt"
    pre_link = "https://www.imdb.com/search/title/?release_date=2000-01-01,2009-12-31&runtime=60,300"
    try:
        # Resume from the last "next page" link saved by a previous run.
        pre_df = pandas.read_csv(pathmng.imdb_next_link_path)
        pre_link = list(pre_df["link"])[-1]
    except Exception:
        pass
    link_list = []
    driver.get(pre_link)
    import time
    for i in range(1, 1000):
        link = driver.find_element_by_xpath(
            '//*[@class="lister-page-next next-page"]').get_attribute("href")
        link_list.append(PageLink(link))
        driver.find_element_by_xpath(
            '//*[@class="lister-page-next next-page"]').click()
        time.sleep(2)
        if i % 50 == 0:
            # Periodically flush the collected "next page" links to CSV.
            tool.save_metadata_to_csv(
                utils.filter_duplicate_preserve_order(link_list),
                pathmng.imdb_next_link_path)
            link_list.clear()
def setUp(self):
    self.logger = get_logger('Authentication')
    self.driver = get_driver()
    # self.driver.maximize_window()
    self.url = parser.get('site_to_test', 'url')
    self.driver.get(self.url)
    self.driver.implicitly_wait(3)
def get_tf_class():
    storage = get_driver(options.storage_driver)
    tf_info = storage.get_tf_info()
    if not tf_info['class_path']:
        print('Test Fixture Class is not defined; please use admin to set the class')
        exit(0)
    return tf_info['class_path']
def local_main():
    """Main function: write the output file to /output/."""
    # If True, also run the selenium code. Else, only generate the output Python file.
    run_selenium = args.run_selenium
    use_stanford_corenlp = args.use_corenlp
    pathsaver = PathSaver()
    if not use_stanford_corenlp:
        nlp = nltk
    else:
        from utils import get_stanford_parser
        nlp = get_stanford_parser(pathsaver.parser_path)
    allennlp = get_allen_parser(pathsaver.allen_path)
    sample_sents = [
        'Enter the "KAIST" in "SearchBox" and click the "Search" button',
        'Wait the "3 seconds".',
        'Refresh the website and move to "https://www.naver.com/".'
    ]
    sample_tuple = [("SearchBox", 'q'), ("Search", 'btnK')]
    # TODO: Change the above sample sents and tuple into Chrome extension output
    # using 'read_extension_output()'
    for sample_sent in sample_sents:
        driver = get_driver(pathsaver.driver_path) if run_selenium else None
        main_helper(sample_tuple, sample_sent, nlp, allennlp, driver,
                    pathsaver.driver_path, run_selenium)
def report_main(jm_code, rcp_no):
    try:
        # Set up the driver (resolution, notice)
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
        # rcpNo history of the shareholders' meeting resolutions
        rcpno_list = get_rcpno_list(driver)
        # Announcement year of the first document
        first_rcp_yy = rcpno_list[0][:4]
        conn = get_dbcon('esg')
        cursor = conn.cursor()
        # Check whether a compensation committee exists
        get_tab(driver, 'b')
        bosang_yn = get_board_yn(driver, 'b')
        print(bosang_yn)
        driver.switch_to_default_content()
        # Check whether an audit committee exists
        get_tab(driver, 'g')
        gamsa_yn = get_board_yn(driver, 'g')
        print(gamsa_yn)
        # ------------------------------------------------------------------- #
        # DB insert (duplicate check)
        insert_qry = """insert into proxy700_tmp values('{0}', '{1}', '{2}', '{3}')""".format(
            jm_code, '2018', bosang_yn, gamsa_yn)
        cursor.execute(insert_qry)
    finally:
        cursor.close()
        close_dbcon(conn)
        close_driver(driver)
def setUp(self):
    self.logger = get_logger('JobPlacement')
    self.driver = get_driver()
    # self.driver.maximize_window()
    self.url = parser.get('site_to_test', 'url')
    self.driver.get(self.url)
    self.driver.find_element_by_link_text("Job Placement Program").click()
def before_function(self):
    # Get the driver object
    self.driver = get_driver()
    # Instantiate the unified page-factory entry class
    self.page_factory = PageFactory(self.driver)
    yield
    # Teardown
    # time.sleep(3)
    self.driver.quit()  # quit the driver
def get_movie_id(num=100):
    driver = utils.get_driver()
    driver.get(
        "https://www.rottentomatoes.com/browse/dvd-streaming-all?minTomato=0&maxTomato=100&services=amazon;hbo_go;itunes;netflix_iw;vudu;amazon_prime;fandango_now&genres=1;2;4;5;6;8;9;10;11;13;18;14&sortBy=release"
    )
    # Click the "show more" button until enough movies are listed
    time.sleep(2)
    while True:
        driver.find_element_by_xpath('//*[@id="show-more-btn"]/button').click()
        time.sleep(2)
        num_movies = len(
            driver.find_elements_by_xpath('//*[@class="movie_info"]/a'))
        print(f"Found {num_movies} movies")
        if num_movies > num:
            break
    movies = driver.find_elements_by_xpath('//*[@class="movie_info"]/a')
    movie_id_set = set()
    for ele in movies:
        movie_id_set.add(str(ele.get_attribute("href")).split("/")[-1])
    with open(movie_id_path, "w") as f:
        f.write("\n".join(list(movie_id_set)))
    driver.close()
def setUp(self):
    self.classes = []
    self.active_class = None
    self.logger = get_logger('Groupclass')
    self.driver = get_driver()
    self.url = parser.get('site_to_test', 'url')
    self.driver.get(self.url)
    self.driver.maximize_window()
def setUp(self):
    self.logger = get_logger('AccountSettings')
    self.driver = get_driver()
    self.url = parser.get('site_to_test', 'url')
    self.driver.get(self.url)
    self.driver.maximize_window()
    self.driver.implicitly_wait(3)
def get_Academy_Award_for_Best_Actor_Director():
    driver = utils.get_driver()
    urls = ["https://en.wikipedia.org/wiki/Academy_Award_for_Best_Actor",
            "https://en.wikipedia.org/wiki/Academy_Award_for_Best_Director"]
    data = []
    for url in urls[0:2]:
        data.extend(get_academy_award_for_best_people(driver, url))
    tool.save_metadata_to_csv(data, pathmng.wiki_best_actor_director_path)
def get_rcpNo(jm_code, keywod, st_dt, ed_dt):
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    driver.implicitly_wait(10)
    driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
    driver.find_element_by_xpath(
        '//*[@id="searchForm"]/fieldset/div/p[3]/span[2]/a[7]').click()  # period
    # driver.find_element_by_name('startDate').send_keys(st_dt)  # period start
    # driver.find_element_by_name('endDate').send_keys(ed_dt)  # period end
    driver.find_element_by_name('reportName').send_keys(keywod)  # search keyword
    time.sleep(1)
    driver.find_element_by_xpath(
        '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search
    res_list = driver.find_elements_by_xpath(
        '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result list
    # Collect only the top row (so this must be run at short intervals);
    # extract usable data from the result list.
    if len(res_list) == 0:
        print('검색 결과가 없습니다.')  # "No search results."
        return 0
    else:
        item = res_list[0]
    # Document number
    rcp_no = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'a').get_attribute('href')[-14:]
    # Correction flag
    rcp_yn = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'a').find_element_by_tag_name('span').text
    # Market type
    rcp_gb = item.find_elements_by_tag_name('td')[5].find_element_by_tag_name(
        'img').get_attribute('title')
    if '유가' in rcp_gb:
        rcp_gb = 'K'
    else:
        rcp_gb = 'Q'
    if len(rcp_no) != 14:
        print('rcpNo 형식이 다릅니다.')  # "Unexpected rcpNo format."
        return 0
    if '첨부' in rcp_yn:
        print('첨부정정은 수집대상 제외')  # attachment corrections are excluded
        return 0
    close_driver(driver)
    return rcp_no
def get_rcpNo(jm_code, keywod):
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
    driver.find_element_by_xpath(
        '//*[@id="searchForm"]/fieldset/div/p[3]/span[2]/a[4]').click()  # period: 1 year
    checked = driver.find_element_by_xpath(
        '//*[@id="finalReport"]').get_attribute('checked')  # final-report checkbox state
    # if checked:
    #     driver.find_element_by_xpath('//*[@id="finalReport"]').click()  # uncheck final report
    driver.find_element_by_id('reportName').send_keys(keywod)  # search type: resolution
    driver.find_element_by_xpath(
        '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search
    res_list = driver.find_elements_by_xpath(
        '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result list
    # Collect only the top row (so this must be run at short intervals);
    # extract usable data from the result list.
    if len(res_list) == 0:
        print('검색 결과가 없습니다.')  # "No search results."
        return 0
    else:
        item = res_list[0]
        for i in range(0, len(res_list)):
            if '2018.12' in res_list[i].find_elements_by_tag_name(
                    'td')[2].find_element_by_tag_name('a').text:
                item = res_list[i]
    # Document number
    rcp_no = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'a').get_attribute('href')[-14:]
    # Correction flag
    rcp_yn = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'a').find_element_by_tag_name('span').text
    if len(rcp_no) != 14:
        print('rcpNo 형식이 다릅니다.')  # "Unexpected rcpNo format."
        return 0
    if '첨부' in rcp_yn:
        print('첨부정정은 수집대상 제외')  # attachment corrections are excluded
        return 0
    close_driver(driver)
    return rcp_no
def hando_main(jm_code, rcp_no, gijun_yy):
    # Set up the driver
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    hando = []
    hando.extend(get_hando(driver, jm_code, gijun_yy))
    for h in hando:
        print(h)
    # Close the driver
    close_driver(driver)
def get_html(rcp_no):
    # Set up the driver (resolution, notice)
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    # Switch into the document iframe of the shareholders' meeting resolution page
    driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))
    html = driver.page_source
    print(html)
    f_nm = 'C:\\Users\\admin\\Desktop\\html\\{0}.html'.format(rcp_no)
    f = open(f_nm, 'w')
    f.write(html)
    f.close()
def api_call_main(sample, pathsaver, run_selenium, nlp, allennlp):
    """API interaction main function.

    Input: sample, pathsaver, run_selenium, nlp, allennlp
    Output: code
    """
    driver = get_driver(pathsaver.driver_path) if run_selenium else None
    _, code = main_helper(sample['selectors'], sample['text'], nlp, allennlp,
                          driver, pathsaver.driver_path, run_selenium)
    return code
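# A minimal sketch of how api_call_main might be invoked, mirroring the setup
# in local_main above. The sample dict keys ('selectors', 'text') match the
# accesses inside api_call_main; the concrete selector tuples and sentence are
# placeholders taken from local_main, and run_selenium=False keeps the driver
# unset, so no browser is launched.
def api_call_example():
    pathsaver = PathSaver()
    allennlp = get_allen_parser(pathsaver.allen_path)
    sample = {
        'selectors': [("SearchBox", 'q'), ("Search", 'btnK')],
        'text': 'Enter the "KAIST" in "SearchBox" and click the "Search" button',
    }
    return api_call_main(sample, pathsaver, run_selenium=False,
                         nlp=nltk, allennlp=allennlp)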
def main():
    """Process Wilma's IG posts."""
    driver = get_driver()
    with open("posts.txt") as posts:
        for post in posts:
            post = post.strip()
            print(post)
            driver.get(urljoin(
                "https://www.instagram.com/",
                post,
            ))
            os.makedirs("posts-data", exist_ok=True)
            process_html(
                driver,
                f"posts-data/{post.split('/')[2]}.json",
            )
def get_rcpNo(jm_code, keywod):
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://kind.krx.co.kr/disclosure/details.do?method=searchDetailsMain#viewer')  # load the driver
    driver.find_element_by_xpath('//*[@id="AKCKwd"]').send_keys(jm_code)  # stock code
    time.sleep(1)
    driver.find_element_by_xpath(
        '//*[@id="search-btn-dates"]/ul/li[5]/a').send_keys(Keys.ENTER)  # period: 1 year
    time.sleep(1)
    checked = driver.find_element_by_xpath(
        '//*[@id="lastReport"]').get_attribute('checked')  # final-report checkbox state
    if checked:
        driver.find_element_by_xpath(
            '//*[@id="lastReport"]').send_keys(Keys.ENTER)  # uncheck "final report"
    driver.find_element_by_id('reportNmTemp').send_keys(keywod)  # search type: resolution
    time.sleep(1)
    driver.find_element_by_xpath(
        '//*[@id="searchForm"]/section[1]/div/div[3]/a[1]').send_keys(Keys.ENTER)  # search
    time.sleep(1)
    res_list = driver.find_elements_by_xpath(
        '//*[@id="main-contents"]/section[1]/table/tbody/tr')  # result list
    # Collect only the top row (so this must be run at short intervals);
    # extract usable data from the result list.
    if len(res_list) == 0:
        print('검색 결과가 없습니다.')  # "No search results."
        sys.exit(0)
    else:
        item = res_list[0]
    # Document number
    rcp_no = item.find_elements_by_tag_name('td')[3].find_element_by_tag_name(
        'a').get_attribute('onclick')[-19:-5]
    print(rcp_no)
    # Correction flag
    # rcp_yn = item.find_elements_by_tag_name('td')[3].find_element_by_tag_name('a').find_element_by_tag_name('font').text
    rcp_yn = ''
    # Market type
    rcp_gb = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'img').get_attribute('alt')
    if len(rcp_no) != 14:
        print('rcpNo 형식이 다릅니다.')  # "Unexpected rcpNo format."
        sys.exit(0)
    if '첨부' in rcp_yn:
        print('첨부정정은 수집대상 제외')  # attachment corrections are excluded
        sys.exit(0)
    close_driver(driver)
    return rcp_no, rcp_yn, rcp_gb
def get_rcpNo(jm_code, keywod, st_dt, en_dt):
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
    driver.find_element_by_xpath('//*[@id="startDate"]').send_keys(st_dt)  # period start
    driver.find_element_by_xpath('//*[@id="endDate"]').send_keys(en_dt)  # period end
    driver.find_element_by_id('reportName').send_keys(keywod)  # search type: resolution
    driver.find_element_by_xpath(
        '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search
    res_list = driver.find_elements_by_xpath(
        '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result list
    # Collect only the top row (so this must be run at short intervals);
    # extract usable data from the result list.
    if len(res_list) == 0:
        print('검색 결과가 없습니다.')  # "No search results."
        sys.exit(0)
    else:
        item = res_list[0]
    # Document number
    rcp_no = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'a').get_attribute('href')[-14:]
    # Correction flag
    rcp_yn = item.find_elements_by_tag_name('td')[2].find_element_by_tag_name(
        'a').find_element_by_tag_name('span').text
    if len(rcp_no) != 14:
        print('rcpNo 형식이 다릅니다.')  # "Unexpected rcpNo format."
        sys.exit(0)
    if '첨부' in rcp_yn:
        print('첨부정정은 수집대상 제외')  # attachment corrections are excluded
        sys.exit(0)
    close_driver(driver)
    return rcp_no, rcp_yn
def create_accounts():
    while True:
        logging.info("Getting proxy")
        proxy = get_proxy()
        logging.info(f"Got proxy, {proxy}")
        for _ in range(10):
            try:
                driver = get_driver(proxy)
                register_link = "https://login.aliexpress.com/"
                driver.get(register_link)
                set_location_cookie(driver)
                email, password = create_new_account(driver)
                with open("accounts.txt", "a") as myfile:
                    myfile.write(f"{email}:{password}\n")
                driver.close()
            except Exception as e:
                logging.warning(e)
                break
def get_recovery_url(self, email_name, email_url):
    print(" + get_recovery_url \n".upper())
    print('In get_validation_code')
    driver_mail = get_driver(True)
    driver_mail.get("http://www." + email_url)
    wait = WebDriverWait(driver_mail, 30)
    submit_btn = wait.until(EC.element_to_be_clickable((By.ID, 'sm')))
    name_field = driver_mail.find_element_by_id("mailbox")
    name_field.clear()
    name_field.send_keys(email_name)
    submit_btn.click()
    wait = WebDriverWait(driver_mail, 30)
    wait.until(EC.url_to_be("http://mailnesia.com/mailbox/" + email_name))
    driver_mail.find_element_by_xpath("//tbody/tr[1]/td[2]").click()
    wait = WebDriverWait(driver_mail, 30)
    wait.until(
        EC.text_to_be_present_in_element((By.XPATH, '//pre/a[1]'), "http"))
    code = driver_mail.find_element_by_xpath("//pre/a[1]").text
    driver_mail.quit()
    close_firefox()
    return code
def bd_main(jm_code, rcp_no):
    try:
        # Set up the driver (resolution, notice)
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
        driver.implicitly_wait(10)
        bd_gubun, bd_kind, bd_gum, bd_total, bd_gijun_ymd = get_bd_table(driver)
        conn = get_dbcon('esg')
        cursor = conn.cursor()
        # Duplicate check and DB insert/update
        dup_select = """select * from proxy080
                        where jm_code = '{0}'
                        and bd_gijun_ymd = '{1}' """.format(jm_code, bd_gijun_ymd)
        cursor.execute(dup_select)
        if cursor.rowcount > 0:
            insert_qry = """update proxy080
                            set bd_gubun = '{2}', bd_kind = '{3}',
                                bd_gum = {4}, bd_total = {5}
                            where jm_code = '{0}'
                            and bd_gijun_ymd = '{1}' """.format(
                jm_code, bd_gijun_ymd, bd_gubun, bd_kind, bd_gum, bd_total)
        else:
            insert_qry = """insert into proxy080
                            values('{0}', '{1}', '{2}', '{3}', {4}, {5}) """.format(
                jm_code, bd_gijun_ymd, bd_gubun, bd_kind, bd_gum, bd_total)
        cursor.execute(insert_qry)
    finally:
        cursor.close()
        close_dbcon(conn)
        close_driver(driver)
def initialize(self):
    self.logger = get_logger('StorageHandler')
    self._storage = get_driver(options.storage_driver)
def __init__(self):
    self.driver = get_driver()
def set_tf_class(cls_name):
    storage = get_driver(options.storage_driver)
    tf_info = storage.get_tf_info()
    tf_info['class_path'] = cls_name
    tf_info = storage.set_tf_info(tf_info)
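# A short usage sketch for the test-fixture helpers above: store a class path
# with set_tf_class and read it back with get_tf_class. It assumes
# options.storage_driver is configured so that get_driver() returns a storage
# backend exposing get_tf_info()/set_tf_info(), exactly as the two functions
# above expect; the dotted class path below is a hypothetical placeholder.
def tf_class_roundtrip_example():
    set_tf_class('fixtures.default.DefaultTestFixture')  # placeholder path
    assert get_tf_class() == 'fixtures.default.DefaultTestFixture'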