def report_main(jm_code, rcp_no):
    """Record the committee flags of one DART filing in ``proxy700_tmp``.

    Opens the DART viewer page for ``rcp_no``, asks ``get_board_yn`` about the
    'b' tab (compensation committee, per the original comments) and the 'g'
    tab (audit committee), prints both answers and inserts one row.

    Args:
        jm_code: Stock code written to the first column of the new row.
        rcp_no: DART receipt number used to build the viewer URL.

    Raises:
        Any exception raised by the driver or database helpers. Resources
        that were already acquired are released before it propagates.
    """
    # Pre-bind every resource so the cleanup below never touches an unbound
    # name (which would replace the real error with an UnboundLocalError).
    driver = None
    conn = None
    cursor = None
    try:
        # Driver setup (resolution / notice page)
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        # Receipt-number history of the shareholder-meeting resolution
        rcpno_list = get_rcpno_list(driver)
        # Year of the first document in that history.
        # NOTE(review): this value is never used; the insert below stores the
        # literal '2018' instead -- confirm which one is intended.
        first_rcp_yy = rcpno_list[0][:4]

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Compensation committee present?
        get_tab(driver, 'b')
        bosang_yn = get_board_yn(driver, 'b')
        print(bosang_yn)
        # Same call the rest of the file uses via the ``switch_to`` accessor;
        # the old ``switch_to_default_content()`` spelling is deprecated.
        driver.switch_to.default_content()

        # Audit committee present?
        get_tab(driver, 'g')
        gamsa_yn = get_board_yn(driver, 'g')
        print(gamsa_yn)

        # DB insert. The original comment mentions a duplicate check, but
        # none is implemented here.
        # Values are bound as parameters instead of being formatted into the
        # SQL text; str() keeps the exact text the old '{0}'.format() produced.
        cursor.execute(
            "insert into proxy700_tmp values(%s, %s, %s, %s)",
            (str(jm_code), '2018', str(bosang_yn), str(gamsa_yn)))
    finally:
        # Release in the original order (cursor, connection, driver); each
        # step still runs when an earlier one fails.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            try:
                if conn is not None:
                    close_dbcon(conn)
            finally:
                if driver is not None:
                    close_driver(driver)
def nps001_select(rcpno):
    """Open an 'esg' connection and cursor, then release both.

    The body runs no query yet and ``rcpno`` is not used.

    Args:
        rcpno: Currently unused.

    Raises:
        Any exception raised by ``get_dbcon`` or ``conn.cursor()``.
    """
    # Pre-bind so a failure in get_dbcon()/cursor() is not masked by an
    # UnboundLocalError raised from the cleanup block.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()
    finally:
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if conn is not None:
                close_dbcon(conn)
def get_rcplist(time, gubun):
    """Return filings newer than ``time`` that are not yet in ``proxy011``.

    Args:
        time: Exclusive lower bound compared against ``gsmda000.regdate``.
        gubun: 'R' selects the resolution keyword and the ``*_rs`` loggers;
            any other value selects the notice keyword and the ``*_nt``
            loggers.

    Returns:
        A list of rows ``(crp_cd, rpt_nm, rcp_no, regdate, crp_cls)`` in
        ``regdate`` order, where ``regdate`` is formatted as
        ``%Y%m%d%H%i%s``. Rows whose ``rcp_no`` already exists in
        ``proxy011`` are skipped. If an error is caught and logged, an empty
        list is returned (the original fell through and returned ``None``,
        which breaks callers that iterate over the result).
    """
    # Pick the keyword and info logger once instead of re-testing gubun at
    # every log call.
    if gubun == 'R':
        document_gb = resolution
        info_log = info_logeer_rs
        info_log.info(start_msg_rs)
    else:
        document_gb = notice
        info_log = info_logeer_nt
        info_log.info(start_msg_nt)

    # Pre-bind so the cleanup block never sees an unbound name.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Collection window is [last_time] ~ [start of this collection run].
        # NOTE(review): this statement stays string-built. It contains
        # literal '%' characters (date_format, LIKE) that would need escaping
        # under parameter binding, and ``jm_code`` is a module-level value
        # spliced in as a ready-made IN list.
        rcp_select = """SELECT crp_cd, rpt_nm, rcp_no,
                               date_format(regdate, '%Y%m%d%H%i%s'), crp_cls
                        FROM gsmda000
                        WHERE crp_cd IN ({0})
                        AND regdate > '{1}'
                        AND rpt_nm LIKE '%{2}%'
                        AND rpt_nm NOT LIKE '%첨부정정%'
                        ORDER BY regdate
                     """.format(jm_code, time, document_gb)
        cursor.execute(rcp_select)
        rcpnos = cursor.fetchall()

        result = []
        jm_list = []
        for rcpno in rcpnos:
            # Duplicate check: skip receipts that were already collected.
            cursor.execute(
                "select * from proxy011 where rcp_no = %s",
                (str(rcpno[2]),))
            if cursor.rowcount > 0:
                info_log.info(
                    'Duplicated rcp_no. (jm_code : [{0}])'.format(rcpno[0]))
                continue

            # Rows still to be collected
            result.append(rcpno)
            jm_list.append(rcpno[0])

        info_log.info('jm_code list : {0}'.format(jm_list))
        return result
    except Exception as e:
        # The error logger is looked up only here, as in the original.
        if gubun == 'R':
            error_logeer_rs.error(e)
        else:
            error_logeer_nt.error(e)
        return []
    finally:
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if conn is not None:
                close_dbcon(conn)
def set_crawl_time(time, gubun):
    """Store ``time`` as ``c_time`` for the ``proxy000`` row of ``gubun``.

    Args:
        time: New value for the ``c_time`` column.
        gubun: Value matched against ``notice_gb``.

    Raises:
        Any exception raised by the database helpers or the update itself.
    """
    # Pre-bind so a connection failure is not masked by an UnboundLocalError
    # raised from the cleanup block.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()
        # Bound parameters replace the string-built statement; str() keeps
        # the exact text the old '{0}'.format() produced.
        # NOTE(review): no commit is issued here -- presumably close_dbcon()
        # or autocommit persists the update; confirm.
        cursor.execute(
            "update proxy000 set c_time = %s where notice_gb = %s",
            (str(time), str(gubun)))
    finally:
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if conn is not None:
                close_dbcon(conn)
def get_crawl_time(gubun):
    """Return ``c_time`` from the ``proxy000`` row whose ``notice_gb`` is ``gubun``.

    Args:
        gubun: Value matched against ``notice_gb``.

    Returns:
        The first column of the first row the query returns.

    Raises:
        TypeError: If ``fetchone()`` yields ``None``, i.e. no row matched
            (assumes DB-API ``fetchone`` semantics -- confirm for the driver
            in use).
        Any exception raised by the database helpers or the query itself.
    """
    # Pre-bind so a connection failure is not masked by an UnboundLocalError
    # raised from the cleanup block.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()
        # Bound parameter replaces the string-built statement; str() keeps
        # the exact text the old '{0}'.format() produced.
        cursor.execute(
            "select c_time from proxy000 where notice_gb = %s",
            (str(gubun),))
        last_date = cursor.fetchone()
        return last_date[0]
    finally:
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if conn is not None:
                close_dbcon(conn)
def bd_main(jm_code, rcp_no):
    """Scrape one filing's table via ``get_bd_table`` and upsert it into ``proxy080``.

    A row is identified by ``(jm_code, bd_gijun_ymd)``: an existing row is
    updated, otherwise a new one is inserted.

    Args:
        jm_code: Stock code, part of the row key.
        rcp_no: DART receipt number used to build the viewer URL.

    Raises:
        Any exception raised by the driver, the scraper or the database
        helpers. Resources already acquired are released before it
        propagates.
    """
    # Pre-bind every resource so the cleanup below never touches an unbound
    # name (which would replace the real error with an UnboundLocalError).
    driver = None
    conn = None
    cursor = None
    try:
        # Driver setup (resolution / notice page)
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
        driver.implicitly_wait(10)

        bd_gubun, bd_kind, bd_gum, bd_total, bd_gijun_ymd = get_bd_table(
            driver)

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Duplicate check: does a row with this key already exist?
        cursor.execute(
            "select * from proxy080 where jm_code = %s and bd_gijun_ymd = %s",
            (str(jm_code), str(bd_gijun_ymd)))

        # NOTE(review): the upsert stays string-built because bd_gum and
        # bd_total are spliced in unquoted; binding them could change the SQL
        # the server sees. Confirm their types before parameterizing.
        if cursor.rowcount > 0:
            upsert_qry = """update proxy080
                            set bd_gubun = '{2}', bd_kind = '{3}',
                                bd_gum = {4}, bd_total = {5}
                            where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)
        else:
            upsert_qry = """insert into proxy080
                            values('{0}', '{1}', '{2}', '{3}', {4}, {5})
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)
        cursor.execute(upsert_qry)
    finally:
        # Release in the original order (cursor, connection, driver); each
        # step still runs when an earlier one fails.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            try:
                if conn is not None:
                    close_dbcon(conn)
            finally:
                if driver is not None:
                    close_driver(driver)
# NOTE(review): in SOURCE this call sits at the very start of the line, ahead
# of the definition below. It looks like the tail of a definition that lies
# outside this view -- confirm where it belongs.
close_driver(driver)


def resolution_main(conn):
    """Collect every new resolution filing and advance the crawl marker.

    Reads the last crawl time for 'R', fetches the filings registered after
    it, and hands each one to ``get_resolution`` on a fresh cursor. The
    connection is committed after each filing, and the registration time of
    the last processed filing is stored as the new crawl time.

    Args:
        conn: Open database connection; it is committed here but not closed.

    Raises:
        Any exception raised while processing a filing. The cursor opened for
        that filing is closed first, and the crawl time is not updated.
    """
    start_time = get_crawl_time('R')
    end_time = start_time
    # get_rcplist may hand back None (e.g. after it logs a database error);
    # treat that as "nothing to do" instead of failing in the loop below.
    rcpnos = get_rcplist(start_time, 'R') or []
    # A hard-coded sample row was left commented out here for debugging:
    # ['010050', <report name>, '20200130800253', '20200213180755', 'Y']

    for rcpno in rcpnos:
        info_logger.info('---------- rcp_no : [{0}] ----------'.format(
            rcpno[2]))

        cursor = conn.cursor()
        try:
            jm_code, rcp_no, rcp_yn, rcp_gb = get_rcpNo(rcpno)
            get_resolution(jm_code, rcp_no, rcp_gb, cursor)
        finally:
            # Close the per-filing cursor even when processing fails.
            cursor.close()

        end_time = rcpno[3]
        conn.commit()
        time.sleep(1)

    set_crawl_time(end_time, 'R')


if __name__ == "__main__":
    conn = get_dbcon('esg')
    try:
        resolution_main(conn)
    finally:
        # Release the connection even when the run aborts.
        close_dbcon(conn)
def resolution_main(jm_code, rcp_no, rcp_yn, rcp_gb):
    """Scrape one shareholder-meeting resolution filing and store it.

    Reads the master table of the filing (date, time, place, agenda, board
    resolution date, outside-director attendance, auditor attendance,
    meeting type), the director-appointment sections and the
    business-purpose section, then inserts everything through the
    query-builder helpers.

    Args:
        jm_code: Stock code of the company.
        rcp_no: DART receipt number; its first eight characters prefix
            ``report_ver``.
        rcp_yn: Correction marker. When it is the empty string and a row for
            the same meeting already exists, nothing is inserted.
        rcp_gb: Listing label; it selects the table and section element ids.

    Behaviour notes:
        * A duplicate used to call ``sys.exit(0)``, but the bare ``except:``
          swallowed the ``SystemExit``, so the run continued and the stock
          code was appended to the error log. The duplicate case now returns
          explicitly and is no longer reported as an error.
        * Errors during the inserts are still caught: the stock code is
          appended to the error log and the function returns normally.
        * The driver, cursor and connection are released on every path.
    """
    # Element ids per listing label: (master table, director sections in the
    # order director / outside director / audit-committee member / auditor,
    # business-purpose section).
    if '유가' in rcp_gb:
        table_id = 'XFormD52_Form0_Table0'
        isa_div_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
        biz_div_id = 'LIB_L9017'
    elif '코스닥' in rcp_gb:
        table_id = 'XFormD2_Form0_Table0'
        isa_div_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
        biz_div_id = 'LIB_L7019'
    else:
        table_id = 'XFormD2_Form0_Table0'
        isa_div_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
        biz_div_id = 'LIB_L3023'

    # Driver setup (resolution / notice page)
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    try:
        # The filing body lives inside the page's iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="{0}"]/tbody/tr'.format(table_id))

        def cell(row, col):
            """Return the text of cell ``col`` in master-table row ``row``."""
            return tb_mst[row].find_elements_by_tag_name('td')[col].text

        # Meeting master data; the first rows are laid out differently per
        # listing label.
        meet_tb = [0 for _ in range(9)]
        if '유가' in rcp_gb:
            meet_tb[0] = cell(1, 1)  # date
            meet_tb[1] = cell(1, 2)  # time
            meet_tb[8] = cell(0, 1)  # meeting type
        else:
            meet_tb[0] = cell(0, 2)  # date
            meet_tb[1] = cell(1, 1)  # time
            meet_tb[8] = cell(8, 1)  # meeting type
        meet_tb[2] = cell(2, 1)  # place
        meet_tb[3] = cell(3, 1)  # agenda
        meet_tb[4] = cell(4, 1)  # board resolution date
        meet_tb[5] = cell(5, 2)  # outside directors present
        meet_tb[6] = cell(6, 1)  # outside directors absent
        meet_tb[7] = cell(7, 1)  # auditor attendance

        # Director appointments: probe all sections first, then read the
        # ones that exist (same order of driver calls as before).
        present_isa_ids = [
            div_id for div_id in isa_div_ids
            if driver.find_elements_by_xpath(
                '//*[@id="{0}"]'.format(div_id))
        ]
        isa_arr = []
        for div_id in present_isa_ids:
            isa_arr.extend(get_isa(driver, div_id))

        # Business-purpose changes
        biz_arr = []
        if driver.find_elements_by_xpath('//*[@id="{0}"]'.format(biz_div_id)):
            biz_arr.extend(get_biz(driver, biz_div_id))

        # DB insert. Pre-bind so cleanup never touches an unbound name.
        conn = None
        cursor = None
        try:
            conn = get_dbcon('esg')
            cursor = conn.cursor()
            try:
                # Key values used to look up an existing meeting
                ymd = make_ymd(meet_tb[0])
                gb = get_regYn(meet_tb[8])

                seq_select = """select * from proxy001
                                where meet_ymd = '{0}' and jm_code = '{1}'
                                and meet_gb = '{2}'
                             """.format(ymd, jm_code, gb)
                cursor.execute(seq_select)
                rows = cursor.rowcount

                # Duplicate check, skipped for corrected filings
                if rcp_yn == '' and rows > 0:
                    print('중복 데이터가 있습니다.')
                    return

                # report_ver key: filing date + two-digit sequence
                report_ver = rcp_no[:8] + str(rows + 1).zfill(2)

                # Resolution master row
                in_qry = resolution_mst_ins(meet_tb, jm_code, report_ver,
                                            rcp_no)
                cursor.execute(in_qry)
                print(in_qry)

                # Director appointments
                if isa_arr:
                    ins_isa, dup_isa = isa_mst_ins(isa_arr, meet_tb[0],
                                                   jm_code, gb, report_ver)
                    for i, ins_qry in enumerate(ins_isa):
                        # Skip directors that are already stored
                        cursor.execute(dup_isa[i])
                        if cursor.rowcount > 0:
                            print('중복된 이사가 있습니다.')
                            continue

                        cursor.execute(ins_qry)
                        print(str(i) + " : " + ins_qry)

                        # Career history
                        if chk_no_data(isa_arr[i][4]):
                            ins_isa_car = isa_car_ins(isa_arr[i], meet_tb[0],
                                                      jm_code, gb,
                                                      report_ver, i)
                            cursor.execute(ins_isa_car)
                            print(str(i) + " : " + ins_isa_car)

                        # Concurrent positions
                        if chk_no_data(isa_arr[i][5]):
                            ins_isa_dup = isa_dup_ins(isa_arr[i], meet_tb[0],
                                                      jm_code, gb,
                                                      report_ver, i)
                            cursor.execute(ins_isa_dup)
                            print(str(i) + " : " + ins_isa_dup)

                # Business-purpose changes
                if biz_arr:
                    # Built once instead of once per iteration.
                    # NOTE(review): assumes biz_ins() is a pure query
                    # builder -- confirm.
                    ins_biz = biz_ins(biz_arr, meet_tb[0], jm_code, gb,
                                      report_ver)
                    for i in range(len(biz_arr)):
                        cursor.execute(ins_biz[i])
                        print(str(i) + " : " + str(ins_biz[i]))
            except Exception as exc:
                # Best-effort as before: note the failing stock code and go
                # on. SystemExit and KeyboardInterrupt are no longer
                # swallowed.
                print('resolution_main failed for jm_code {0}: {1!r}'.format(
                    jm_code, exc))
                with open("C:\\Users\\rmffo\\PycharmProjects\\log\\error_log.txt",
                          'a') as f:
                    f.write(jm_code + '\n')
        finally:
            try:
                if cursor is not None:
                    cursor.close()
            finally:
                if conn is not None:
                    close_dbcon(conn)
    finally:
        # Always close the driver, including when scraping raises.
        close_driver(driver)