def get_biz(driver, tbnm):
    """Extract the business-purpose change rows from one section of the page.

    The rows are located with the XPath
    ``//*[@id="<tbnm>"]/div/div/table/tbody/tr``. Row 0 is never read; only
    three fixed row positions carry data, and which position means what
    depends on the section id:

    * ``'LIB_L9017'``: row 1 = add, row 2 = delete, row 4 = change
    * any other id:    row 1 = add, row 3 = change, row 4 = delete

    The remaining row among 1-4 is a title row and is skipped.

    Args:
        driver: Selenium WebDriver already positioned on the document frame.
        tbnm: ``id`` attribute of the element wrapping the table.

    Returns:
        A list of 4-item lists ``[code, value_1, value_2, value_3]`` where
        ``code`` is ``'1'`` (add), ``'2'`` (change) or ``'3'`` (delete).
        Add/delete rows take ``td[1]`` and ``td[2]`` with an empty string in
        between; change rows take ``td[0]``, ``td[1]`` and ``td[2]``.
        Records whose ``value_1`` fails ``chk_no_data`` are dropped.
        An empty list is returned when the table is missing, has no data
        rows, or its first data cell is ``''`` or ``'-'``.
    """
    rows = driver.find_elements_by_xpath(
        '//*[@id="{0}"]/div/div/table/tbody/tr'.format(tbnm))

    # Missing table or a table with nothing but its first row: previously
    # this raised IndexError on rows[1].
    if len(rows) < 2:
        return []

    # The table exists but carries no data (first cell blank or '-').
    first_cells = rows[1].find_elements_by_tag_name('td')
    if not first_cells or first_cells[0].text in ('', '-'):
        return []

    # (row index, change code) in the order the records must be returned.
    if tbnm == 'LIB_L9017':
        layout = ((1, '1'), (2, '3'), (4, '2'))
    else:
        layout = ((1, '1'), (3, '2'), (4, '3'))

    records = []
    for row_idx, code in layout:
        # Tolerate tables shorter than the expected layout instead of
        # failing later; rows outside the layout are simply never read.
        if row_idx >= len(rows):
            continue
        cells = rows[row_idx].find_elements_by_tag_name('td')
        if code == '2':
            # Change row: presumably before / after / reason -- TODO confirm
            # the column meaning against the page.
            record = [code, cells[0].text, cells[1].text, cells[2].text]
        else:
            # Add / delete row: the middle slot is always left empty.
            record = [code, cells[1].text, '', cells[2].text]
        records.append(record)

    # Drop records whose first value is reported as "no data" (this also
    # covers a change row whose first value is just '-').
    return [record for record in records if chk_no_data(record[1])]
def get_resolution(jm_code, rcp_no, rcp_gb, cursor):
    """Scrape one shareholder-meeting resolution filing and insert it.

    Opens ``http://dart.fss.or.kr/dsaf001/main.do?rcpNo=<rcp_no>`` in a
    browser, reads the meeting master table plus the optional
    director-appointment and business-purpose sections, and executes the
    insert queries produced by the ``*_ins`` builders on ``cursor``.

    Args:
        jm_code: Issue code; used in the ``proxy001`` lookups and as the
            prefix of the generated ``meet_seq``.
        rcp_no: Receipt number of the filing to scrape.
        rcp_gb: Listing classification. ``'Y'`` and ``'K'`` select different
            table/section ids; anything else uses the third set
            (presumably KOSPI / KOSDAQ / other -- TODO confirm).
        cursor: Open DB-API cursor. It is neither committed nor closed here.

    Returns:
        ``0`` when a ``proxy001`` row with this ``rcp_no`` already exists,
        otherwise ``None``.

    Any exception raised while scraping or inserting is logged through
    ``error_logger`` and swallowed. The browser is closed in every case.
    """
    # Bound before the try so the finally clause can tell whether a browser
    # was actually started; previously a failure inside get_driver led to an
    # UnboundLocalError in finally that masked the real error.
    driver = None
    try:
        # Driver setup (resolution / notice page).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
        driver.implicitly_wait(10)

        # Receipt-number history of the meeting notice; the first entry is
        # the original document and its first four characters are the year.
        rcpno_list = get_rcpno_list(driver)
        first_rcp_no = rcpno_list[0]
        first_rcp_yy = first_rcp_no[:4]

        # The document itself lives inside an iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        # Listing classification decides which master table to read.
        is_y = 'Y' in rcp_gb
        if is_y:
            master_id = 'XFormD52_Form0_Table0'
        else:
            master_id = 'XFormD2_Form0_Table0'
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="{0}"]/tbody/tr'.format(master_id))

        def cell_text(row_idx, col_idx):
            # Text of one <td> of the master table.
            return tb_mst[row_idx].find_elements_by_tag_name(
                'td')[col_idx].text

        # Meeting master record.
        meet_tb = [0] * 9
        if is_y:
            meet_tb[0] = cell_text(1, 1)  # date
            meet_tb[1] = cell_text(1, 2)  # time
            meet_tb[8] = cell_text(0, 1)  # meeting type
        else:
            meet_tb[0] = cell_text(0, 2)  # date
            meet_tb[1] = cell_text(1, 1)  # time
            meet_tb[8] = cell_text(8, 1)  # meeting type
        meet_tb[2] = cell_text(2, 1)  # venue
        meet_tb[3] = cell_text(3, 1)  # agenda
        meet_tb[4] = cell_text(4, 1)  # board resolution date
        meet_tb[5] = cell_text(5, 2)  # outside directors attending
        meet_tb[6] = cell_text(6, 1)  # outside directors absent
        meet_tb[7] = cell_text(7, 1)  # auditor attendance

        # Section ids per listing classification. Order of isa_ids:
        # director, outside director, audit committee member, auditor.
        if is_y:
            isa_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
            biz_id = 'LIB_L9017'
        elif 'K' in rcp_gb:
            isa_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
            biz_id = 'LIB_L7019'
        else:
            isa_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
            biz_id = 'LIB_L3023'

        # Director appointments: only sections present on the page are read.
        isa_arr = []
        for section_id in isa_ids:
            if driver.find_elements_by_xpath(
                    '//*[@id="{0}"]'.format(section_id)):
                isa_arr.extend(get_isa(driver, section_id))

        # Business-purpose changes.
        biz_arr = []
        if driver.find_elements_by_xpath('//*[@id="{0}"]'.format(biz_id)):
            biz_arr.extend(get_biz(driver, biz_id))

        # ------------------------------------------------------------------ #
        # DB insert
        # NOTE(review): every query below is built with str.format. Switch to
        # parameterized queries once the DB driver's paramstyle is confirmed.

        # Duplicate check: nothing to do when this filing is already stored.
        dup_select = """select * from proxy001 where rcp_no = '{0}'""".format(
            rcp_no)
        cursor.execute(dup_select)
        if cursor.rowcount > 0:
            return 0

        # Number of distinct meet_seq values for this issue in the year of
        # the first filing.
        max_select = """select * from proxy001 where left(first_rcpno, 4) = '{0}' and jm_code = '{1}' group by meet_seq """.format(
            first_rcp_yy, jm_code)
        cursor.execute(max_select)
        max_seq = cursor.rowcount

        # Reuse the sequence of an earlier filing of the same meeting when
        # there is one, otherwise allocate the next two-digit sequence.
        seq_select = """select meet_seq from proxy001 where first_rcpno = '{0}' """.format(
            first_rcp_no)
        cursor.execute(seq_select)
        seq_row = cursor.fetchone()
        if cursor.rowcount < 1:
            seq = str(max_seq + 1).zfill(2)
        else:
            seq = "".join(seq_row)[-2:]

        # Year part of the key: taken from the meeting date, falling back to
        # the current year when make_ymd yields nothing.
        yyyy = make_ymd(meet_tb[0][:4])
        if yyyy is None or yyyy == '':
            yyyy = time.strftime('%Y')
        else:
            yyyy = yyyy[:4]

        meet_seq = jm_code + yyyy + seq

        # Resolution master row.
        cursor.execute(
            resolution_mst_ins(meet_seq, meet_tb, jm_code, rcp_no,
                               first_rcp_no))

        # Director appointments, with career / concurrent-post rows only
        # when chk_no_data accepts the corresponding field.
        for i, isa in enumerate(isa_arr):
            cursor.execute(isa_info_ins(meet_seq, isa, rcp_no, i))
            if chk_no_data(isa[4]):
                cursor.execute(isa_car_ins(meet_seq, isa, rcp_no, i))
            if chk_no_data(isa[5]):
                cursor.execute(isa_dup_ins(meet_seq, isa, rcp_no, i))

        # Business-purpose changes.
        for biz in biz_arr:
            cursor.execute(biz_ins(meet_seq, biz, rcp_no))

    except Exception as e:
        error_logger.error(
            'Resolution crawling fail. : [{0}] [{1}] {2}'.format(
                jm_code, rcp_no, e))
    finally:
        if driver is not None:
            close_driver(driver)
def resolution_main(jm_code, rcp_no, rcp_yn, rcp_gb):
    """Scrape one shareholder-meeting resolution filing and store it.

    Opens ``http://dart.fss.or.kr/dsaf001/main.do?rcpNo=<rcp_no>``, reads
    the meeting master table and the optional director-appointment and
    business-purpose sections, then opens its own ``'esg'`` DB connection
    and executes the insert queries produced by the ``*_ins`` builders.

    Args:
        jm_code: Issue code; used in the ``proxy001`` lookup and written to
            the error log when an insert fails.
        rcp_no: Receipt number of the filing; its first eight characters
            prefix the generated ``report_ver``.
        rcp_yn: Correction marker. When it is ``''`` (not a correction) and
            a matching ``proxy001`` row exists, the process exits.
        rcp_gb: Listing classification text. ``'유가'`` and ``'코스닥'``
            select different table/section ids; anything else uses the
            third set.

    Raises:
        SystemExit: with status 0 when a duplicate is detected for a
            non-correction filing. The earlier bare ``except:`` swallowed
            this exit and logged it as an error instead.

    Any other exception raised during the DB phase is recorded by appending
    ``jm_code`` to the error log file and then swallowed. The cursor, the
    connection and the browser are released on every path.

    NOTE(review): the argument lists passed to ``resolution_mst_ins``,
    ``isa_car_ins``, ``isa_dup_ins`` and ``biz_ins`` here differ from the
    ones used in ``get_resolution`` -- confirm against the helper
    definitions.
    """
    # Driver setup (resolution / notice page).
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    try:
        # The document itself lives inside an iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        # Listing classification decides which master table to read.
        is_yuga = '유가' in rcp_gb
        if is_yuga:
            master_id = 'XFormD52_Form0_Table0'
        else:
            master_id = 'XFormD2_Form0_Table0'
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="{0}"]/tbody/tr'.format(master_id))

        def cell_text(row_idx, col_idx):
            # Text of one <td> of the master table.
            return tb_mst[row_idx].find_elements_by_tag_name(
                'td')[col_idx].text

        # Meeting master record.
        meet_tb = [0] * 9
        if is_yuga:
            meet_tb[0] = cell_text(1, 1)  # date
            meet_tb[1] = cell_text(1, 2)  # time
            meet_tb[8] = cell_text(0, 1)  # meeting type
        else:
            meet_tb[0] = cell_text(0, 2)  # date
            meet_tb[1] = cell_text(1, 1)  # time
            meet_tb[8] = cell_text(8, 1)  # meeting type
        meet_tb[2] = cell_text(2, 1)  # venue
        meet_tb[3] = cell_text(3, 1)  # agenda
        meet_tb[4] = cell_text(4, 1)  # board resolution date
        meet_tb[5] = cell_text(5, 2)  # outside directors attending
        meet_tb[6] = cell_text(6, 1)  # outside directors absent
        meet_tb[7] = cell_text(7, 1)  # auditor attendance

        # Section ids per listing classification. Order of isa_ids:
        # director, outside director, audit committee member, auditor.
        if is_yuga:
            isa_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
            biz_id = 'LIB_L9017'
        elif '코스닥' in rcp_gb:
            isa_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
            biz_id = 'LIB_L7019'
        else:
            isa_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
            biz_id = 'LIB_L3023'

        # Director appointments: only sections present on the page are read.
        isa_arr = []
        for section_id in isa_ids:
            if driver.find_elements_by_xpath(
                    '//*[@id="{0}"]'.format(section_id)):
                isa_arr.extend(get_isa(driver, section_id))

        # Business-purpose changes.
        biz_arr = []
        if driver.find_elements_by_xpath('//*[@id="{0}"]'.format(biz_id)):
            biz_arr.extend(get_biz(driver, biz_id))

        # DB insert
        # NOTE(review): the queries are built with str.format. Switch to
        # parameterized queries once the DB driver's paramstyle is confirmed.
        conn = get_dbcon('esg')
        try:
            cursor = conn.cursor()
            try:
                # Lookup keys for the meeting.
                ymd = make_ymd(meet_tb[0])
                gb = get_regYn(meet_tb[8])

                seq_select = """select * from proxy001 where meet_ymd = '{0}' and jm_code = '{1}' and meet_gb = '{2}' """.format(
                    ymd, jm_code, gb)
                cursor.execute(seq_select)
                rows = cursor.rowcount

                # Duplicate check, skipped for correction filings.
                if rcp_yn == '' and rows > 0:
                    print('중복 데이터가 있습니다.')
                    sys.exit(0)

                # report_ver key: filing date + two-digit sequence.
                report_ver = rcp_no[:8] + str(rows + 1).zfill(2)

                # Resolution master row.
                in_qry = resolution_mst_ins(meet_tb, jm_code, report_ver,
                                            rcp_no)
                cursor.execute(in_qry)
                print(in_qry)

                # Director appointments.
                if isa_arr:
                    ins_isa, dup_isa = isa_mst_ins(isa_arr, meet_tb[0],
                                                   jm_code, gb, report_ver)
                    for i, ins_query in enumerate(ins_isa):
                        # Skip directors that are already stored.
                        cursor.execute(dup_isa[i])
                        if cursor.rowcount > 0:
                            print('중복된 이사가 있습니다.')
                            continue

                        cursor.execute(ins_query)
                        print(str(i) + " : " + ins_query)

                        # Career rows.
                        if chk_no_data(isa_arr[i][4]):
                            ins_isa_car = isa_car_ins(
                                isa_arr[i], meet_tb[0], jm_code, gb,
                                report_ver, i)
                            cursor.execute(ins_isa_car)
                            print(str(i) + " : " + ins_isa_car)

                        # Concurrent-post rows.
                        if chk_no_data(isa_arr[i][5]):
                            ins_isa_dup = isa_dup_ins(
                                isa_arr[i], meet_tb[0], jm_code, gb,
                                report_ver, i)
                            cursor.execute(ins_isa_dup)
                            print(str(i) + " : " + ins_isa_dup)

                # Business-purpose changes.
                if biz_arr:
                    # The argument list does not depend on the loop index,
                    # so the builder is called once instead of once per row
                    # (assumes biz_ins has no side effects).
                    ins_biz = biz_ins(biz_arr, meet_tb[0], jm_code, gb,
                                      report_ver)
                    for i in range(len(biz_arr)):
                        cursor.execute(ins_biz[i])
                        print(str(i) + " : " + str(ins_biz[i]))

            except Exception:
                # `except Exception` (not a bare except) so that the
                # sys.exit(0) above and Ctrl-C are not swallowed here.
                with open(
                        "C:\\Users\\rmffo\\PycharmProjects\\log\\error_log.txt",
                        'a') as log_file:
                    log_file.write(jm_code + '\n')
            finally:
                cursor.close()
        finally:
            close_dbcon(conn)
    finally:
        # Always release the browser, including when scraping raises or the
        # process is exiting.
        close_driver(driver)