Пример #1
0
def get_biz(driver, tbnm):
    """Scrape the business-purpose change table found under element ``tbnm``.

    The table rows are located with
    ``//*[@id="<tbnm>"]/div/div/table/tbody/tr``. Row 0 is never read; rows
    1-4 hold one "added", one "changed" and one "deleted" entry plus a title
    row whose position depends on the table id.

    Args:
        driver: Selenium WebDriver already positioned on the frame that
            contains the table.
        tbnm: ``id`` attribute of the element wrapping the table. The id
            ``'LIB_L9017'`` uses a different row order from every other id.

    Returns:
        A list of four-item lists ``[code, first, second, third]`` where
        ``code`` is ``'1'`` (added), ``'2'`` (changed) or ``'3'`` (deleted).
        For added/deleted rows ``first`` and ``third`` are the texts of cells
        1 and 2 and ``second`` is ``''``; for the changed row the three items
        are the texts of cells 0, 1 and 2. Rows whose ``first`` item fails
        ``chk_no_data`` are left out. An empty list is returned when the
        table has no data row or its first data cell is ``''`` or ``'-'``.
    """
    rows = driver.find_elements_by_xpath(
        '//*[@id="{0}"]/div/div/table/tbody/tr'.format(tbnm))

    # Without a second row there is nothing to inspect; indexing rows[1]
    # unconditionally would raise IndexError.
    if len(rows) < 2:
        return []

    # The table exists but carries no data.
    if rows[1].find_elements_by_tag_name('td')[0].text in ('', '-'):
        return []

    # (row index, change code) in page order. The row index missing from each
    # layout (3 for LIB_L9017, 2 otherwise) is the title row and is skipped.
    if tbnm == 'LIB_L9017':
        layout = ((1, '1'), (2, '3'), (4, '2'))
    else:
        layout = ((1, '1'), (3, '2'), (4, '3'))

    res_arr = []
    for row_idx, code in layout:
        # Tolerate tables shorter than the expected five rows.
        if row_idx >= len(rows):
            break

        # Fetch the cells once per row; every lookup is a WebDriver call.
        cells = rows[row_idx].find_elements_by_tag_name('td')
        if code == '2':
            entry = [code, cells[0].text, cells[1].text, cells[2].text]
        else:
            entry = [code, cells[1].text, '', cells[2].text]

        # Drop rows without content (covers a changed row whose "before"
        # cell is just '-').
        if chk_no_data(entry[1]):
            res_arr.append(entry)

    return res_arr
Пример #2
0
def get_resolution(jm_code, rcp_no, rcp_gb, cursor):
    """Crawl one shareholder-meeting resolution filing and store it via ``cursor``.

    Opens the dart.fss.or.kr viewer page for ``rcp_no``, reads the master
    resolution table plus the optional director-appointment and
    business-purpose tables, then executes the INSERT statements produced by
    the ``*_ins`` query builders.

    Args:
        jm_code: Code string used in the lookup queries and as the prefix of
            the generated ``meet_seq`` key (presumably the stock/issue code;
            confirm with the caller).
        rcp_no: Receipt number of the filing to crawl.
        rcp_gb: Market flag. A value containing ``'Y'`` or ``'K'`` selects its
            own set of element ids; anything else uses the third set.
        cursor: Open DB cursor. This function neither commits nor closes it.

    Returns:
        ``0`` when a row for ``rcp_no`` already exists in ``proxy001``;
        otherwise ``None``. Any exception is logged through ``error_logger``
        and swallowed, so ``None`` is also returned on failure.
    """
    # Bound before the try block so the finally clause never touches an
    # unbound name when get_driver() itself fails.
    driver = None
    try:
        # Driver setup (resolution / notice viewer).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        driver.implicitly_wait(10)

        # Receipt-number history of the meeting notice. The first entry is
        # the original document; its first four characters are the year.
        rcpno_list = get_rcpno_list(driver)
        first_rcp_no = rcpno_list[0]
        first_rcp_yy = first_rcp_no[:4]

        # The resolution data lives inside the page's iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        # Listing classification decides which form table is present.
        is_y = 'Y' in rcp_gb
        form_id = 'XFormD52_Form0_Table0' if is_y else 'XFormD2_Form0_Table0'
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="{0}"]/tbody/tr'.format(form_id))

        def cell_text(row, col):
            # Text of the col-th <td> in the row-th master-table row.
            return tb_mst[row].find_elements_by_tag_name('td')[col].text

        # Meeting resolution master record.
        meet_tb = [0] * 9
        if is_y:
            meet_tb[0] = cell_text(1, 1)  # date
            meet_tb[1] = cell_text(1, 2)  # time
            meet_tb[8] = cell_text(0, 1)  # meeting type
        else:
            meet_tb[0] = cell_text(0, 2)  # date
            meet_tb[1] = cell_text(1, 1)  # time
            meet_tb[8] = cell_text(8, 1)  # meeting type

        meet_tb[2] = cell_text(2, 1)  # place
        meet_tb[3] = cell_text(3, 1)  # agenda
        meet_tb[4] = cell_text(4, 1)  # board resolution date
        meet_tb[5] = cell_text(5, 2)  # outside directors attending
        meet_tb[6] = cell_text(6, 1)  # outside directors absent
        meet_tb[7] = cell_text(7, 1)  # auditor attendance

        # Element ids of the optional sections, in the order: director,
        # outside director, audit-committee member, auditor appointment.
        if is_y:
            isa_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
            biz_id = 'LIB_L9017'
        elif 'K' in rcp_gb:
            isa_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
            biz_id = 'LIB_L7019'
        else:
            isa_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
            biz_id = 'LIB_L3023'

        # Director appointments: probe which sections exist, then scrape them.
        present_isa_ids = [
            section_id for section_id in isa_ids
            if driver.find_elements_by_xpath(
                '//*[@id="{0}"]'.format(section_id))
        ]
        isa_arr = []
        for section_id in present_isa_ids:
            isa_arr.extend(get_isa(driver, section_id))

        # Business-purpose changes.
        biz_arr = []
        if driver.find_elements_by_xpath('//*[@id="{0}"]'.format(biz_id)):
            biz_arr.extend(get_biz(driver, biz_id))

        # ------------------------------------------------------------------ #
        # DB insertion
        # NOTE(review): every statement below is assembled with str.format().
        # Switch to driver-side parameter binding once the DB-API paramstyle
        # of this connection is confirmed.

        # Duplicate check: skip filings that are already stored.
        cursor.execute(
            "select * from proxy001 where rcp_no = '{0}'".format(rcp_no))
        if cursor.rowcount > 0:
            return 0

        # Number of distinct meeting sequences already stored for this code
        # in the year of the first filing.
        cursor.execute(
            "select * from proxy001 where left(first_rcpno, 4) = '{0}' "
            "and jm_code = '{1}' group by meet_seq".format(
                first_rcp_yy, jm_code))
        max_seq = cursor.rowcount

        # Reuse the sequence of an earlier version of the same notice if one
        # exists; otherwise allocate the next one.
        cursor.execute(
            "select meet_seq from proxy001 where first_rcpno = '{0}'".format(
                first_rcp_no))
        seq = cursor.fetchone()

        if cursor.rowcount < 1:
            seq = str(max_seq + 1).zfill(2)
        else:
            seq = "".join(seq)[-2:]

        # Year part of the key; fall back to the current year when the
        # meeting date cannot be parsed.
        yyyy = make_ymd(meet_tb[0][:4])
        if yyyy is not None and yyyy != '':
            yyyy = yyyy[:4]
        else:
            yyyy = time.strftime('%Y')

        meet_seq = jm_code + yyyy + seq

        # Resolution master row.
        cursor.execute(
            resolution_mst_ins(meet_seq, meet_tb, jm_code, rcp_no,
                               first_rcp_no))

        # Director appointments, each with optional career and
        # concurrent-position rows.
        for i, isa in enumerate(isa_arr):
            cursor.execute(isa_info_ins(meet_seq, isa, rcp_no, i))

            if chk_no_data(isa[4]):
                cursor.execute(isa_car_ins(meet_seq, isa, rcp_no, i))

            if chk_no_data(isa[5]):
                cursor.execute(isa_dup_ins(meet_seq, isa, rcp_no, i))

        # Business-purpose changes.
        for biz in biz_arr:
            cursor.execute(biz_ins(meet_seq, biz, rcp_no))
    except Exception as e:
        error_logger.error(
            'Resolution crawling fail. : [{0}] [{1}] {2}'.format(
                jm_code, rcp_no, e))
    finally:
        if driver is not None:
            close_driver(driver)
Пример #3
0
def resolution_main(jm_code, rcp_no, rcp_yn, rcp_gb):
    """Crawl one shareholder-meeting resolution filing and insert it into the 'esg' DB.

    Opens the dart.fss.or.kr viewer page for ``rcp_no``, scrapes the master
    resolution table and the optional director-appointment and
    business-purpose sections, then executes the generated INSERT statements
    on a connection obtained from ``get_dbcon('esg')``.

    Args:
        jm_code: Code string used in the lookup query and written to the
            error log when insertion fails.
        rcp_no: Receipt number of the filing; its first eight characters
            start the ``report_ver`` key.
        rcp_yn: Correction flag. ``''`` means the filing is not a correction,
            in which case an already stored meeting is treated as a duplicate
            and nothing is inserted.
        rcp_gb: Market name. A value containing ``'유가'`` or ``'코스닥'``
            selects its own set of element ids; anything else uses the third
            set.

    Returns:
        None. Exceptions raised while inserting are swallowed after
        ``jm_code`` is appended to the error log; scraping and connection
        errors propagate, but the browser and connection are always closed.
    """
    # Driver setup (resolution / notice viewer).
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

    # Everything after the browser starts runs under try/finally so the
    # driver is released even when scraping or the DB work raises.
    try:
        # The resolution data lives inside the page's iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        # Listing classification decides which form table is present.
        is_yuga = '유가' in rcp_gb
        form_id = ('XFormD52_Form0_Table0' if is_yuga
                   else 'XFormD2_Form0_Table0')
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="{0}"]/tbody/tr'.format(form_id))

        def cell_text(row, col):
            # Text of the col-th <td> in the row-th master-table row.
            return tb_mst[row].find_elements_by_tag_name('td')[col].text

        # Meeting resolution master record.
        meet_tb = [0] * 9
        if is_yuga:
            meet_tb[0] = cell_text(1, 1)  # date
            meet_tb[1] = cell_text(1, 2)  # time
            meet_tb[8] = cell_text(0, 1)  # meeting type
        else:
            meet_tb[0] = cell_text(0, 2)  # date
            meet_tb[1] = cell_text(1, 1)  # time
            meet_tb[8] = cell_text(8, 1)  # meeting type

        meet_tb[2] = cell_text(2, 1)  # place
        meet_tb[3] = cell_text(3, 1)  # agenda
        meet_tb[4] = cell_text(4, 1)  # board resolution date
        meet_tb[5] = cell_text(5, 2)  # outside directors attending
        meet_tb[6] = cell_text(6, 1)  # outside directors absent
        meet_tb[7] = cell_text(7, 1)  # auditor attendance

        # Element ids of the optional sections, in the order: director,
        # outside director, audit-committee member, auditor appointment.
        if is_yuga:
            isa_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
            biz_id = 'LIB_L9017'
        elif '코스닥' in rcp_gb:
            isa_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
            biz_id = 'LIB_L7019'
        else:
            isa_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
            biz_id = 'LIB_L3023'

        # Director appointments: probe which sections exist, then scrape them.
        # find_elements_* returns a list, so plain truthiness is sufficient.
        present_isa_ids = [
            section_id for section_id in isa_ids
            if driver.find_elements_by_xpath(
                '//*[@id="{0}"]'.format(section_id))
        ]
        isa_arr = []
        for section_id in present_isa_ids:
            isa_arr.extend(get_isa(driver, section_id))

        # Business-purpose changes.
        biz_arr = []
        if driver.find_elements_by_xpath('//*[@id="{0}"]'.format(biz_id)):
            biz_arr.extend(get_biz(driver, biz_id))

        # DB insertion. The connection is acquired before its try block so
        # the finally clause only runs once ``conn`` is actually bound.
        conn = get_dbcon('esg')
        try:
            cursor = conn.cursor()
            try:
                # Look up meetings already stored for the same date, code
                # and meeting type.
                # NOTE(review): the query is assembled with str.format();
                # switch to parameter binding once the DB-API paramstyle of
                # this connection is confirmed.
                ymd = make_ymd(meet_tb[0])
                gb = get_regYn(meet_tb[8])
                seq_select = """select * from proxy001 where meet_ymd = '{0}' and jm_code = '{1}' and meet_gb = '{2}'
                         """.format(ymd, jm_code, gb)

                cursor.execute(seq_select)
                rows = cursor.rowcount

                # Duplicate check, applied only when this is not a correction
                # filing. Skip instead of exiting the interpreter: a duplicate
                # is not a failure and should not reach the error log.
                if rcp_yn == '' and rows > 0:
                    print('중복 데이터가 있습니다.')
                    return

                # report_ver key: first eight characters of rcp_no followed
                # by a two-digit sequence.
                report_ver = rcp_no[:8] + str(rows + 1).zfill(2)

                # Resolution master row.
                in_qry = resolution_mst_ins(meet_tb, jm_code, report_ver,
                                            rcp_no)
                cursor.execute(in_qry)
                print(in_qry)

                # Director appointments.
                if isa_arr:
                    ins_isa, dup_isa = isa_mst_ins(isa_arr, meet_tb[0],
                                                   jm_code, gb, report_ver)
                    for i, ins_qry in enumerate(ins_isa):
                        # Skip directors that are already stored.
                        cursor.execute(dup_isa[i])
                        if cursor.rowcount > 0:
                            print('중복된 이사가 있습니다.')
                            continue

                        cursor.execute(ins_qry)
                        print(str(i) + " : " + ins_qry)

                        # Career rows.
                        if chk_no_data(isa_arr[i][4]):
                            ins_isa_car = isa_car_ins(isa_arr[i], meet_tb[0],
                                                      jm_code, gb, report_ver,
                                                      i)
                            cursor.execute(ins_isa_car)
                            print(str(i) + " : " + ins_isa_car)

                        # Concurrent-position rows.
                        if chk_no_data(isa_arr[i][5]):
                            ins_isa_dup = isa_dup_ins(isa_arr[i], meet_tb[0],
                                                      jm_code, gb, report_ver,
                                                      i)
                            cursor.execute(ins_isa_dup)
                            print(str(i) + " : " + ins_isa_dup)

                # Business-purpose changes. The statements do not depend on
                # the loop index, so they are built once.
                if biz_arr:
                    ins_biz = biz_ins(biz_arr, meet_tb[0], jm_code, gb,
                                      report_ver)
                    for i in range(len(biz_arr)):
                        cursor.execute(ins_biz[i])
                        print(str(i) + " : " + str(ins_biz[i]))
            except Exception:
                # Best effort: remember which code failed and carry on.
                # ``except Exception`` leaves SystemExit and
                # KeyboardInterrupt alone.
                with open(
                        "C:\\Users\\rmffo\\PycharmProjects\\log\\error_log.txt",
                        'a') as f:
                    f.write(jm_code + '\n')
            finally:
                cursor.close()
        finally:
            close_dbcon(conn)
    finally:
        close_driver(driver)