示例#1
0
def report_main(jm_code, rcp_no):
    """Record the compensation/audit committee flags of one filing.

    Opens the DART viewer page for ``rcp_no``, reads the two committee
    flags through ``get_board_yn`` and inserts one row for ``jm_code`` into
    ``proxy700_tmp``.

    Args:
        jm_code: Value written to the first column of the inserted row.
        rcp_no: Receipt number substituted into the DART viewer URL.

    Any exception raised while crawling or inserting propagates to the
    caller; the browser, cursor and connection are released first, but only
    those that were actually acquired.
    """
    # Pre-bind the handles so the cleanup below never hits an unbound name
    # when an earlier step (e.g. get_driver) fails.
    driver = None
    conn = None
    cursor = None
    try:
        # Driver setup (resolution, notice).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        # rcpno history of the shareholders' meeting resolution.
        rcpno_list = get_rcpno_list(driver)
        # Announcement year of the first document.
        # NOTE(review): this value is never used; the insert below hard-codes
        # '2018' instead -- confirm which one is intended.
        first_rcp_yy = rcpno_list[0][:4]

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Check whether a compensation committee exists.
        get_tab(driver, 'b')
        bosang_yn = get_board_yn(driver, 'b')
        print(bosang_yn)
        # Leave the frame before opening the next tab.
        driver.switch_to.default_content()
        # Check whether an audit committee exists.
        get_tab(driver, 'g')
        gamsa_yn = get_board_yn(driver, 'g')
        print(gamsa_yn)

        # ----------------------------------------------------------------- #
        # DB insert.
        # NOTE(review): the statement is assembled with str.format from
        # scraped values; switch to a parameterized execute once the DB
        # driver's paramstyle is confirmed.
        insert_qry = """insert into proxy700_tmp values('{0}', '{1}', '{2}', '{3}')""".format(
            jm_code, '2018', bosang_yn, gamsa_yn)
        cursor.execute(insert_qry)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
        if driver is not None:
            close_driver(driver)
示例#2
0
def nps001_select(rcpno):
    """Open and immediately release a cursor on the 'esg' database.

    No query is issued yet; the body only acquires and releases the
    connection and cursor.

    Args:
        rcpno: Currently unused.
    """
    # Pre-bind so the cleanup cannot fail on an unbound name when
    # get_dbcon() or conn.cursor() raises.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
示例#3
0
def get_rcplist(time, gubun):
    """Return filings registered after ``time`` that are not yet in proxy011.

    Reads the module-level names ``jm_code``, ``resolution`` and ``notice``
    (none of them is defined inside this function).

    Args:
        time: Lower bound compared against ``gsmda000.regdate``.
        gubun: ``'R'`` selects the resolution report name and the ``*_rs``
            loggers; any other value selects the notice report name and the
            ``*_nt`` loggers.

    Returns:
        A list of rows ``(crp_cd, rpt_nm, rcp_no, regdate, crp_cls)`` with
        ``regdate`` formatted by ``date_format(..., '%Y%m%d%H%i%s')``, in
        ``regdate`` order. Rows whose ``rcp_no`` already exists in
        ``proxy011`` are skipped and logged. When a query fails the error is
        logged and an empty list is returned, so callers can always iterate
        the result.
    """
    is_resolution = gubun == 'R'
    if is_resolution:
        document_gb = resolution
        info_log = info_logeer_rs
        info_log.info(start_msg_rs)
    else:
        document_gb = notice
        info_log = info_logeer_nt
        info_log.info(start_msg_nt)

    # Pre-bind so the cleanup cannot fail on an unbound name when
    # get_dbcon() or conn.cursor() raises.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()
        # Collection window is [last_time] ~ [collection start time].
        # NOTE(review): built with str.format; jm_code is spliced in as a raw
        # IN (...) list, so it must already be a valid SQL fragment.
        rcp_select = """SELECT crp_cd, rpt_nm, rcp_no, date_format(regdate, '%Y%m%d%H%i%s'), crp_cls FROM gsmda000 WHERE crp_cd IN ({0})
                    AND regdate > '{1}'
                    AND rpt_nm LIKE '%{2}%'
                    AND rpt_nm NOT LIKE '%첨부정정%'
                    ORDER BY regdate
              """.format(jm_code, time, document_gb)

        cursor.execute(rcp_select)
        rcpnos = cursor.fetchall()

        result = []
        jm_list = []
        for rcpno in rcpnos:
            # Duplicate check: skip filings already stored in proxy011.
            dup_select = "select * from proxy011 where rcp_no = '{0}'".format(
                rcpno[2])
            cursor.execute(dup_select)
            if cursor.rowcount > 0:
                info_log.info('Duplicated rcp_no. (jm_code : [{0}])'.format(
                    rcpno[0]))
                continue

            # Rows that still have to be collected.
            result.append(rcpno)
            jm_list.append(rcpno[0])

        info_log.info('jm_code list : {0}'.format(jm_list))
        return result
    except Exception as e:
        error_log = error_logeer_rs if is_resolution else error_logeer_nt
        error_log.error(e)
        # Hand back an iterable instead of an implicit None.
        return []
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
示例#4
0
def set_crawl_time(time, gubun):
    """Store ``time`` as ``c_time`` for the ``proxy000`` row of ``gubun``.

    Args:
        time: Value written to ``proxy000.c_time``.
        gubun: Value matched against ``proxy000.notice_gb``.

    NOTE(review): no commit is issued here; presumably the connection
    autocommits or ``close_dbcon`` commits -- confirm.
    """
    # Pre-bind so the cleanup cannot fail on an unbound name when
    # get_dbcon() or conn.cursor() raises.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()

        upd_ctime = """update proxy000
                       set c_time = '{0}'
                       where notice_gb = '{1}'
                    """.format(time, gubun)

        cursor.execute(upd_ctime)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
示例#5
0
def get_crawl_time(gubun):
    """Return the stored ``c_time`` of the ``proxy000`` row for ``gubun``.

    Args:
        gubun: Value matched against ``proxy000.notice_gb``.

    Returns:
        The first column (``c_time``) of the first matching row.

    Raises:
        TypeError: If no row matches, because ``fetchone()`` then yields
            ``None`` and the result is indexed.
    """
    # Pre-bind so the cleanup cannot fail on an unbound name when
    # get_dbcon() or conn.cursor() raises.
    conn = None
    cursor = None
    try:
        conn = get_dbcon('esg')
        cursor = conn.cursor()

        ld_select = "select c_time from proxy000 where notice_gb = '{0}'".format(
            gubun)

        cursor.execute(ld_select)
        last_date = cursor.fetchone()

        return last_date[0]
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
示例#6
0
def bd_main(jm_code, rcp_no):
    """Scrape the table read by ``get_bd_table`` and upsert it into proxy080.

    Opens the DART viewer page for ``rcp_no``, reads five values through
    ``get_bd_table`` and writes them to ``proxy080``: the row keyed by
    ``(jm_code, bd_gijun_ymd)`` is updated when it exists, otherwise a new
    row is inserted.

    Args:
        jm_code: Key column value in ``proxy080``.
        rcp_no: Receipt number substituted into the DART viewer URL.

    Any exception propagates to the caller after the resources that were
    actually acquired have been released.
    """
    # Pre-bind the handles: scraping happens before the DB connection is
    # opened, so a scrape failure must still close the browser without the
    # cleanup tripping over unbound names.
    driver = None
    conn = None
    cursor = None
    try:
        # Driver setup (resolution, notice).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        driver.implicitly_wait(10)

        bd_gubun, bd_kind, bd_gum, bd_total, bd_gijun_ymd = get_bd_table(
            driver)

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Duplicate check, then DB write.
        dup_select = """select * from proxy080 where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                     """.format(jm_code, bd_gijun_ymd)

        cursor.execute(dup_select)

        # NOTE(review): both statements are assembled with str.format from
        # scraped values; bd_gum and bd_total are inserted unquoted, so they
        # must already be numeric SQL literals.
        if cursor.rowcount > 0:
            upsert_qry = """update proxy080
                            set bd_gubun = '{2}', bd_kind = '{3}', bd_gum = {4}, bd_total = {5}
                            where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)
        else:
            upsert_qry = """insert into proxy080 values('{0}', '{1}', '{2}', '{3}', {4}, {5})
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)

        cursor.execute(upsert_qry)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
        if driver is not None:
            close_driver(driver)
示例#7
0
        close_driver(driver)


def resolution_main(conn):
    """Process every pending resolution filing and advance the crawl time.

    Loads the last crawl time for ``'R'``, fetches the pending rows with
    ``get_rcplist`` and, for each row, runs ``get_resolution`` on a fresh
    cursor and commits. Afterwards the crawl time is set to element ``[3]``
    of the last processed row (or left at its previous value when nothing
    was processed).

    Args:
        conn: Open DB connection; it is used for cursors and commits but is
            not closed here.

    An exception raised while processing a row propagates before
    ``set_crawl_time`` runs, so the stored crawl time is not advanced.
    """
    start_time = get_crawl_time('R')
    end_time = start_time

    # Treat a missing result (None) as "nothing to process".
    rcpnos = get_rcplist(start_time, 'R') or []

    for rcpno in rcpnos:
        info_logger.info('---------- rcp_no : [{0}] ----------'.format(
            rcpno[2]))
        cursor = conn.cursor()
        try:
            jm_code, rcp_no, rcp_yn, rcp_gb = get_rcpNo(rcpno)
            get_resolution(jm_code, rcp_no, rcp_gb, cursor)
        finally:
            # Release the cursor even when this row fails.
            cursor.close()

        end_time = rcpno[3]
        conn.commit()
        # Pause between filings.
        time.sleep(1)

    set_crawl_time(end_time, 'R')


if __name__ == "__main__":
    # Script entry point: run the resolution crawl on one 'esg' connection
    # and close that connection even when the crawl raises.
    conn = get_dbcon('esg')
    try:
        resolution_main(conn)
    finally:
        close_dbcon(conn)
def resolution_main(jm_code, rcp_no, rcp_yn, rcp_gb):
    """Scrape one shareholders' meeting resolution filing and store it.

    Opens the DART viewer page for ``rcp_no``, reads the meeting table and
    the optional director-appointment / business-purpose sections, then
    inserts the result into the 'esg' database.

    Args:
        jm_code: Code used in the lookup and insert statements; it is also
            the line appended to the error log when storing fails.
        rcp_no: Receipt number; used in the viewer URL, and its first eight
            characters prefix ``report_ver``.
        rcp_yn: An empty string enables the duplicate check (the Korean
            source comment describes this as "not a correction filing").
        rcp_gb: Listing-type text; whether it contains '유가' or '코스닥'
            selects the element ids that are scraped.

    When the duplicate check hits, the record is skipped: a message is
    printed and nothing is inserted. Other failures while storing are
    best-effort: the traceback is printed and ``jm_code`` is appended to the
    error log. Scraping errors propagate to the caller. The browser is
    closed in every case.
    """
    # Driver setup (resolution, notice).
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    try:
        meet_tb, isa_arr, biz_arr = _scrape_resolution(driver, rcp_gb)
        _store_resolution(meet_tb, isa_arr, biz_arr, jm_code, rcp_no, rcp_yn)
    finally:
        # Close the browser even when scraping or storing fails.
        close_driver(driver)


def _scrape_resolution(driver, rcp_gb):
    """Return ``(meet_tb, isa_arr, biz_arr)`` read from the loaded filing."""
    # Set up the meeting resolution data: the lookups below run inside the
    # page's iframe.
    driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

    # Listing type decides the table id and where three of the cells live.
    is_yuga = '유가' in rcp_gb
    if is_yuga:
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="XFormD52_Form0_Table0"]/tbody/tr')
        date_cell, time_cell, kind_cell = (1, 1), (1, 2), (0, 1)
    else:
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="XFormD2_Form0_Table0"]/tbody/tr')
        date_cell, time_cell, kind_cell = (0, 2), (1, 1), (8, 1)

    def cell_text(row, col):
        return tb_mst[row].find_elements_by_tag_name('td')[col].text

    # Meeting resolution master values, addressed by index downstream.
    meet_tb = [
        cell_text(*date_cell),  # [0] date
        cell_text(*time_cell),  # [1] time
        cell_text(2, 1),        # [2] venue
        cell_text(3, 1),        # [3] agenda content
        cell_text(4, 1),        # [4] board resolution date
        cell_text(5, 2),        # [5] outside directors - present
        cell_text(6, 1),        # [6] outside directors - absent
        cell_text(7, 1),        # [7] auditor attendance
        cell_text(*kind_cell),  # [8] meeting type
    ]

    # Section div ids per listing type. isa_ids order: director appointment,
    # outside director appointment, audit committee member appointment,
    # auditor appointment. biz_id is the business-purpose section.
    if is_yuga:
        isa_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
        biz_id = 'LIB_L9017'
    elif '코스닥' in rcp_gb:
        isa_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
        biz_id = 'LIB_L7019'
    else:
        isa_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
        biz_id = 'LIB_L3023'

    def has_div(div_id):
        return bool(driver.find_elements_by_xpath(
            '//*[@id="{0}"]'.format(div_id)))

    # Probe all four director sections before reading any of them, so the
    # lookups and reads happen in the same order as before.
    present_isa_ids = [div_id for div_id in isa_ids if has_div(div_id)]
    isa_arr = []
    for div_id in present_isa_ids:
        isa_arr.extend(get_isa(driver, div_id))

    biz_arr = []
    if has_div(biz_id):
        biz_arr.extend(get_biz(driver, biz_id))

    return meet_tb, isa_arr, biz_arr


def _store_resolution(meet_tb, isa_arr, biz_arr, jm_code, rcp_no, rcp_yn):
    """Insert the scraped data, logging (not raising) on insert failures."""
    import traceback

    # NOTE(review): nothing here commits; presumably the connection
    # autocommits or close_dbcon commits -- confirm.
    conn = get_dbcon('esg')
    try:
        cursor = conn.cursor()
        try:
            _insert_resolution_rows(cursor, meet_tb, isa_arr, biz_arr,
                                    jm_code, rcp_no, rcp_yn)
        except Exception:
            # Best-effort: show the cause, remember which code failed, and
            # let the caller carry on with the next filing.
            traceback.print_exc()
            with open("C:\\Users\\rmffo\\PycharmProjects\\log\\error_log.txt",
                      'a') as f:
                f.write(jm_code + '\n')
        finally:
            cursor.close()
    finally:
        close_dbcon(conn)


def _insert_resolution_rows(cursor, meet_tb, isa_arr, biz_arr, jm_code,
                            rcp_no, rcp_yn):
    """Run the master, director and business-purpose inserts on ``cursor``."""
    # Meeting values used for the lookup.
    ymd = make_ymd(meet_tb[0])
    gb = get_regYn(meet_tb[8])
    seq_select = """select * from proxy001 where meet_ymd = '{0}' and jm_code = '{1}' and meet_gb = '{2}'
                         """.format(ymd, jm_code, gb)

    cursor.execute(seq_select)
    rows = cursor.rowcount

    # Duplicate check when this is not a correction filing. Skip the record
    # with a plain return so it is not written to the error log as a failure.
    if rcp_yn == '' and rows > 0:
        print('중복 데이터가 있습니다.')
        return

    # Build the report_ver key (revision date + seq).
    report_ver = rcp_no[:8] + str(rows + 1).zfill(2)

    # Insert the resolution master row.
    in_qry = resolution_mst_ins(meet_tb, jm_code, report_ver, rcp_no)
    cursor.execute(in_qry)
    print(in_qry)

    # Insert director appointments.
    if isa_arr:
        ins_isa, dup_isa = isa_mst_ins(isa_arr, meet_tb[0], jm_code, gb,
                                       report_ver)
        for i, ins_qry in enumerate(ins_isa):
            # Director duplicate check.
            cursor.execute(dup_isa[i])
            if cursor.rowcount > 0:
                print('중복된 이사가 있습니다.')
                continue

            cursor.execute(ins_qry)
            print(str(i) + " : " + ins_qry)

            if chk_no_data(isa_arr[i][4]):
                # Director appointment: career.
                ins_isa_car = isa_car_ins(isa_arr[i], meet_tb[0], jm_code,
                                          gb, report_ver, i)
                cursor.execute(ins_isa_car)
                print(str(i) + " : " + ins_isa_car)

            if chk_no_data(isa_arr[i][5]):
                # Director appointment: concurrent positions.
                ins_isa_dup = isa_dup_ins(isa_arr[i], meet_tb[0], jm_code,
                                          gb, report_ver, i)
                cursor.execute(ins_isa_dup)
                print(str(i) + " : " + ins_isa_dup)

    # Insert business-purpose changes.
    if biz_arr:
        # The arguments do not change between iterations, so build the
        # statements once instead of once per row.
        ins_biz = biz_ins(biz_arr, meet_tb[0], jm_code, gb, report_ver)
        for i in range(len(biz_arr)):
            cursor.execute(ins_biz[i])
            print(str(i) + " : " + str(ins_biz[i]))