def file_processing(csv_file_name):
    """
    Load one crawled CSV file, store its rows through ``smdw`` and move the
    file to the backup directory.

    The file is read from ``CRAWLING_TARGET_PATH`` as CP949 text; its first
    line is skipped and ``COLUMN_NAMES`` are used as the column names.
    Missing values are replaced with 0, a ``date`` column is filled with the
    date parsed from the first run of eight digits in the file name, the
    ``m_type`` values are translated through ``m_type_dict`` and the
    ``m_dept`` column is dropped before the frame is handed to
    ``smdw.insert``.

    :param csv_file_name: name of the file inside ``CRAWLING_TARGET_PATH``;
        it must contain an eight-digit date
    :raises IndexError: if the file name contains no run of eight digits
    :raises KeyError: if an ``m_type`` value is missing from ``m_type_dict``
    :return: None
    """
    source_path = os.path.join(CRAWLING_TARGET_PATH, csv_file_name)

    trading_df = pd.read_csv(source_path, delimiter=',', encoding='CP949',
                             names=COLUMN_NAMES, skiprows=[0])
    trading_df = trading_df.fillna(0)

    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    date_text = re.findall(r'\d{8}', csv_file_name)[0]
    trading_df['date'] = parse(date_text).date()

    trading_df['m_type'] = trading_df['m_type'] \
        .apply(lambda m_type: m_type_dict[str(m_type)])
    trading_df = trading_df.drop(['m_dept'], axis=1)
    smdw.insert(trading_df)

    # Archive the file only after the insert call has returned.
    shutil.move(source_path,
                os.path.join(CRAWLING_BACKUP_PATH, csv_file_name))
def insert_company_from_market():
    """
    Replace the stored company entries with ones built from market data.

    Calls ``scw.delete()``, fetches the market rows for the date returned by
    ``smdw.get_date(is_add_one=False)``, turns each row into a company object
    with ``scw.make_object`` (rows for which it returns ``None`` are skipped)
    and passes the resulting list to ``scw.insert``.

    :return: None
    """
    se = StartEndLogging()
    scw.delete()

    target_date = smdw.get_date(is_add_one=False)
    market_qs = smdw.gets(date=target_date)
    log.debug(f'market data size: {len(market_qs)}')

    # Build lazily, then keep only the rows that produced an object.
    built = (scw.make_object(row) for row in market_qs)
    company_objects = [obj for obj in built if obj is not None]

    scw.insert(company_objects)
    se.end()
def get_krx_crawling_period(s_date, e_date, is_that_day=False):
    """
    Work out the start and end dates of a crawling run.

    Start date:
      * ``'all'`` (case-insensitive) selects ``BASE_TRADING_DATE``; any other
        value is parsed as a date.
      * unless ``is_that_day`` is true, the start date is never earlier than
        ``smdw.get_date(is_add_one=True)``.
      * if ``s_date`` cannot be used (not a string, unparsable, ...), the
        value of ``smdw.get_date(is_add_one=True)`` is used instead.

    End date:
      * ``e_date`` is parsed as a date; if that fails, today is used.
      * a date in the future is clamped to today.
      * if the end date is today and the current time is still before
        ``BASE_CRAWLING_TIME``, the previous day is used.
      * the end date is never earlier than the start date.

    :param s_date: start date text, or ``'all'``
    :param e_date: end date text
    :param is_that_day: when true, do not raise the start date to the value
        returned by ``smdw.get_date(is_add_one=True)``
    :return: tuple ``(start_date, end_date)``
    """
    t_date = smdw.get_date(is_add_one=True)
    try:
        if s_date.lower() == 'all':
            cs_date = parse(BASE_TRADING_DATE).date()
        else:
            cs_date = parse(s_date).date()
        if not is_that_day:
            cs_date = max([cs_date, t_date])
    except Exception:
        # Best-effort fallback for unusable input. ``Exception`` rather than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        cs_date = t_date

    # Take one clock reading so the date and time used below are consistent
    # even when the call happens around midnight.
    now = datetime.now()
    today = now.date()
    curr_time = now.time()

    try:
        ce_date = parse(e_date).date()
    except Exception:
        ce_date = today

    if ce_date > today:
        ce_date = today
    elif ce_date == today and curr_time < parse(BASE_CRAWLING_TIME).time():
        ce_date = ce_date - timedelta(days=1)

    ce_date = max([cs_date, ce_date])
    return cs_date, ce_date
def insert(csv_file_name='20210414.csv'):
    """
    Load a single crawled CSV file and store its rows through ``smdw``.

    The file is read from ``CRAWLING_TARGET_PATH`` as CP949 text with its
    first line skipped. Missing values become 0, a ``date`` column is filled
    with the date parsed from the first run of eight digits in the file name,
    ``m_type`` values are translated through ``m_type_list`` and the
    ``m_dept`` column is dropped before calling ``smdw.insert``.

    :param csv_file_name: name of the file inside ``CRAWLING_TARGET_PATH``;
        defaults to the file this function used to have hard-coded
    :raises IndexError: if the file name contains no run of eight digits
    :return: None
    """
    trading_df = pd.read_csv(os.path.join(CRAWLING_TARGET_PATH, csv_file_name),
                             delimiter=',', encoding='CP949',
                             names=COLUMN_NAMES, skiprows=[0])
    trading_df = trading_df.fillna(0)

    # The date now comes from the same file name that is being read; the
    # name used here was previously not defined inside this function.
    trading_df['date'] = parse(re.findall(r'\d{8}', csv_file_name)[0]).date()

    # NOTE(review): file_processing performs the same lookup on
    # ``m_type_dict``; confirm that ``m_type_list`` is the intended mapping.
    trading_df['m_type'] = trading_df['m_type'] \
        .apply(lambda m_type: m_type_list[str(m_type)])
    trading_df = trading_df.drop(['m_dept'], axis=1)
    smdw.insert(trading_df)
def get_normal_marketdata(com_code):
    """
    Return the market rows of one company starting at its last abnormal row.

    The ratio of the last row's ``t_volume`` to the first row's is checked
    against ``TOTAL_CHECK_MIN_RATIO``/``TOTAL_CHECK_MAX_RATIO``. Only when it
    falls outside that range (or the first ``t_volume`` is 0) are the rows
    walked one by one; the start row is then moved to every row whose
    ``t_volume`` ratio to the previous row is outside
    ``DAY_CHECK_MIN_RATIO``/``DAY_CHECK_MAX_RATIO``, or whose ``volume`` is 0.
    The result is the query set filtered to dates on or after the start row.

    :param com_code: company code passed to ``smdw.gets``
    :return: filtered query set
    """
    def _outside(ratio, low, high):
        # True when ratio lies above ``high`` or below ``low``.
        return ratio > high or ratio < low

    company_market_qs = smdw.gets(com_code=com_code)
    head = company_market_qs.first()
    previous = head
    start_row = head

    # When the first t_volume is 0 the overall change cannot be measured, so
    # the ratio is forced out of range to make the per-day check run.
    total_ratio = (
        company_market_qs.last().t_volume / head.t_volume
        if head.t_volume != 0
        else TOTAL_CHECK_MAX_RATIO + 1.
    )

    if _outside(total_ratio, TOTAL_CHECK_MIN_RATIO, TOTAL_CHECK_MAX_RATIO):
        # The first row was already consumed through first() above.
        for current in company_market_qs[1:]:
            if previous.t_volume != 0 and current.t_volume != 0:
                day_ratio = current.t_volume / previous.t_volume
                if _outside(day_ratio, DAY_CHECK_MIN_RATIO, DAY_CHECK_MAX_RATIO):
                    start_row = current
            # A row with no trading volume for the day is treated as a
            # capital reduction / capital increase case.
            if current.volume == 0:
                start_row = current
            previous = current

    normal_qs = company_market_qs.filter(date__gte=start_row.date)
    log.debug(f'com_code: {com_code}, modeling data size: '
              f'total({len(company_market_qs)}), normal({len(normal_qs)})')
    return normal_qs
def insert_marketdata_from_mergefile():
    """
    Currently only calls ``smdw.delete()`` between the start and end log
    markers of a ``StartEndLogging('insert marketdata')`` instance.

    NOTE(review): the chunked read-and-insert loop over ``MERGE_FILE_PATH``
    is commented out below, so despite its name this function inserts
    nothing. Confirm whether that is intentional before relying on it.

    :return: None
    """
    insert_se = StartEndLogging('insert marketdata')
    # Column names for the merge file; only the disabled code below uses them.
    col_names = ['com_code', 'com_name', 'm_type', 'm_dept', 'close', 'diff', 'ratio', 'open', 'high', 'low', 'volume', 'value', 't_value', 't_volume', 'date']
    # Disabled: read the merge file in chunks of 3000 rows and insert each chunk.
    # merge_reader = pd.read_csv(MERGE_FILE_PATH, encoding='CP949', low_memory=False,
    #                            names=col_names, chunksize=3000, skiprows=[0])
    #
    # loop_cnt = 1
    # for merge_df in merge_reader:
    #     merge_df = merge_df.fillna(0)
    #     smdw.insert(merge_df)
    #     insert_se.mid(f'{loop_cnt * 3000}')
    #     loop_cnt += 1
    smdw.delete()
    insert_se.end()
def update_company_from_market():
    """
    Build company objects from market rows and hand them to ``scw.insert``.

    Market rows are fetched for the date returned by
    ``smdw.get_date(is_min=False, is_add_one=False)``. For each row the
    company frame is filtered by ``com_code``: when nothing matches,
    ``scw.make_object`` is called with the market row alone, otherwise with
    the matching frame slice and the market row. ``None`` results are
    skipped.

    :return: None
    """
    se = StartEndLogging()

    target_date = smdw.get_date(is_min=False, is_add_one=False)
    market_qs = smdw.gets(date=target_date)
    company_df = read_frame(scw.gets('id'))
    log.debug(f'market data size: {len(market_qs)}, company data size: {len(company_df)}')

    company_objects = []
    for row in market_qs:
        matched = company_df[company_df['com_code'] == row.com_code]
        # No stored company: build from the market row only.
        call_args = (row,) if matched.empty else (matched, row)
        built = scw.make_object(*call_args)
        if built is None:
            continue
        company_objects.append(built)

    scw.insert(company_objects)
    se.end()
def update_modelingdata_from_market():
    """
    In the daily job, counting how many company records were added is
    ignored, and the market data that has not been applied yet is appended
    in one batch.
     - This work is only performed during system initialisation or in the
       weekly job.

    Market rows dated after ``smlw.get_date()`` are read into a frame and
    the ``date``, ``com_code`` and ``MODELING_COLUMNS`` columns are passed
    to ``smlw.insert``. On any error the error is logged and the process
    exits with status 1.

    :return: None
    """
    se = StartEndLogging()
    try:
        data_df = read_frame(smdw.gets('date', date__gt=smlw.get_date()))
        smlw.insert(data_df[['date', 'com_code'] + MODELING_COLUMNS])
    except Exception as e:
        log.error(e)
        # Non-zero status: a bare sys.exit() would report success (0) to the
        # caller even though the update failed.
        sys.exit(1)
    se.end()
def delete():
    """
    Call ``smdw.delete()`` and discard whatever it returns.

    :return: None
    """
    smdw.delete()
    return None