def get_last_n_nanos(df: dd, nanos: int):
    """Return the rows of ``df`` that fall in the final ``nanos`` window.

    The window ends at the ``time`` of the last row and starts ``nanos``
    earlier (exclusive). Times are compared after conversion with
    ``DataUtils.date_to_unix(..., 'ns')``.

    The previous implementation anchored on the *first* row and added
    ``nanos``, which selected the same rows as ``get_first_n_nanos``.

    Args:
        df: frame with a ``time`` column; the last row is taken as the most
            recent one (the same positional convention ``get_first_n_nanos``
            applies to the first row).
        nanos: window length, in the unit produced by ``date_to_unix`` with
            ``'ns'``.

    Returns:
        The subset of ``df`` whose converted time is greater than
        ``last_time - nanos``.
    """
    end_time = DataUtils.date_to_unix(df.iloc[-1]['time'], 'ns')
    start_time = end_time - nanos
    converted = df['time'].apply(lambda x: DataUtils.date_to_unix(x, 'ns'))
    return df[converted > start_time]
def let_train_invests(self, corps, start_no=1):
    """Train on the given companies and run mock investments.

    Companies are numbered from 1 in iteration order; those numbered below
    ``start_no`` are skipped. A company whose training raises is reported
    with ``print`` and skipped. Results are checkpointed to the result file
    whenever the running number is a multiple of 10, and once more after the
    loop so that results collected after the last checkpoint are not lost.

    Args:
        corps: DataFrame with the columns '종목코드' and '회사명'.
        start_no: 1-based number of the first company to process.
    """
    if self.params.is_all_corps_model == True and self.params.remove_session_file == True:
        learning = Learning(self.params)
        learning.delete_learning_image()

    comp_rmses = []
    saved_count = 0

    def _save_results():
        df_comp_rmses = pd.DataFrame(comp_rmses, columns=self.result_columns)
        DataUtils.save_excel(df_comp_rmses, self.get_result_file_path())

    for no, (_, corp_data) in enumerate(corps.iterrows(), start=1):
        if no < start_no:
            continue
        corp_code = corp_data['종목코드']
        corp_name = corp_data['회사명']
        try:
            result = self.let_train_invest(corp_code, corp_name, no)
        except Exception as inst:
            # Best effort: one failing company must not abort the whole run.
            print(inst)
            continue
        comp_rmses.append(result)
        if no % 10 == 0:
            _save_results()
            saved_count = len(comp_rmses)

    # Flush whatever was collected after the last multiple-of-10 checkpoint.
    if len(comp_rmses) > saved_count:
        _save_results()
def get_first_n_nanos(df: dd, nanos: int):
    """Return the rows of ``df`` that fall in the initial ``nanos`` window.

    The window starts at the ``time`` of the first row; a row is kept when
    its time, converted with ``DataUtils.date_to_unix(..., 'ns')``, is
    strictly less than the first row's converted time plus ``nanos``.
    """
    first_time = df.iloc[0]['time']
    cutoff = DataUtils.date_to_unix(first_time, 'ns') + nanos
    unix_times = df['time'].apply(lambda value: DataUtils.date_to_unix(value, 'ns'))
    inside_window = unix_times < cutoff
    return df[inside_window]
def graph_distribution(data: pd.Series, description: str, xlabel: str, bins=20, std_devs: int = 2):
    """Prepare ``data`` for a distribution plot by trimming and down-sampling.

    The visible body filters ``data`` with ``DataUtils().keep_n_std_dev``,
    samples at most 10000 values, and filters again. It neither plots nor
    returns anything, and ``description``, ``xlabel`` and ``bins`` are unused.

    NOTE(review): the plotting part of this function appears to be missing
    from this view - confirm against the full file.
    """
    # Upper bound on the number of points kept.
    sample_size = 10000
    # Presumably keeps values within ``std_devs`` standard deviations - TODO confirm.
    data = DataUtils().keep_n_std_dev(data, std_devs)
    if len(data) > sample_size:
        data = data.sample(n=sample_size)
    # NOTE(review): the source was collapsed onto one line, so it cannot be
    # determined whether this second trim belongs inside the ``if`` above.
    data = DataUtils().keep_n_std_dev(data, std_devs)
def trains(self, corps:pd.DataFrame=None)->(pd.DataFrame, pd.DataFrame):
    """Train every company in ``corps`` and collect the investment results.

    When ``corps`` is None the evaluation companies are obtained from
    ``Corp.get_eval_corps_auto``. One result row is printed and recorded per
    company; all rows are saved to the result Excel file after the loop.

    Returns:
        ``(df_results, chart_data)`` where ``chart_data`` is what
        ``InvestVisualizer.draw_invest_4reinforcement`` returned, or None if
        drawing raised (the traceback is printed to stdout).
    """
    if corps is None:
        corps = Corp(self._global_params).get_eval_corps_auto()

    invest_date = self._global_params.invest_start_date + "~" + self._global_params.invest_end_date
    results = []
    info_data = []
    for no, (_, corp_data) in enumerate(corps.iterrows(), start=1):
        corp_code = corp_data['종목코드']
        corp_name = corp_data['회사명']
        invest_value, index_value, infos = self.train(corp_code=corp_code, corp_name=corp_name)
        row = [no, corp_code, corp_name, invest_value, index_value, invest_date]
        results.append(row)
        info_data.append(infos)
        print(row)

    df_results = pd.DataFrame(results, columns=self.RESULT_COLUMNS)
    DataUtils.save_excel(df_results, self._get_result_file_path())

    chart_data = None
    try:
        chart_data = InvestVisualizer(self._global_params).draw_invest_4reinforcement(info_data, corps)
    except Exception:
        # Charting is best effort; the tabular results are still returned.
        traceback.print_exception(*sys.exc_info(), file=sys.stdout)
    return df_results, chart_data
def draw_predictions(self, corp_name, scaler_close, data_params, test_predict, invest_predicts=None):
    """Draw the prediction graph and save its data as CSV.

    The test and investment predictions are concatenated (whichever of them
    is not None), passed through ``DataUtils.inverse_scaled_data`` together
    with the matching target values from ``data_params``, and handed to
    ``draw_predictions_seaborn`` / ``save_csv`` under the 'predicts' chart
    directory.
    """
    chart_kind = 'predicts'
    DataUtils.create_dir(os.path.join(self.main.DIR_CHARTS, chart_kind))

    if test_predict is None:
        predicts = invest_predicts
    elif invest_predicts is None:
        predicts = test_predict
    else:
        predicts = np.append(test_predict, invest_predicts)

    scaled_targets = np.append(data_params.testY, data_params.investY)
    preds = [DataUtils.inverse_scaled_data(scaler_close, value) for value in predicts]
    close_values = [DataUtils.inverse_scaled_data(scaler_close, value) for value in scaled_targets]

    df_data = self.get_predicts_data(data_params, close_values, preds)
    df_data['date'] = pd.to_datetime(df_data['date'], format='%Y.%m.%d')
    self.draw_predictions_seaborn(df_data.set_index('date'), chart_kind, corp_name)
    self.save_csv(df_data, chart_kind, corp_name)
def train_n_invests_for_name(self, corp_names: list, invest_only: bool = False) -> None:
    """Look up each company by name, train it and run the mock investment.

    Companies are numbered from 1. A company whose ``train_n_invest`` call
    raises has its traceback printed to stdout and is skipped. All collected
    results are saved as CSV at the end.
    """
    corp = Corp()
    comp_rmses = []
    date = None
    for no, corp_name in enumerate(corp_names, start=1):
        corp_code = corp.get_corp_code(corp_name)
        try:
            result, _ = self.train_n_invest(corp_code, corp_name, no, invest_only)
            # NOTE(review): train_n_invests reads the date from result[7],
            # this function from result[1] - confirm which index is right.
            date = result[1]
        except Exception:
            traceback.print_exception(*sys.exc_info(), file=sys.stdout)
            continue
        comp_rmses.append(result)

    df_comp_rmses = pd.DataFrame(comp_rmses, columns=self.result_columns)
    DataUtils.save_csv(df_comp_rmses, self.get_result_file_path(date))
def recommend_corps(recommend_month: str, train_model: str = 'rnn') -> None:
    """Rank companies for ``recommend_month`` using a single-session model.

    Runs train-and-mock-invest over a period ending the day before
    ``recommend_month`` (format '%Y.%m'), reads the result file back,
    optionally drops rows whose ``rmse`` is not below
    ``params.rmse_max_recommend``, sorts by ``invest_result`` descending,
    saves the ranking and prints it.
    """
    month = DateUtils.to_date(recommend_month, '%Y.%m')
    params = GlobalParams(train_model=train_model)

    # Mock-investment period: ``mock_period_months`` back up to the previous month.
    period_first_month = DateUtils.to_month_str(month, -params.mock_period_months)
    period_last_month = DateUtils.to_month_str(month, -1)
    params.invest_start_date = period_first_month + '.01'
    params.invest_end_date = DateUtils.to_date_str(month - datetime.timedelta(days=1))
    params.result_file_name = "MOCK_" + period_first_month + "-" + period_last_month

    corps = Corp(params).get_eval_corps_auto(params.invest_end_date)
    invests = LearningNMockInvestment(params)
    invests.train_n_invests(corps)

    ranking = pd.read_csv(invests.get_result_file_path())
    if params.rmse_max_recommend is not None:
        ranking = ranking.query("rmse<" + str(params.rmse_max_recommend))
    ranking = ranking.sort_values(by='invest_result', ascending=False)
    ranking.index = range(len(ranking.index))

    # NOTE(review): the file name ends in .xlsx but is written with save_csv.
    save_file_path = os.path.join('result', train_model, "recommend_months_" + recommend_month + ".xlsx")
    DataUtils.save_csv(ranking, save_file_path)
    print(ranking)
def get_relative_prices(trades_df: dd, other_df: dd) -> dd:
    """Return the ``relative_price`` column of ``other_df`` joined to trade prices.

    The mean ``most_recent_trade_price`` per ``time`` is computed from
    ``Statistics().get_price_over_time(trades_df)`` and fuzzy-joined onto
    ``other_df`` on ``time``; the resulting ``relative_price`` column is
    passed through ``DataUtils().remove_tails(..., 3)`` before being returned.
    """
    price_history = Statistics().get_price_over_time(trades_df)
    mean_price_by_time: dd = price_history.groupby(['time'])['most_recent_trade_price'].mean().to_frame()
    joined = DataUtils().fuzzy_join(other_df, mean_price_by_time, on='time')
    relative_column = joined['relative_price']
    return DataUtils().remove_tails(relative_column, 3)
def __fetch_real_prices(self):
    """Load the real trade prices for the simulation window.

    Loads the feed for ``self.config.product`` between ``self.sim_st`` and
    ``self.sim_st + simulation_window`` seconds, extracts the trades,
    rewrites ``time`` via ``get_times_in_seconds_after_start`` and overwrites
    the first row's price with ``get_first_non_nan`` of the price column.

    Returns:
        The ``time`` and ``price`` columns of the trades frame.
    """
    df = DataLoader().load_feed(
        self.config.real_root,
        self.sim_st,
        self.sim_st + timedelta(seconds=self.config.simulation_window),
        self.config.product,
    )
    trades_df = DataSplitter.get_trades(df)
    trades_df['time'] = DataUtils().get_times_in_seconds_after_start(trades_df['time'])
    first_price = DataUtils().get_first_non_nan(trades_df['price'])
    # Write through the frame in a single positional indexing step. The
    # chained form ``trades_df['price'].iloc[0] = ...`` may assign to a
    # temporary copy and leave ``trades_df`` unchanged.
    trades_df.iloc[0, trades_df.columns.get_loc('price')] = first_price
    return trades_df[['time', 'price']]
def train_months(self, start:str='2018.01', end:str='2018.11', invest_money:float=100000000)->None:
    """Run a month-by-month mock-then-invest loop and save the monthly results.

    For every month from ``start`` to ``end`` (both '%Y.%m', inclusive):

    1. configure ``self._global_params`` for a mock period made of the
       preceding ``mock_period_months`` months and train all evaluation
       companies with ``MOCK_MONEY``;
    2. rank the mock results by ``invest_result`` and keep the top rows;
    3. reconfigure the parameters for the current month, split
       ``invest_money`` evenly over the selected companies and train again;
    4. carry the summed ``invest_result`` over as next month's
       ``invest_money``.

    The per-month rows are saved as an Excel file under
    ``result/reinforcement/<train_model>/``.

    Note: this method mutates ``self._global_params`` and pushes it to
    ``self._env`` repeatedly; the statement order matters.
    """
    train_model = self._global_params.train_model
    start_month = DateUtils.to_date(start, '%Y.%m')
    end_month = DateUtils.to_date(end, '%Y.%m')
    between = DateUtils.between_months(start_month, end_month)
    invest_months_result = []
    result_columns = ["month", "invest_money", "result_money"]
    # Fixed budget used for the mock (selection) runs.
    MOCK_MONEY = 10000000
    chart_data = []
    for i in range(between + 1):
        # params.remove_session_file = True
        # --- Phase 1: mock investment over the months before month ``i``.
        before_month_start = DateUtils.to_month_str(start_month, i - self._global_params.mock_period_months)
        before_month_end = DateUtils.to_month_str(start_month, i - 1)
        self._global_params.invest_start_date = before_month_start + '.01'
        # NOTE(review): '.31' is appended for every month regardless of its
        # length - presumably downstream code treats it as an upper bound.
        self._global_params.invest_end_date = before_month_end + '.31'
        self._global_params.result_file_name = "MOCK_" + before_month_start + "-" + before_month_end
        self._global_params.invest_money = MOCK_MONEY
        corp = Corp(self._global_params)
        corps = corp.get_eval_corps_auto(self._global_params.invest_end_date)
        self._env.set_params(params=self._global_params)
        before_result, _ = self.trains(corps)
        now_month = DateUtils.to_month_str(start_month, i)
        before_result = corp.exclude_corps(before_result, now_month)
        before_result = before_result.sort_values(by='invest_result', ascending=False)
        before_result.index = range(len(before_result.index))
        # The index was just reset to 0..n-1 and ``.loc`` slices are
        # label-inclusive, so this keeps at most the 10 best rows.
        corp10_codes = before_result.loc[:9, 'code']
        corp10_codes.index = range(len(corp10_codes.index))
        corp10 = corp.get_corps_for_codes(corp10_codes)
        corp10_len = len(corp10.index)
        # --- Phase 2: invest in the selected companies during month ``i``.
        self._global_params.invest_start_date = now_month + '.01'
        self._global_params.invest_end_date = now_month + '.31'
        self._global_params.result_file_name = "INVEST_" + now_month
        # NOTE(review): raises ZeroDivisionError when no company was selected.
        self._global_params.invest_money = invest_money / corp10_len
        self._env.set_params(params=self._global_params)
        now_result, invest_chart_data = self.trains(corp10)
        chart_data.append(invest_chart_data)
        # The month's outcome becomes next month's budget.
        invest_money = now_result['invest_result'].sum()
        # ``invest_money`` was just overwritten, so the month's starting
        # budget is rebuilt from the per-company share.
        result = [now_month, 
                  self._global_params.invest_money * corp10_len, invest_money]
        invest_months_result.append(result)
        print(result)
    df_imr = pd.DataFrame(invest_months_result, columns=result_columns)
    save_file_name = "recommend_months_" + start + "-" + end + ".xlsx"
    # A model name containing "_" gets an extra ``ensemble_type`` directory level.
    if "_" in train_model:
        save_file_path = os.path.join('result', 'reinforcement', train_model, self._global_params.ensemble_type, save_file_name)
    else:
        save_file_path = os.path.join('result', 'reinforcement', train_model, save_file_name)
    DataUtils.save_excel(df_imr, save_file_path)
    # The multi-month chart is drawn only when more than one month was simulated.
    if len(chart_data) > 1:
        visualizer = InvestVisualizer(self._global_params)
        visualizer.draw_invest_months(chart_data, start, end)
    # NOTE(review): the source was collapsed onto one line, so it cannot be
    # determined whether this print() belongs inside the ``if`` above.
    print()
def draw_rmses(self, train_rmse, test_rmse, corp_name):
    """Draw the RMSE graph and save its data as CSV.

    ``train_rmse`` and ``test_rmse`` become the 'train' and 'test' columns of
    a DataFrame that is passed to ``draw_rmse_seaborn`` and ``save_csv``
    under the 'trains' chart directory.
    """
    chart_kind = 'trains'
    DataUtils.create_dir(os.path.join(self.main.DIR_CHARTS, chart_kind))
    df_rmses = pd.DataFrame.from_dict({'train': train_rmse, 'test': test_rmse})
    self.draw_rmse_seaborn(df_rmses, chart_kind, corp_name)
    self.save_csv(df_rmses, chart_kind, corp_name)
def invest_scaled_money(self, invest_scaled_predict, now_scaled_close, scaler_close, now_money, now_stock_cnt):
    """Execute a buy/sell decision based on the (scaled) predicted value.

    Both the current close and the prediction are passed through
    ``DataUtils.inverse_scaled_data`` before delegating to ``invest_money``.
    A prediction of -1 is a sentinel and is forwarded unchanged.
    """
    now_close = DataUtils.inverse_scaled_data(scaler_close, now_scaled_close)
    invest_predict = (
        -1
        if invest_scaled_predict == -1
        else DataUtils.inverse_scaled_data(scaler_close, invest_scaled_predict)
    )
    return self.invest_money(invest_predict, now_close, now_money, now_stock_cnt)
def invest_scaled_money_before(self, before_scaled_close, before_scaled_predict, x, now_money, now_stock_cnt, scaler_close):
    """Settle the previous step's decision using the previous close/prediction.

    The high/low pair from ``get_high_n_low(x, scaler_close)`` and the
    inverse-scaled previous prediction and close are forwarded to
    ``invest_money_before2``, whose result is returned.
    """
    high, low = self.get_high_n_low(x, scaler_close)
    prev_predict = DataUtils.inverse_scaled_data(scaler_close, before_scaled_predict)
    prev_close = DataUtils.inverse_scaled_data(scaler_close, before_scaled_close)
    return self.invest_money_before2(prev_close, prev_predict, high, low, now_money, now_stock_cnt)
def let_train_invests_for_name(self, corp_names):
    """Look up each company by name, train it, and save all results to Excel.

    Companies are numbered from 1 in the order given. An exception from any
    company propagates to the caller.
    """
    corp = Corp()
    comp_rmses = [
        self.let_train_invest(corp.get_corp_code(corp_name), corp_name, no)
        for no, corp_name in enumerate(corp_names, start=1)
    ]
    df_comp_rmses = pd.DataFrame(comp_rmses, columns=self.result_columns)
    DataUtils.save_excel(df_comp_rmses, self.get_result_file_path())
def train_n_invests(self, corps, start_no=1, invest_only: bool = False) -> pd.DataFrame:
    """Train on the given companies and run mock investments.

    Companies are numbered from 1; those below ``start_no`` are skipped and
    failing ones have their traceback printed to stdout. Results are saved as
    CSV after the loop, and additionally at every 10th company when
    ``self.params.debug`` is set.

    Returns:
        The value of ``InvestVisualizer.draw_invest_daily`` when more than
        one daily-investment record was collected and drawing succeeded;
        otherwise None.
    """
    comp_rmses = []
    invest_daily_data = []
    date: str = None

    def _save_results():
        frame = pd.DataFrame(comp_rmses, columns=self.result_columns)
        DataUtils.save_csv(frame, self.get_result_file_path(date))

    for no, (_, corp_data) in enumerate(corps.iterrows(), start=1):
        if no < start_no:
            continue
        corp_code = corp_data['종목코드']
        corp_name = corp_data['회사명']
        try:
            result, invest_daily = self.train_n_invest(corp_code, corp_name, no, invest_only)
            date = result[7]
            if invest_daily is not None:
                invest_daily_data.append(invest_daily)
        except Exception:
            traceback.print_exception(*sys.exc_info(), file=sys.stdout)
            continue
        comp_rmses.append(result)
        if no % 10 == 0 and self.params.debug == True:
            _save_results()

    _save_results()

    if len(invest_daily_data) <= 1:
        return None
    try:
        return InvestVisualizer(self.params).draw_invest_daily(invest_daily_data, corps)
    except Exception:
        traceback.print_exception(*sys.exc_info(), file=sys.stdout)
    return None
def get_session_dir(self, corp_code: str, is_reinforcement: bool = False, name: str = 'main') -> str:
    """Return (and create) the directory in which the session is stored.

    The path is ``SESSIONS_DIR/<train_model>/<file_name>``; reinforcement
    sessions insert ``reinforcement/<name>`` right after ``SESSIONS_DIR``.
    """
    file_name = self.get_session_filename(corp_code)
    parts = [self.SESSIONS_DIR]
    if is_reinforcement:
        parts.extend(['reinforcement', name])
    parts.extend([self.params.train_model, file_name])
    session_dir = os.path.join(*parts)
    DataUtils.create_dir(session_dir)
    return session_dir
def get_now_corps_maket_cap(self):
    """Return today's per-company market-cap table, cached on disk per day.

    If today's cache file exists it is read back; otherwise the table is
    built from ``get_now_coprs_master_price_from_krx`` (columns '종목코드'
    and '자본금(원)', the latter renamed to '시가총액') and saved.

    In both cases the '종목코드' column of the returned frame is normalised
    to zero-padded 6-character strings, so the result does not depend on
    whether the cache file already existed.
    """
    date = DateUtils.today_str('%Y%m%d')  # today's date
    year = date[0:4]
    file_path = os.path.join(self.DIR, "files", "corps_market_cap", year, "market_cap_" + date + ".txt")
    if os.path.isfile(file_path):
        market_cap = pd.read_csv(file_path)
    else:
        master_data = self.get_now_coprs_master_price_from_krx()
        # ``rename`` without ``inplace`` yields an independent frame instead
        # of mutating a column subset of ``master_data``.
        market_cap = master_data[['종목코드', '자본금(원)']].rename(columns={'자본금(원)': '시가총액'})
        DataUtils.save_csv(market_cap, file_path)
    # Replace the column rather than writing through ``.loc[:, col]``: codes
    # read back from CSV are typically numeric, and an in-place write of
    # strings into a numeric column is deprecated in recent pandas.
    market_cap['종목코드'] = market_cap['종목코드'].astype(str).str.zfill(6)
    return market_cap
def extract_mid_prices_at_times(all_sims, seconds_list):
    """Collect, per requested second, the mid price of every simulation.

    For each simulation the frame at index 3 is computed, its ``time`` column
    is rewritten with ``get_times_in_seconds_after_start``, and
    ``DataUtils.get_last_price_before`` is evaluated for every entry of
    ``seconds_list``.

    Returns:
        dict mapping each entry of ``seconds_list`` to a list with one price
        per simulation, in simulation order.
    """
    time_prices_dict = {seconds: [] for seconds in seconds_list}
    for sim in all_sims:
        midprices_df = sim[3].compute()
        midprices_df['time'] = DataUtils().get_times_in_seconds_after_start(midprices_df['time'])
        for seconds in seconds_list:
            time_prices_dict[seconds].append(DataUtils.get_last_price_before(midprices_df, seconds))
    return time_prices_dict
def forcasts(self, corps_n_date:list):
    """Run ``forcast`` for every ``(code, name, date)`` entry and save to Excel.

    Any "A" characters are removed from the code before the call. Each
    result list gets its 1-based sequence number inserted at the front, is
    printed, and all of them are written to the result file at the end.
    """
    results = []
    for no, corp_n_date in enumerate(corps_n_date, start=1):
        corp_code = corp_n_date[0].replace("A", "")
        corp_name = corp_n_date[1]
        forcast_date = corp_n_date[2]
        row = self.forcast(corp_name=corp_name, corp_code=corp_code, forcast_date=forcast_date)
        row.insert(0, no)
        print(row)
        results.append(row)
    df_results = pd.DataFrame(results, columns=self.FORCAST_COLUMNS)
    DataUtils.save_excel(df_results, self._get_result_file_path())
def get_stock_data(self, comp_code: str) -> pd.DataFrame:
    """Return the price history of ``comp_code``, using a per-company CSV cache.

    * Cache file exists: it is read. If ``params.check_stock_data`` is set,
      the last cached row is dropped, data newer than the remaining last
      date is fetched with ``get_stock_web_data`` and, when anything came
      back, appended and written to the cache.
    * No cache file: the full history is fetched and written to the cache.

    Rows with missing values are dropped. Afterwards the frame is restricted
    to rows before ``params.forcast_date`` when that is set; otherwise the
    last ``params.remove_stock_days`` rows are removed when that is positive.

    ``pandas.concat`` replaces ``DataFrame.append``, which no longer exists
    in pandas 2.x.
    """
    comp_code = DataUtils.to_string_corp_code(comp_code)
    file_path = os.path.join(self.DIR_STOCKS, comp_code + '.txt')

    if os.path.isfile(file_path):
        stock_data = pd.read_csv(file_path)
        if getattr(self.params, 'check_stock_data', False) == True:
            stock_data = stock_data.dropna()
            # Drop the last cached row so that day is fetched again below.
            stock_data = stock_data[:-1]
            date_last = stock_data.tail(1)['date'].to_string(index=False)
            date_next = DateUtils.to_date(date_last) + datetime.timedelta(days=1)
            date_next = date_next.strftime("%Y.%m.%d")
            new_data = self.get_stock_web_data(comp_code, date_next)
            if len(new_data) > 0:
                stock_data = pd.concat([stock_data, new_data], ignore_index=True)
                stock_data = stock_data.dropna()
                stock_data.to_csv(file_path, index=False)
    else:
        stock_data = self.get_stock_web_data(comp_code, '')
        stock_data.to_csv(file_path, index=False)

    stock_data = stock_data.dropna()

    if getattr(self.params, 'forcast_date', None) is not None:
        stock_data = stock_data.query("date<'{}'".format(self.params.forcast_date))
    elif hasattr(self.params, 'remove_stock_days') and self.params.remove_stock_days > 0:
        stock_data = stock_data[:-self.params.remove_stock_days]
    return stock_data
def get_session_filename(self, corp_code):
    """Return the file name under which the session is stored.

    A model shared by all companies (``params.is_all_corps_model``) uses
    ``params.session_file_name``; otherwise the name is the company code as
    formatted by ``DataUtils.to_string_corp_code``.
    """
    if not self.params.is_all_corps_model:
        return DataUtils.to_string_corp_code(corp_code)
    return self.params.session_file_name
def __graph_price_time_set(df: dd, marker: str):
    """Plot price against seconds elapsed since the earliest row.

    ``price`` is cast to float64 and forward-filled; ``time`` is cast to
    ``datetime64[ns]``, converted with ``DataUtils.date_to_unix(..., 'ns')``
    and turned into an offset from the minimum time divided by 1e9.

    Args:
        df: frame with ``price`` and ``time`` columns.
        marker: format/marker argument forwarded to ``plot``.
    """
    # ``Series.ffill()`` replaces the deprecated ``fillna(method='ffill')``.
    y = df['price'].astype('float64').ffill()
    times = df['time'].astype('datetime64[ns]').apply(lambda x: DataUtils.date_to_unix(x, 'ns'))
    start_time = times.min()
    x = times.apply(lambda z: (z - start_time) / 1e9)
    # NOTE(review): ``self`` is not a parameter of this function, so this
    # line raises NameError unless ``self`` is supplied by an enclosing scope
    # not visible here. Fixing it needs a signature change - confirm with
    # the callers.
    self.config.plt.plot(x, y, marker)
def get_monte_carlo_data(self, sim_analysis):
    """Sample every simulation's trade price at the x-axis times.

    Returns:
        dict mapping the simulation index to the list of
        ``DataUtils.get_last_price_before`` values, one per entry of
        ``self.get_xaxis_times()``. A simulation without trades keeps an
        empty list.
    """
    times = self.get_xaxis_times()
    sim_prices = {index: [] for index in range(len(sim_analysis.all_sims))}
    for index, sim in enumerate(sim_analysis.all_sims):
        trades_df = sim[1].compute()
        if len(trades_df) == 0:
            continue
        trades_df['time'] = DataUtils().get_times_in_seconds_after_start(trades_df['time'])
        for seconds in times:
            sim_prices[index].append(DataUtils.get_last_price_before(trades_df, seconds))
    return sim_prices
def trains(self, corps):
    """Train the linear regression model for every company in ``corps``.

    Companies are numbered from 1. A company whose ``train`` call raises is
    reported with ``print`` and skipped. All results are saved to the result
    Excel file at the end.
    """
    comp_rmses = []
    for no, (_, corp_data) in enumerate(corps.iterrows(), start=1):
        corp_code = corp_data['종목코드']
        corp_name = corp_data['회사명']
        try:
            result = self.train(corp_code, corp_name, no)
        except Exception as inst:
            print(inst)
        else:
            comp_rmses.append(result)
    df_comp_rmses = pd.DataFrame(comp_rmses, columns=self.RESULT_COLUMNS)
    DataUtils.save_excel(df_comp_rmses, self.get_result_file_path())
def invest_n_all(self, comp_code, dataX_last, data_params, params_all, scaler_close=None):
    """Run the mock stock trading after training, averaging two models.

    For every sample of ``data_params.investX`` the prediction obtained with
    ``self.params`` and the one obtained with ``params_all`` are averaged.
    From the second step on, the previous step is settled first via
    ``invest_scaled_money_before``; then the current prediction is acted on
    via ``invest_scaled_money``. On the first step only, a reference
    position is opened with the sentinel prediction -1 and tracked in
    ``ip.index_money``.

    After the loop the remaining stock of both portfolios is converted to
    money at the last close, and the averaged prediction for ``dataX_last``
    is stored in ``ip.last_predict``.

    Returns:
        The populated ``InvestParams`` instance.
    """
    ip = InvestParams()
    investX = data_params.investX
    invest_count = len(investX)
    ip.invest_money = self.params.invest_money
    now_stock_cnt = 0
    ip.index_money = ip.invest_money
    all_stock_count = 0
    before_scaled_close = None
    before_invest_predict = None
    now_scaled_close = None
    for i in range(invest_count):
        x = investX[i:i + 1]
        invest_predict = self._get_predict(self.params, comp_code, x)
        invest_predict_all = self._get_predict(params_all, comp_code, x)
        invest_predict = np.mean([invest_predict, invest_predict_all])
        ip.predict_list.append([invest_predict])
        # First feature of the last time step - presumably the scaled close
        # price; TODO confirm the feature order.
        x_last = x[:, -1][0]
        now_scaled_close = x_last[0]
        if i != 0:
            ip.invest_money, now_stock_cnt = self.invest_scaled_money_before(
                before_scaled_close, before_invest_predict, x,
                ip.invest_money, now_stock_cnt, scaler_close)
        ip.invest_money, now_stock_cnt = self.invest_scaled_money(
            invest_predict, now_scaled_close, scaler_close,
            ip.invest_money, now_stock_cnt)
        if i == 0:
            ip.index_money, all_stock_count = self.invest_scaled_money(
                -1, now_scaled_close, scaler_close,
                ip.index_money, all_stock_count)
        before_scaled_close = now_scaled_close
        before_invest_predict = invest_predict

    # Identity test: ``!= None`` would be dispatched to the operand's own
    # ``__ne__`` (element-wise for numpy values).
    if now_scaled_close is not None:
        now_close = DataUtils.inverse_scaled_data(scaler_close, now_scaled_close)
        ip.invest_money += self.to_money(now_stock_cnt, now_close)
        ip.index_money += self.to_money(all_stock_count, now_close)

    # Compute the last prediction value.
    ip.last_predict = self._get_predict(self.params, comp_code, dataX_last)
    last_predict_all = self._get_predict(params_all, comp_code, dataX_last)
    ip.last_predict = np.mean([ip.last_predict, last_predict_all])
    return ip
def extract_trade_prices_at_times(all_sims, seconds_list):
    """Collect, per requested second, the trade price of every simulation.

    The frame at index 1 of each simulation is computed; simulations without
    trades are left out. The ``time`` column of the remaining frames is
    rewritten with ``get_times_in_seconds_after_start`` before they are
    handed to ``SimulationAnalysis.extract_prices_at_times``.
    """
    prices_dfs = []
    for sim in all_sims:
        trades_df = sim[1].compute()
        if len(trades_df) != 0:
            trades_df['time'] = DataUtils().get_times_in_seconds_after_start(trades_df['time'])
            prices_dfs.append(trades_df)
    return SimulationAnalysis.extract_prices_at_times(prices_dfs, seconds_list)
def extract_prices_at_times(prices_dfs: List[pd.DataFrame], seconds_list):
    """Evaluate ``DataUtils.get_last_price_before`` for every frame and second.

    Returns:
        dict mapping each entry of ``seconds_list`` to a list holding one
        price per frame of ``prices_dfs``, in frame order.
    """
    time_prices_dict = {seconds: [] for seconds in seconds_list}
    for price_df in prices_dfs:
        for seconds in seconds_list:
            time_prices_dict[seconds].append(DataUtils.get_last_price_before(price_df, seconds))
    return time_prices_dict
def get_file_path(self, dir:str, corp_name:str, extension:str, start:str=None, end:str=None)->str:
    """Return the path of the file to save, creating its directory.

    ``start`` / ``end`` default to ``params.invest_start_date`` /
    ``params.invest_end_date``. Files for 'invests_total' go directly into
    that chart directory; every other kind gets a per-company sub-directory.
    The file name is ``<corp_name without spaces>_<start>-<end>.<extension>``.
    """
    invest_start_date = self.params.invest_start_date if start is None else start
    invest_end_date = self.params.invest_end_date if end is None else end

    if dir == 'invests_total':
        target_dir = os.path.join(self.main.DIR_CHARTS, dir)
    else:
        target_dir = os.path.join(self.main.DIR_CHARTS, dir, corp_name)
    DataUtils.create_dir(target_dir)

    file_stem = corp_name.replace(" ", "")
    file_name = file_stem + "_" + invest_start_date + "-" + invest_end_date + "." + extension
    return os.path.join(target_dir, file_name)
def get_ask_data(self, sim_analysis):
    """Return the computed best-ask frame of every simulation that has one.

    Each simulation is unpacked as a 6-tuple whose last element is computed.
    Empty frames are skipped; the others get their ``time`` column rewritten
    with ``get_times_in_seconds_after_start``.

    Returns:
        dict mapping the simulation index to its best-ask frame.
    """
    ret = {}
    for index, sim in enumerate(sim_analysis.all_sims):
        _, _, _, _, _, best_asks_dd = sim
        best_asks_df = best_asks_dd.compute()
        if len(best_asks_df) == 0:
            continue
        best_asks_df['time'] = DataUtils().get_times_in_seconds_after_start(best_asks_df['time'])
        ret[index] = best_asks_df
    return ret