def get_eval_corps_auto(self, date_maket_cap=None) -> pd.DataFrame:
    """Select the evaluation universe of companies by market capitalisation.

    Companies whose listing date ('상장일') is earlier than
    ``invest_start_date - max_listing_period_years`` are merged with their
    current market capitalisation, sorted from largest to smallest, and two
    bands are returned: the 50 largest, followed by up to 50 taken from the
    small-cap end with the 10 smallest skipped.

    ``self.params.invest_start_date`` defaults to today's date and
    ``self.params.max_listing_period_years`` defaults to 20 when the
    attribute is missing or ``None``.

    Args:
        date_maket_cap: Currently unused. Market caps always come from
            ``get_now_corps_maket_cap()``; the parameter is kept so existing
            callers keep working.

    Returns:
        A DataFrame with a fresh 0..n-1 index holding the selected
        companies: 100 rows when at least 110 companies remain after the
        merge, fewer (but never duplicated) otherwise.
    """
    start_str = getattr(self.params, 'invest_start_date', None)
    if start_str is None:
        start_str = DateUtils.today_str('%Y.%m.%d')
    invest_start_date = DateUtils.to_date(start_str)

    period_years = getattr(self.params, 'max_listing_period_years', None)
    if period_years is None:
        period_years = 20

    listing_cutoff = DateUtils.to_date_str(
        DateUtils.add_years(invest_start_date, -period_years), '%Y-%m-%d')

    corps = self.get_corps_all()
    # Work on an explicit copy: assigning a column on the object returned by
    # query() triggers chained-assignment warnings and may not stick.
    corps = corps.query("상장일<'{}'".format(listing_cutoff)).copy()
    # Normalise stock codes to zero-padded 6-character strings so they can be
    # used as the merge key below.
    corps['종목코드'] = corps['종목코드'].astype(str).str.zfill(6)

    # NOTE: a date-specific lookup (get_corps_maket_cap(date_maket_cap)) is
    # disabled; the current market capitalisation is used instead.
    corps_cap = self.get_now_corps_maket_cap()
    corps = corps.merge(corps_cap, on='종목코드')
    corps = corps.sort_values(by=["시가총액"], ascending=False)

    top_band = corps[:50]
    # Clamp the start so that, with fewer than 110 rows, the tail band neither
    # wraps around through a negative index nor re-selects top-band rows.
    tail_start = max(len(corps) - 60, 50)
    tail_band = corps[tail_start:-10]

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([top_band, tail_band], ignore_index=True)
def get_train_test(self, data, scaler_close=None):
    """Prepare price data and split it into train/test sets.

    Steps:
      1. Drop rows where any of close/open/high/low/volume is zero and
         renumber the index.
      2. Restrict rows to ``date <= params.invest_end_date`` when that is set.
      3. Work out how many rows belong to the investment period (stored on
         ``self.params.invest_count`` as a side effect). Without an
         ``invest_start_date`` the last row's date is used as the reference
         date and the count is 0.
      4. Optionally restrict the data to the last
         ``params.stock_training_period_years`` years before the reference
         date, and derive the test-row count from
         ``params.stock_test_period_years``.
      5. Scale the data, build the X/Y arrays and split them.

    Args:
        data: DataFrame with at least the columns ``date``, ``close``,
            ``open``, ``high``, ``low`` and ``volume``. The input is not
            modified; a copy is used. ``date`` is compared against date
            strings inside ``query`` (assumes a string-comparable format
            matching the params dates; confirm against callers).
        scaler_close: Optional scaler forwarded to ``get_scaled_data``.

    Returns:
        Tuple ``(data_params, scaler_close, dataX_last)`` where
        ``data_params`` is the result of ``split_train_test``,
        ``scaler_close`` is the scaler returned by ``get_scaled_data`` and
        ``dataX_last`` is the third value returned by ``get_dataXY``.
    """
    data = data.copy()
    price_cols = ['close', 'open', 'high', 'low', 'volume']
    # Keep only rows where every price/volume field is non-zero.
    data = data[(data[price_cols] != 0).all(axis=1)]
    data.index = pd.RangeIndex(len(data.index))
    # NOTE: mean-line features (self.add_mean_line) are currently disabled.

    if self.params.invest_end_date is not None:
        data = data.query("date<='{}'".format(self.params.invest_end_date))

    if self.params.invest_start_date is not None:
        invest_data = data.query("date>='{}'".format(
            self.params.invest_start_date))
        invest_count = len(invest_data.index) - 1
        self.params.invest_count = invest_count
        invest_start_date_str = self.params.invest_start_date
    else:
        invest_count = 0
        self.params.invest_count = 0
        # No explicit start: use the date of the last available row.
        invest_start_date_str = data.tail(1)['date'].to_string(index=False)
    invest_start_date = DateUtils.to_date(invest_start_date_str)

    # Treat an attribute that exists but is None the same as a missing one;
    # negating None would otherwise raise TypeError.
    training_years = getattr(self.params, 'stock_training_period_years', None)
    if training_years is not None:
        stock_start_date = DateUtils.add_years(invest_start_date,
                                               -training_years)
        stock_start_date = stock_start_date.strftime("%Y.%m.%d")
        data = data.query("date>='{}'".format(stock_start_date))

    test_count = None
    test_years = getattr(self.params, 'stock_test_period_years', None)
    if test_years is not None:
        test_start_date = DateUtils.add_years(invest_start_date, -test_years)
        test_start_date = DateUtils.to_date_str(test_start_date)
        test_data = data.query("date>='{}'".format(test_start_date))
        # Rows from the test start onward, minus the investment-period rows.
        test_count = len(test_data.index) - invest_count

    scaled_data, scaler_close = self.get_scaled_data(data, scaler_close)
    dataX, dataY, dataX_last, y_date = self.get_dataXY(scaled_data)
    data_params = self.split_train_test(dataX, dataY, invest_count,
                                        test_count, y_date)
    return data_params, scaler_close, dataX_last