import hparam as conf
import sessionWrapper as sesswrapper
from utility import dataProcess as dp
from utility import general_utility as gu
import model_zoo as mz
import loss_func as l
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from utility_trial import *

tv_gen = dp.train_validation_generaotr()
*_, meta = gu.read_metafile(
    '/home/ubuntu/dataset/etf_prediction/all_meta_data_Nm_1_MinMax_94.pkl')
f = tv_gen._load_data(
    '/home/ubuntu/dataset/etf_prediction/all_feature_data_Nm_1_MinMax_94.pkl')

stock_list = [
    '0050', '0051', '0052', '0053', '0054', '0055', '0056', '0057', '0058',
    '0059', '006201', '006203', '006204', '006208', '00690', '00692',
    '00701', '00713'
]
period = ['20130101', '20180520']

prob_ud = {}

for s in stock_list:
    data = tv_gen._selectData2array(f, [s], period)
    # Assumed completion -- the original statement is truncated in the source.
    # Record the empirical frequencies of the three label columns; the
    # up/flat/down ordering of those columns is an assumption.
    prob_ud[s] = [
        np.mean(data[:, -3]),
        np.mean(data[:, -2]),
        np.mean(data[:, -1]),
    ]
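
# A minimal, assumed follow-up sketch: tabulate the per-stock label frequencies
# collected above and plot them with the pandas/matplotlib imports already
# present. The 'up'/'flat'/'down' column names and the bar-plot layout are
# illustrative assumptions, not taken from the source.
prob_df = pd.DataFrame(prob_ud, index=['up', 'flat', 'down']).T
print(prob_df)

prob_df.plot(kind='bar', stacked=True, figsize=(12, 4))
plt.ylabel('label frequency')
plt.tight_layout()
plt.show()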
import hparam as conf
import sessionWrapper as sesswrapper
from utility import dataProcess as dp
from utility import general_utility as gu
import model_zoo as mz
import loss_func as l
import sklearn.preprocessing as p
import numpy as np
import tensorflow as tf
from utility_trial import *  # assumed source of add_DOW, as in the first snippet

tf.reset_default_graph()

c = conf.config('trial_cnn_cls').config['common']
sample_window = c['input_step'] + c['predict_step']

tv_gen = dp.train_validation_generaotr()
meta = gu.read_metafile(c['meta_file_path'])
f = tv_gen._load_data(c['src_file_path'])
stock = tv_gen._selectData2array(f, f.index, None)

#******Add Extra Feature*******
stock = add_DOW(stock)
#******************************

# First-order difference of the feature columns; the last three (label) columns
# are carried over unchanged.
stock_diff = stock[1:, :-3] - stock[:-1, :-3]
stock_diff = np.concatenate((stock_diff, stock[1:, -3:]), axis=1)

clean_stock = {}
missin_feature = []
stock_IDs = f.index
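
# Assumed continuation: the script above initializes clean_stock and
# missin_feature but stops here. A minimal sketch of how they might be filled,
# mirroring the per-stock cleaning used in generate_train_val_set_mStock
# further below (stack each stock's rows, record NaN feature columns, drop
# them). Reading the feature names from the meta file is an assumption.
import pandas as pd

*_, feature_names = gu.read_metafile(c['meta_file_path'])

for s in stock_IDs:
    stock_s = np.vstack([row for row in f.loc[s]])
    tmpDF = pd.DataFrame(stock_s, columns=feature_names)
    missin_feature.append(tmpDF.columns[tmpDF.isnull().any()].tolist())
    clean_stock[s] = tmpDF.dropna(axis=1)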
def get_ens_model(lagday=5,
                  model_temp=xgb.XGBClassifier(max_depth=3,
                                               learning_rate=0.05,
                                               n_estimators=500,
                                               silent=True)):

    print('**********Generate model for {} day***********'.format(lagday))

    c = conf.config('trial_cnn_cls').config['common']
    *_, meta = gu.read_metafile(c['meta_file_path'])
    tv_gen = dp.train_validation_generaotr()
    f = tv_gen._load_data(c['src_file_path'])

    data = tv_gen._selectData2array(f, f.index[:-4], None)
    data_velocity = (data[1:, 0:4] - data[:-1, 0:4]) / (data[:-1, 0:4] + 0.1)
    data = data[1:]

    train_sample = data[:-30]
    train_sample_v = data_velocity[:-30]
    flat_train_sample = np.reshape(np.transpose(train_sample, (0, 2, 1)), (-1, 94))
    flat_train_sample_velocity = np.reshape(
        np.transpose(train_sample_v, (0, 2, 1)), (-1, 4))

    test_sample = data[-30:]
    test_sample_v = data_velocity[-30:]
    flat_test_sample = np.reshape(np.transpose(test_sample, (0, 2, 1)), (-1, 94))
    flat_test_sample_velocity = np.reshape(
        np.transpose(test_sample_v, (0, 2, 1)), (-1, 4))

    #
    # flat_train_sample = train_data['train']
    # flat_train_sample_velocity = train_data['train_velocity']
    #
    # flat_test_sample = test_data['test']
    # flat_test_sample_velocity = test_data['test_velocity']

    fe_train = feature_extractor(flat_train_sample, flat_train_sample_velocity)
    d_ratio = fe_train.ratio()
    d_kdj_ratio = fe_train.kdj_ratio()
    d_ratio_velocity = fe_train.ratio_velocity()
    d_ud = fe_train.ud()
    d_kdj_macd_rssi_ratio = fe_train.kdj_macd_rssi_ratio()

    fe_test = feature_extractor(flat_test_sample, flat_test_sample_velocity)
    d_ratio_test = fe_test.ratio()
    d_kdj_ratio_test = fe_test.kdj_ratio()
    d_ratio_velocity_test = fe_test.ratio_velocity()
    d_ud_test = fe_test.ud()
    d_kdj_macd_rssi_ratio_test = fe_test.kdj_macd_rssi_ratio()

    train_label_raw = np.stack(
        (flat_train_sample[:, -3] + flat_train_sample[:, -2],
         flat_train_sample[:, -1]), axis=1)
    test_label_raw = np.stack(
        (flat_test_sample[:, -3] + flat_test_sample[:, -2],
         flat_test_sample[:, -1]), axis=1)

    model_dict = {}
    predict_dict = {}

    #*****ratio********
    train, train_label = data_label_shift(d_ratio, train_label_raw, lag_day=lagday)
    test, test_label = data_label_shift(d_ratio_test, test_label_raw, lag_day=lagday)
    train_label = np.argmax(train_label, axis=-1)
    test_label = np.argmax(test_label, axis=-1)

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=500, silent=True)
    model.fit(train, train_label)
    model_dict['ratio'] = model
    y_xgb_train = model.predict(train)
    y_xgb_v = model.predict(test)
    predict_dict['ratio'] = [y_xgb_train, y_xgb_v]

    print("Train Accuracy [ratio]: ", accuracy_score(y_xgb_train, train_label))
    print("Validation Accuracy [ratio]: ", accuracy_score(y_xgb_v, test_label))

    #*****kdj_ratio********
    train = d_kdj_ratio[:-lagday]
    test = d_kdj_ratio_test[:-lagday]

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=500, silent=True)
    model.fit(train, train_label)
    model_dict['kdj_ratio'] = model
    y_xgb_train = model.predict(train)
    y_xgb_v = model.predict(test)
    predict_dict['kdj_ratio'] = [y_xgb_train, y_xgb_v]

    print("Train Accuracy [kdj_ratio]: ", accuracy_score(y_xgb_train, train_label))
    print("Validation Accuracy [kdj_ratio]: ", accuracy_score(y_xgb_v, test_label))

    #*****ratio_velocity********
    train = d_ratio_velocity[:-lagday]
    test = d_ratio_velocity_test[:-lagday]

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=500, silent=True)
    model.fit(train, train_label)
    model_dict['ratio_velocity'] = model
    y_xgb_train = model.predict(train)
    y_xgb_v = model.predict(test)
    predict_dict['ratio_velocity'] = [y_xgb_train, y_xgb_v]

    print("Train Accuracy [ratio_velocity]: ", accuracy_score(y_xgb_train, train_label))
    print("Validation Accuracy [ratio_velocity]: ", accuracy_score(y_xgb_v, test_label))

    #*****ud********
    train = d_ud[:-lagday]
    test = d_ud_test[:-lagday]

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=500, silent=True)
    model.fit(train, train_label)
    model_dict['ud'] = model
    y_xgb_train = model.predict(train)
    y_xgb_v = model.predict(test)
    predict_dict['ud'] = [y_xgb_train, y_xgb_v]

    print("Train Accuracy [ud]: ", accuracy_score(y_xgb_train, train_label))
    print("Validation Accuracy [ud]: ", accuracy_score(y_xgb_v, test_label))

    #*****kdj_macd_rssi_ratio********
    train = d_kdj_macd_rssi_ratio[:-lagday]
    test = d_kdj_macd_rssi_ratio_test[:-lagday]

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=500, silent=True)
    model.fit(train, train_label)
    model_dict['kdj_macd_rssi_ratio'] = model
    y_xgb_train = model.predict(train)
    y_xgb_v = model.predict(test)
    predict_dict['kdj_macd_rssi_ratio'] = [y_xgb_train, y_xgb_v]

    print("Train Accuracy [kdj_macd_rssi_ratio]: ", accuracy_score(y_xgb_train, train_label))
    print("Validation Accuracy [kdj_macd_rssi_ratio]: ", accuracy_score(y_xgb_v, test_label))

    #*********Generate assemble input***********
    predict_train = []
    predict_test = []

    for k in predict_dict:
        predict_train.append(predict_dict[k][0])
        predict_test.append(predict_dict[k][1])

    predict_train = np.stack(predict_train, axis=1)
    predict_test = np.stack(predict_test, axis=1)

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=10, silent=True)
    model.fit(predict_train, train_label)
    model_dict['ensemble'] = model
    y_xgb_train_ens = model.predict(predict_train)
    y_xgb_v_ens = model.predict(predict_test)

    print("Train Accuracy [Ens]: ", accuracy_score(y_xgb_train_ens, train_label))
    print("Validation Accuracy [Ens]: ", accuracy_score(y_xgb_v_ens, test_label))

    return model_dict
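
# Illustrative usage sketch (assumed, not from the source): build one ensemble
# per prediction horizon and keep the fitted models around. The 1..5-day range
# and the model_dicts container are examples only; the module-level imports
# (xgboost as xgb, numpy as np, accuracy_score, feature_extractor,
# data_label_shift) are assumed to be in scope as in the rest of the file.
if __name__ == '__main__':
    model_dicts = {}
    for lagday in range(1, 6):
        model_dicts[lagday] = get_ens_model(lagday=lagday)
    # model_dicts[d]['ensemble'] is the stacked classifier for a d-day horizon;
    # it consumes the per-feature-set predictions produced by the other models.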
    for i in range(model_config['days']):
        for k in features[dow[i]]:
            feature_concat.append(features[dow[i]][k])

    data_feature = np.concatenate(feature_concat, axis=1)

    data = data_feature
    label = label

    return data, label


#srcPath = '/home/ubuntu/dataset/etf_prediction/all_feature_data_Nm_1_MinMax_94.pkl'
srcPath = '../Data/0525/all_feature_data_Nm_1_MinMax_120.pkl'
metaPath = '../Data/0525/all_meta_data_Nm_1_MinMax_120.pkl'

tv_gen = dp.train_validation_generaotr()
*_, meta = gu.read_metafile(metaPath)
f = tv_gen._load_data(srcPath)

mConfig = open(
    '/home/dashmoment/workspace/etf_prediction/trainer/config/20180526/best_config_xgb_dow_all.pkl',
    'rb')
#mConfig = open('/home/ubuntu/shared/workspace/etf_prediction/trainer/config/best_config_xgb_dow_all.pkl', 'rb')
best_config = pickle.load(mConfig)

predict_ud = {}

for s in stock_list:
    predict_ud[s] = []

    for predict_day in predict_days:
        model_config = best_config[s][predict_day]
#stock_list = ['0050', '0052', '0053', '0054', '0055', '0056', '0057', '0058', '0059', '006201',
#              '006203', '006204', '006208', '00690', '00692', '00701', '00713']
stock_list = ['0055']

score = {}

for sk in stock_list:

    score[sk] = {}
    print('Scoring stock: ', sk)

    src_path = '/home/ubuntu/dataset/etf_prediction/ETF_member/all_feature_data_Nm_1_MinMax_94_' + str(sk) + '.pkl'
    meta_path = '/home/ubuntu/dataset/etf_prediction/ETF_member/all_meta_data_Nm_1_MinMax_94_' + str(sk) + '.pkl'

    meta = gu.read_metafile(meta_path)

    # _dp = dp.data_processor(src_path,
    #                         lagday=lagday, period=['20130101', '20180311'],
    #                         stockList=[sk])

    _dp = dp.data_processor(src_path, lagday=lagday, period=['20160101', '20160531'])

    clean_stock = _dp.clean_data()
    train_val_set = _dp.split_train_val_set_mstock(clean_stock, 0.01)

    train_fe = ens.feature_extractor(train_val_set['train'], None)
    test_fe = ens.feature_extractor(train_val_set['test'], None)

    train_data_ = train_fe.ratio()
    test_data_ = test_fe.ratio()
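
    # Assumed continuation (not in the source): build shifted up/down labels
    # from the last three label columns and score an XGBoost classifier on the
    # ratio features, mirroring get_ens_model. The ens.data_label_shift helper,
    # the xgb / accuracy_score imports, and the 2-D layout of
    # train_val_set['train'] / ['test'] are assumptions here.
    train_label_raw = np.stack(
        (train_val_set['train'][:, -3] + train_val_set['train'][:, -2],
         train_val_set['train'][:, -1]), axis=1)
    test_label_raw = np.stack(
        (train_val_set['test'][:, -3] + train_val_set['test'][:, -2],
         train_val_set['test'][:, -1]), axis=1)

    train, train_label = ens.data_label_shift(train_data_, train_label_raw, lag_day=lagday)
    test, test_label = ens.data_label_shift(test_data_, test_label_raw, lag_day=lagday)
    train_label = np.argmax(train_label, axis=-1)
    test_label = np.argmax(test_label, axis=-1)

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05, n_estimators=500, silent=True)
    model.fit(train, train_label)

    score[sk]['train'] = accuracy_score(train_label, model.predict(train))
    score[sk]['validation'] = accuracy_score(test_label, model.predict(test))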
def generate_train_val_set_mStock(
        self,
        filepath,
        stock_IDs,
        train_windows,
        predict_windows,
        train_val_ratio,
        is_special_list=False,
        metafile='/home/dashmoment/workspace/etf_prediction/Data/all_meta_data_Nm[0]_59.pkl'):

    # train_windows = 50
    # predict_windows = 5
    # train_val_ratio = 0.2
    # filepath = './Data/all_feature_data_Nm[0]_59.pkl'

    *_, feature_names = ut.read_metafile(metafile)
    testSet = self._load_data(filepath)

    clean_stock = {}
    missin_feature = []

    if is_special_list:

        special_list = {
            '00690': "20170330",
            '00692': "20170516",
            '00701': "20170816",
            '00713': "20170927"
        }

        for s in special_list:
            mask = (testSet.columns > special_list[s])
            cut_testSet = testSet.iloc[:, mask]
            stock_s = cut_testSet.loc[s]

            clean_set = np.vstack([row for row in stock_s])
            tmpDF = pd.DataFrame(clean_set, columns=feature_names)
            missin_feature.append(tmpDF.columns[tmpDF.isnull().any()].tolist())
            tmpDF = tmpDF.dropna(axis=1)
            clean_stock[s] = tmpDF

        all_stock_list = stock_IDs + ["00690", "00692", "00701", "00713"]

    else:
        all_stock_list = stock_IDs

    for s in stock_IDs:
        stock = testSet.loc[s]

        clean_set = np.vstack([row for row in stock])
        tmpDF = pd.DataFrame(clean_set, columns=feature_names)

        if is_special_list:
            clean_stock[s] = tmpDF.drop(missin_feature[-1], axis=1)
        else:
            clean_stock[s] = tmpDF

    train = []
    validation = []
    train_raw = {}
    validation_raw = {}

    for s in all_stock_list:
        tmp_train, tmp_validation = self._split_train_val_side_by_side(
            clean_stock[s], train_windows, predict_windows, train_val_ratio)

        train.append(tmp_train)
        validation.append(tmp_validation)
        train_raw[s] = tmp_train
        validation_raw[s] = tmp_validation

    train = np.vstack(train)
    validation = np.vstack(validation)

    return train, validation, train_raw, validation_raw, missin_feature
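
# Illustrative usage sketch (assumed): generate_train_val_set_mStock is taken
# to be a method of the same train_validation_generaotr class used elsewhere in
# this repo. The file path, window sizes, split ratio, and stock list below are
# examples only (they follow the commented defaults above), not source values.
tv_gen = dp.train_validation_generaotr()
train, validation, train_raw, validation_raw, missing_cols = \
    tv_gen.generate_train_val_set_mStock(
        filepath='./Data/all_feature_data_Nm[0]_59.pkl',
        stock_IDs=['0050', '0051', '0052'],
        train_windows=50,
        predict_windows=5,
        train_val_ratio=0.2,
        is_special_list=False)
print(train.shape, validation.shape)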
import sys
sys.path.append('../')

import numpy as np
import hparam as conf
import evaluation_zoo as evalf
from utility import general_utility

conf_reg = conf.config('test_onlyEnc_biderect_gru_nospecialstock_cls').config['common']

close_price_mean_var, *_ = general_utility.read_metafile(conf_reg['meta_file_path'])
mean = close_price_mean_var.mean_[0]
std = np.sqrt(close_price_mean_var.var_[0])

stockID = ['0050']

reg = evalf.regression_score(conf_reg, stockID, mean, std)
reg_score, *_ = reg.regression_score()

r2Cls = evalf.regression2Cls_score(conf_reg, stockID, mean, std)
r2Cls_predict, *_ = r2Cls.regression2Cls_score()

conf_cls = conf.config('test_onlyEnc_biderect_gru_nospecialstock_cls').config['common']

cls = evalf.classification_score(conf_cls, stockID)
cls_score, predict_s, gt = cls.classification_score()
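
# Illustrative follow-up (assumed, not from the source): if predict_s and gt
# are aligned 1-D label arrays, a confusion matrix gives a quick per-class view
# of the classifier alongside the two summary scores.
from sklearn.metrics import confusion_matrix

print('Regression score:', reg_score)
print('Classification score:', cls_score)
print(confusion_matrix(np.asarray(gt).ravel(), np.asarray(predict_s).ravel()))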