import hparam as conf
import sessionWrapper as sesswrapper
from utility import dataProcess as dp
from utility import general_utility as gu
import model_zoo as mz
import loss_func as l
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing

from utility_trial import *

# Load preprocessing metadata and the MinMax-normalised 94-feature pickle.
tv_gen = dp.train_validation_generaotr()
*_, meta = gu.read_metafile(
    '/home/ubuntu/dataset/etf_prediction/all_meta_data_Nm_1_MinMax_94.pkl')
f = tv_gen._load_data(
    '/home/ubuntu/dataset/etf_prediction/all_feature_data_Nm_1_MinMax_94.pkl')

# Taiwan ETF ticker codes to evaluate.
stock_list = [
    '0050', '0051', '0052', '0053', '0054', '0055', '0056', '0057', '0058',
    '0059', '006201', '006203', '006204', '006208', '00690', '00692', '00701',
    '00713'
]

# [start, end] date range, yyyymmdd strings.
period = ['20130101', '20180520']

# Per-stock up/down statistics.
# NOTE(review): this snippet is truncated — the assignment below is cut off.
prob_ud = {}
for s in stock_list:
    data = tv_gen._selectData2array(f, [s], period)
    prob_ud[s] = [
Пример #2
0
import hparam as conf
import sessionWrapper as sesswrapper
from utility import dataProcess as dp
from utility import general_utility as gu
import model_zoo as mz
import loss_func as l

import sklearn.preprocessing as p

# NOTE(review): `tf` (TensorFlow) and `np` (NumPy) are used below but not
# imported in this snippet — presumably imported elsewhere; verify.
tf.reset_default_graph()  # clear any previously built TF graph
c = conf.config('trial_cnn_cls').config['common']
# Total sample window = encoder input length + prediction horizon.
sample_window = c['input_step'] + c['predict_step']

tv_gen = dp.train_validation_generaotr()
# NOTE(review): other snippets unpack `*_, meta = gu.read_metafile(...)`;
# here `meta` receives the whole return value — confirm which is intended.
meta = gu.read_metafile(c['meta_file_path'])
f = tv_gen._load_data(c['src_file_path'])
stock = tv_gen._selectData2array(f, f.index, None)

#******Add Extra Feature*******
stock = add_DOW(stock)  # append a day-of-week feature


#******************************

# Day-over-day differences of all but the last 3 columns (the last 3 are
# assumed to be label columns — TODO confirm); labels kept unchanged.
stock_diff = stock[1:,:-3] - stock[:-1,:-3]
stock_diff = np.concatenate((stock_diff, stock[1:,-3:]), axis=1)

clean_stock = {}
missin_feature = []
stock_IDs = f.index
Пример #3
0
def get_ens_model(lagday=5, model_temp=None):
    """Train five per-feature XGBoost up/down classifiers and a stacking
    ensemble for a ``lagday``-day-ahead prediction.

    Parameters
    ----------
    lagday : int
        Prediction horizon in days; features are shifted against labels by
        this amount.
    model_temp : object, optional
        Unused; kept for backward compatibility. The previous default built
        an ``xgb.XGBClassifier`` at import time (mutable default argument,
        shared across all calls) — replaced with ``None``; behavior is
        unchanged because the parameter was never read.

    Returns
    -------
    dict
        Fitted models keyed by feature-set name plus ``'ensemble'``.
    """
    print('**********Generate model for {} day***********'.format(lagday))

    c = conf.config('trial_cnn_cls').config['common']
    *_, meta = gu.read_metafile(c['meta_file_path'])
    tv_gen = dp.train_validation_generaotr()
    f = tv_gen._load_data(c['src_file_path'])
    # Drop the last 4 stocks (presumably the late-listed ETFs — verify).
    data = tv_gen._selectData2array(f, f.index[:-4], None)

    # Day-over-day relative change of the first 4 columns; the +0.1 guards
    # against division by zero.
    data_velocity = (data[1:, 0:4] - data[:-1, 0:4]) / (data[:-1, 0:4] + 0.1)
    data = data[1:]  # align price rows with the velocity rows

    def _flatten(sample, n_features):
        # (days, features, stocks) -> (days*stocks, features); axis meaning
        # assumed from the transpose — TODO confirm against the data layout.
        return np.reshape(np.transpose(sample, (0, 2, 1)), (-1, n_features))

    # Hold out the final 30 days for validation.
    flat_train_sample = _flatten(data[:-30], 94)
    flat_train_sample_velocity = _flatten(data_velocity[:-30], 4)
    flat_test_sample = _flatten(data[-30:], 94)
    flat_test_sample_velocity = _flatten(data_velocity[-30:], 4)

    fe_train = feature_extractor(flat_train_sample, flat_train_sample_velocity)
    train_features = {
        'ratio': fe_train.ratio(),
        'kdj_ratio': fe_train.kdj_ratio(),
        'ratio_velocity': fe_train.ratio_velocity(),
        'ud': fe_train.ud(),
        'kdj_macd_rssi_ratio': fe_train.kdj_macd_rssi_ratio(),
    }

    fe_test = feature_extractor(flat_test_sample, flat_test_sample_velocity)
    test_features = {
        'ratio': fe_test.ratio(),
        'kdj_ratio': fe_test.kdj_ratio(),
        'ratio_velocity': fe_test.ratio_velocity(),
        'ud': fe_test.ud(),
        'kdj_macd_rssi_ratio': fe_test.kdj_macd_rssi_ratio(),
    }

    # Two-class labels: merge the first two of the last three one-hot columns
    # against the last one (down/flat vs up — TODO confirm column semantics).
    train_label_raw = np.stack(
        (flat_train_sample[:, -3] + flat_train_sample[:, -2],
         flat_train_sample[:, -1]),
        axis=1)
    test_label_raw = np.stack(
        (flat_test_sample[:, -3] + flat_test_sample[:, -2],
         flat_test_sample[:, -1]),
        axis=1)

    model_dict = {}
    predict_dict = {}

    # The 'ratio' set also produces the shifted labels reused by every other
    # feature set (those are simply truncated by `lagday` to stay aligned).
    train, train_label = data_label_shift(train_features['ratio'],
                                          train_label_raw,
                                          lag_day=lagday)
    test, test_label = data_label_shift(test_features['ratio'],
                                        test_label_raw,
                                        lag_day=lagday)
    train_label = np.argmax(train_label, axis=-1)
    test_label = np.argmax(test_label, axis=-1)

    def _new_model(n_estimators=500):
        # A fresh estimator per feature set — same hyper-parameters as the
        # original copy-pasted constructors.
        return xgb.XGBClassifier(max_depth=3,
                                 learning_rate=0.05,
                                 n_estimators=n_estimators,
                                 silent=True)

    def _fit_and_report(name, train_x, test_x):
        # Fit one feature-set model, record it and its predictions, and print
        # train/validation accuracy. accuracy_score(pred, label) has y_true
        # and y_pred swapped, but accuracy is symmetric so the value is the
        # same; kept to preserve original behavior exactly.
        model = _new_model()
        model.fit(train_x, train_label)
        model_dict[name] = model
        pred_train = model.predict(train_x)
        pred_test = model.predict(test_x)
        predict_dict[name] = [pred_train, pred_test]
        print("Train Accuracy [{}]: ".format(name),
              accuracy_score(pred_train, train_label))
        print("Validation Accuracy [{}]: ".format(name),
              accuracy_score(pred_test, test_label))

    _fit_and_report('ratio', train, test)
    for name in ('kdj_ratio', 'ratio_velocity', 'ud', 'kdj_macd_rssi_ratio'):
        _fit_and_report(name,
                        train_features[name][:-lagday],
                        test_features[name][:-lagday])

    #*********Generate assemble input***********
    # Each base model's class predictions become one ensemble feature column;
    # dict insertion order preserves the original feature ordering.
    predict_train = np.stack([p[0] for p in predict_dict.values()], axis=1)
    predict_test = np.stack([p[1] for p in predict_dict.values()], axis=1)

    model = _new_model(n_estimators=10)
    model.fit(predict_train, train_label)
    model_dict['ensemble'] = model
    y_xgb_train_ens = model.predict(predict_train)
    y_xgb_v_ens = model.predict(predict_test)

    print("Train Accuracy [Ens]: ", accuracy_score(y_xgb_train_ens,
                                                   train_label))
    print("Validation Accuracy [Ens]: ",
          accuracy_score(y_xgb_v_ens, test_label))

    return model_dict
Пример #4
0
    for i in range(model_config['days']):
        for k in features[dow[i]]:
            feature_concat.append(features[dow[i]][k])

    data_feature = np.concatenate(feature_concat, axis=1)
    data = data_feature
    label = label

    return data, label


#srcPath = '/home/ubuntu/dataset/etf_prediction/all_feature_data_Nm_1_MinMax_94.pkl'
# 120-feature variant of the data set (0525 snapshot).
srcPath = '../Data/0525/all_feature_data_Nm_1_MinMax_120.pkl'
metaPath = '../Data/0525/all_meta_data_Nm_1_MinMax_120.pkl'
tv_gen = dp.train_validation_generaotr()
*_, meta = gu.read_metafile(metaPath)
f = tv_gen._load_data(srcPath)
# NOTE(review): the file handle is never closed — prefer `with open(...)`.
mConfig = open(
    '/home/dashmoment/workspace/etf_prediction/trainer/config/20180526/best_config_xgb_dow_all.pkl',
    'rb')
#mConfig =  open('/home/ubuntu/shared/workspace/etf_prediction/trainer/config/best_config_xgb_dow_all.pkl', 'rb')
# Best per-stock / per-horizon model configurations, keyed [stock][day].
best_config = pickle.load(mConfig)

predict_ud = {}

# NOTE(review): `stock_list` and `predict_days` are defined elsewhere in the
# original file; this snippet is truncated below.
for s in stock_list:
    predict_ud[s] = []
    for predict_day in predict_days:

        model_config = best_config[s][predict_day]
Пример #5
0
#stock_list =  ['0050',  '0052', '0053', '0054', '0055', '0056', '0057', '0058', '0059', '006201',
#               '006203', '006204', '006208','00690', '00692', '00701', '00713']

# Score a single ETF for now; the full list above can be re-enabled.
stock_list = ['0055']
score = {}
for sk in stock_list:

    score[sk] = {}
    print('Scoring stock: ', sk)

    # Per-stock feature / metadata pickles for the ETF-constituent data.
    src_path = '/home/ubuntu/dataset/etf_prediction/ETF_member/all_feature_data_Nm_1_MinMax_94_' + str(
        sk) + '.pkl'
    meta_path = '/home/ubuntu/dataset/etf_prediction/ETF_member/all_meta_data_Nm_1_MinMax_94_' + str(
        sk) + '.pkl'
    # NOTE(review): other snippets unpack `*_, meta = gu.read_metafile(...)`;
    # here `meta` receives the whole return value — confirm which is intended.
    meta = gu.read_metafile(meta_path)
    #    _dp = dp.data_processor(src_path,
    #                            lagday = lagday, period=['20130101', '20180311'],
    #                            stockList = [sk])
    # Restrict scoring to Jan–May 2016. The original end date '2016531' was
    # missing a digit ('20160531'); since yyyymmdd bounds are compared as
    # strings elsewhere in this file, the malformed value sorts AFTER every
    # real 2016 date ('20160601' < '2016531' lexicographically), silently
    # extending the window past May.
    # NOTE(review): string comparison of period bounds is assumed from the
    # yyyymmdd convention used here — confirm inside dp.data_processor.
    _dp = dp.data_processor(src_path,
                            lagday=lagday,
                            period=['20160101', '20160531'])

    clean_stock = _dp.clean_data()
    train_val_set = _dp.split_train_val_set_mstock(clean_stock, 0.01)

    train_fe = ens.feature_extractor(train_val_set['train'], None)
    test_fe = ens.feature_extractor(train_val_set['test'], None)
    train_data_ = train_fe.ratio()
    test_data_ = test_fe.ratio()
Пример #6
0
    def generate_train_val_set_mStock(
        self,
        filepath,
        stock_IDs,
        train_windows,
        predict_windows,
        train_val_ratio,
        is_special_list=False,
        metafile='/home/dashmoment/workspace/etf_prediction/Data/all_meta_data_Nm[0]_59.pkl'
    ):
        """Build stacked train/validation windows for multiple stocks.

        Parameters
        ----------
        filepath : str
            Pickle with the per-stock feature data.
        stock_IDs : list
            Stock identifiers to load (excluding the special late-listed ones).
        train_windows, predict_windows : int
            Window lengths forwarded to ``_split_train_val_side_by_side``.
        train_val_ratio : float
            Split ratio forwarded to the splitter.
        is_special_list : bool
            When True, also process the four ETFs listed after 2013, cutting
            each one's history at its listing date and dropping all-NaN
            feature columns.
        metafile : str
            Metadata pickle whose last element holds the feature names.

        Returns
        -------
        tuple
            ``(train, validation, train_raw, validation_raw, missing)`` —
            stacked arrays over all stocks, per-stock raw splits, and the
            per-special-stock lists of dropped (NaN) feature columns.
        """
        *_, feature_names = ut.read_metafile(metafile)
        testSet = self._load_data(filepath)

        clean_stock = {}
        missing_features = []  # per special stock: columns that held NaNs

        if is_special_list:
            # Listing dates of the late-listed ETFs; columns on or before the
            # listing date contain no data and are discarded.
            special_list = {
                '00690': "20170330",
                '00692': "20170516",
                '00701': "20170816",
                '00713': "20170927"
            }

            for sid in special_list:
                date_mask = testSet.columns > special_list[sid]
                rows = list(testSet.iloc[:, date_mask].loc[sid])
                frame = pd.DataFrame(np.vstack(rows), columns=feature_names)
                missing_features.append(
                    frame.columns[frame.isnull().any()].tolist())
                # Original used dropna(axis=[1]); the list-of-axes form was
                # removed in modern pandas — axis=1 is the equivalent call.
                clean_stock[sid] = frame.dropna(axis=1)

            all_stock_list = stock_IDs + ["00690", "00692", "00701", "00713"]
        else:
            all_stock_list = stock_IDs

        for sid in stock_IDs:
            rows = list(testSet.loc[sid])
            frame = pd.DataFrame(np.vstack(rows), columns=feature_names)
            if is_special_list:
                # NOTE(review): this drops only the LAST special stock's
                # missing-feature list from every regular stock — preserved
                # as-is, but it looks like an oversight; verify intent.
                clean_stock[sid] = frame.drop(missing_features[-1], axis=1)
            else:
                clean_stock[sid] = frame

        train, validation = [], []
        train_raw, validation_raw = {}, {}

        for sid in all_stock_list:
            tmp_train, tmp_validation = self._split_train_val_side_by_side(
                clean_stock[sid], train_windows, predict_windows,
                train_val_ratio)
            train.append(tmp_train)
            validation.append(tmp_validation)
            train_raw[sid] = tmp_train
            validation_raw[sid] = tmp_validation

        return (np.vstack(train), np.vstack(validation), train_raw,
                validation_raw, missing_features)
Пример #7
0
import sys
sys.path.append('../')
import numpy as np

import hparam as conf
import evaluation_zoo as evalf
from utility import general_utility

# Common hyper-parameters for the regression evaluation.
# NOTE(review): both conf_reg and conf_cls below load the SAME config name
# (ending in '_cls') — confirm the regression run shouldn't use a '_reg'
# configuration instead.
conf_reg = conf.config(
    'test_onlyEnc_biderect_gru_nospecialstock_cls').config['common']
# De-normalisation statistics for the close price (scaler fitted upstream).
close_price_mean_var, *_ = general_utility.read_metafile(
    conf_reg['meta_file_path'])

mean = close_price_mean_var.mean_[0]
std = np.sqrt(close_price_mean_var.var_[0])  # scaler stores variance, not std
stockID = ['0050']

# Regression metric on the de-normalised close price.
reg = evalf.regression_score(conf_reg, stockID, mean, std)
reg_score, *_ = reg.regression_score()

# Regression output mapped to up/down classes.
r2Cls = evalf.regression2Cls_score(conf_reg, stockID, mean, std)
r2Cls_predict, *_ = r2Cls.regression2Cls_score()

# Direct classification score plus per-sample predictions and ground truth.
conf_cls = conf.config(
    'test_onlyEnc_biderect_gru_nospecialstock_cls').config['common']
cls = evalf.classification_score(conf_cls, stockID)
cls_score, predict_s, gt = cls.classification_score()