def select_features():
    """Select relevant features using tree-based (XGBoost) importances.

    Builds a model on the interim feature set, visualises the feature
    importances and persists the selected feature labels / values.
    """
    features = load('../../data/interim/features.pickle')
    lookback = 1
    model = build_model(features, lookback)
    xgb.plot_importance(model)
    if lookback == 1:
        # Single-timestep model: importances map 1:1 onto feature columns.
        fig = plt.figure(figsize=(8, 8))
        plt.xticks(rotation='vertical')
        # NOTE(review): columns 0-5 and 14 are excluded from the tick
        # labels — presumably non-feature columns; confirm against the
        # feature-building step.
        plt.barh([i for i in range(len(model.feature_importances_))],
                 model.feature_importances_.tolist(),
                 tick_label=features.columns[list(range(6, 14)) + list(range(15, features.shape[1]))])
        # Chart labels are intentionally Polish ("feature importance").
        plt.title('Istotność cech')
        plt.xlim((0, 0.3))
        plt.ylabel('Cecha')
        plt.xlabel('Istotność')
        plt.show()
    else:
        # Multi-step lookback: aggregate importances per original feature.
        feature_importances = calculate_feature_importances(features, lookback, model)
        plot_feature_importance(feature_importances)
        # NOTE(review): the selection/saving below was reconstructed as part
        # of this branch — `feature_importances` is only bound here, and with
        # lookback == 1 it would otherwise be undefined. Confirm the intended
        # structure against the original file.
        selected_features, selected_features_with_lookback = threshold_features(feature_importances, lookback)
        print('Selected features with trees: ', selected_features)
        not_selected_features = list(set(features.columns.to_list()).difference(set(selected_features)))
        print('Not selected features: ', not_selected_features)
        save(selected_features, '../../data/interim/selected_features_labels_trees.pickle')
        save(features[selected_features], '../../data/processed/selected_features_trees.pickle')
        save(selected_features_with_lookback, '../../data/interim/selected_features_labels_with_lookback_trees.pickle')
def build_data_trading_plot():
    """Build an unscaled, non-binary dataset used for trading visualisation."""
    raw_features = load('../../data/processed/features_amazon_corr.pickle')
    raw_targets = load('../../data/processed/targets_amazon.pickle')
    # Drop the initial period that is not relevant for evaluation.
    not_relevant_days = 1660
    raw_features, raw_targets = drop_not_relevant(raw_features, raw_targets, not_relevant_days)
    # 5% test / 15% validation split, computed after the drop.
    n_rows = raw_features.shape[0]
    data, name = build_data(raw_features, raw_targets,
                            lookback=1,
                            scaled=False,
                            encode_binary=False,
                            test_size=int(0.05 * n_rows),
                            val_size=int(0.15 * n_rows),
                            pct_change=False)
    save(data, '../../data/timeseries/' + name + '_trading_vis_amazon.pickle')
def build_features():
    """Compute the Amazon feature set from raw stock data and persist it.

    Other assets (e.g. 'tesla') and extra feature groups (correlated
    assets, automotive close prices) were explored here previously; only
    technical indicators and Fourier transforms are currently used.
    """
    raw = load('../../data/raw/stock_data.pickle')
    frame = raw['amazon']
    frame = get_technical_indicators(frame)
    frame = get_fourier_transforms(frame)
    save(frame, '../../data/interim/features_amazon.pickle')
def build_default_data():
    """Build and save every default dataset variant.

    Iterates the full grid of target encoding (binary / raw), scaling,
    and lookback window (1 / 60 days), saving one timeseries pickle per
    combination.
    """
    feats = load('../../data/processed/features_amazon_corr.pickle')
    tgts = load('../../data/processed/targets_amazon.pickle')
    # Drop the initial period that is not relevant for modelling.
    not_relevant_days = 1660
    feats, tgts = drop_not_relevant(feats, tgts, not_relevant_days)
    # 5% test / 15% validation split, computed after the drop.
    n_rows = feats.shape[0]
    test_size = int(0.05 * n_rows)
    val_size = int(0.15 * n_rows)
    for binary_flag in (True, False):
        for scale_flag in (True, False):
            for window in (1, 60):
                data, name = build_data(feats, tgts,
                                        lookback=window,
                                        scaled=scale_flag,
                                        encode_binary=binary_flag,
                                        test_size=test_size,
                                        val_size=val_size,
                                        pct_change=True)
                save(data, '../../data/timeseries/' + name + '_amazon.pickle')
def save_targets():
    """Extract the Date/Close columns for Amazon and save them as targets."""
    stock_data = load('../../data/raw/stock_data.pickle')
    targets = pd.DataFrame(stock_data['amazon'][['Date', 'Close']],
                           columns=['Date', 'Close'])
    save(targets, '../../data/processed/targets_amazon.pickle')
from src.trading_simulation.simulation import Simulation
from src.trading_simulation.strategy import SimpleStrategy, BuyAndHold
from src.utils.io import load, save
import numpy as np
import pandas as pd

if __name__ == '__main__':
    # Starting capital for every simulated trading strategy.
    init_investment = 5000
    # Commented alternatives switch between the default and Amazon datasets.
    # targets = load('../../data/processed/targets.pickle')
    targets = load('../../data/processed/targets_amazon.pickle')
    date = pd.DataFrame(targets['Date'])
    # Load model predictions
    dense = load('../../data/predictions/dense.pickle')
    gru = load('../../data/predictions/gru.pickle')
    # gru = load('../../data/predictions/gru_amazon.pickle')
    pseudo_random = load('../../data/predictions/pseudo_random.pickle')
    # pseudo_random = load('../../data/predictions/pseudo_random_amazon.pickle')
    lstm = load('../../data/predictions/lstm.pickle')
    conv_lstm = load('../../data/predictions/conv_lstm.pickle')
    # Models and their display names must stay in matching order.
    models = [pseudo_random, dense, gru, lstm, conv_lstm]
    model_names = ['pseudo_random', 'dense', 'gru', 'lstm', 'conv_lstm']
    # load stock return for test set
    # NOTE(review): index [5] presumably selects the test-set returns from
    # the saved timeseries tuple — confirm against the data-build step.
    stock_returns = load(
        '../../data/timeseries/data_lookback_1_notbinary_notscaled.pickle')[5]
    # stock_returns = load('../../data/timeseries/data_lookback_1_notbinary_notscaled_amazon.pickle')[5]
    # Flatten the 2-D returns array into a plain Python list.
    stock_returns = stock_returns.reshape(1, stock_returns.shape[0]).tolist()[0]
    # load stock prices for test set
def plot_validation_vs_training(model):
    """Plot training vs. validation RMSE per boosting iteration.

    Args:
        model: fitted XGBoost model exposing ``evals_result()`` with two
            eval sets ('validation_0' = training, 'validation_1' = validation
            — assumed from the labels below; confirm against the fit call).
    """
    eval_result = model.evals_result()
    training_rounds = range(len(eval_result['validation_0']['rmse']))
    plt.scatter(x=training_rounds, y=eval_result['validation_0']['rmse'], label='Training Error')
    plt.scatter(x=training_rounds, y=eval_result['validation_1']['rmse'], label='Validation Error')
    plt.xlabel('Iterations')
    plt.ylabel('RMSE')
    plt.title('Training Vs Validation Error')
    plt.legend()
    plt.show()


def plot_feature_importance(feature_importances):
    """Plot a horizontal bar chart of the top-100 feature importances.

    Args:
        feature_importances: DataFrame-like with feature names in column 0
            and importance values in column 1 (assumed pre-sorted by
            importance — confirm against the caller).
    """
    rc('xtick', labelsize=6)
    rc('ytick', labelsize=6)
    # FIX: the figure handle was previously bound to an unused local `fig`;
    # create the figure without keeping a reference.
    plt.figure(figsize=(10, 10))
    plt.xticks(rotation='vertical')
    plt.barh(range(100), feature_importances.iloc[:100, 1])
    plt.yticks(range(100), feature_importances.iloc[:100, 0])
    plt.title('Feature importance')
    plt.show()


if __name__ == '__main__':
    features = load('../../data/interim/features.pickle')
    plot_technical_indicators(features, 500)
def select_features():
    """Select features for the Amazon dataset via correlation filtering.

    Two-stage selection on day-over-day percentage changes:
      1. keep features whose absolute correlation with 'Close' exceeds a
         minimum threshold;
      2. among the survivors, drop one feature of every pair whose mutual
         correlation exceeds 0.95.

    Saves the selected feature labels (interim) and the reduced,
    Date-indexed feature frame (processed).
    """
    features = load('../../data/interim/features_amazon.pickle')
    date = features['Date']
    features.drop('Date', inplace=True, axis=1)
    # Work on relative day-over-day changes; pct_change can produce +/-inf
    # (division by zero), so map those to NaN before imputing.
    features_diff = features.pct_change().replace([np.inf, -np.inf], np.nan)
    # Impute NaN values with the column mean.
    imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
    imp_mean.fit(features_diff)
    features_diff = imp_mean.transform(features_diff)
    features_diff = pd.DataFrame(data=features_diff, columns=features.columns)
    corr_matrix = features_diff.corr().abs()
    # Stage 1: rank features by |correlation| with the close price.
    close_price_cov = pd.DataFrame(
        corr_matrix['Close'].sort_values(ascending=False))
    cov_threshold = 0.001
    selected_features = close_price_cov[
        close_price_cov['Close'] > cov_threshold].index.to_list()
    not_selected_features = list(
        set(features.columns.to_list()).difference(set(selected_features)))
    print('Not selected features: ', not_selected_features)
    save(selected_features,
         '../../data/interim/selected_features_labels_cov.pickle')
    features = features[selected_features]
    # Stage 2: look only at the upper triangle so each pair is tested once.
    # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin bool is the documented replacement.
    upper = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    # Find feature columns with any pairwise correlation greater than 0.95.
    to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
    # Features already removed in stage 1 are no longer in `features`,
    # so they must not be dropped again.
    for f in not_selected_features:
        if f in to_drop:
            to_drop.remove(f)
    features.drop(to_drop, axis=1, inplace=True)
    print(to_drop)
    # Restore the Date column as the index before saving.
    features['Date'] = date
    features.set_index('Date', inplace=True)
    save(features, '../../data/processed/features_amazon_corr.pickle')