# NOTE(review): this chunk begins mid-method — the enclosing `def` and its
# class header are outside the visible region. Indentation below is a
# best-effort reconstruction of the original (flattened) source.

        # Append `in_data`'s training features column-wise (axis=1) onto the
        # stored X_train matrix. Assumes both arrays have the same number of
        # rows (samples) — TODO confirm against the caller.
        self.dict_test_and_train['X_train'] = np.concatenate(
            [self.dict_test_and_train['X_train'], in_data['X_train']], axis=1)

    def get_start_end_date(self):
        """Return the (start, end) pair parsed from self.data['start_end_date'].

        The raw 'start'/'end' values are converted by
        ``md.get_date_from_UTC_ms`` — presumably UTC timestamps in
        milliseconds; verify against mylib_dataset.

        Side effect: prints the raw start/end mapping (debug output).
        """
        print(self.data['start_end_date'])
        start = md.get_date_from_UTC_ms(self.data['start_end_date']['start'])
        end = md.get_date_from_UTC_ms(self.data['start_end_date']['end'])
        return start, end


# --- Script section: pairwise feature-group importance experiment ---------
from copy import copy

# Hard-coded analysis window. Note '2018-05-1' uses a single-digit day —
# works only if get_datetime_from_string accepts that form; TODO confirm.
start_date = md.get_datetime_from_string('2017-01-19')
end_date = md.get_datetime_from_string('2018-05-1')

# feature_set[0] is the always-included reference group; the remaining
# entries are candidate feature groups to evaluate.
feature_set = feature_list.get_feature_set()
feature_sets = feature_set[1:]
reference_group = feature_set[0]

dict_perf_feats = {}   # feature name -> list of recorded importances
break_flag = False     # NOTE(review): never set True anywhere visible — dead guard?

# Only groups from index 20 onward are analysed — presumably resuming a
# previously interrupted run; TODO confirm.
for analysis_set in feature_sets[20:]:
    if (break_flag):
        break
    # Initialise an empty importance list per feature of the current group.
    for feature in analysis_set:
        dict_perf_feats[feature] = []
    # Pair the current group with every *other* candidate group.
    for secondary_set in feature_sets:
        if (analysis_set != secondary_set):
            # Full feature list = reference + analysis + secondary.
            # copy() prevents extend() from mutating reference_group itself.
            features = copy(reference_group)
            features.extend(analysis_set)
            features.extend(secondary_set)
            ##fit, return feature importances
            # NOTE(review): the fit / importance-recording code continues
            # beyond this chunk.
# --- Script section: random-forest estimator setup (depth sweep) ----------
import mylib_dataset as md
import tensorflow as tf
from tensorflow.contrib.tensor_forest.client import random_forest
import matplotlib.pyplot as plt

##################### get log for estimator
# NOTE(review): relies on `datetime` being imported elsewhere in the file.
proc_time_start = datetime.datetime.now()

##### COMMENT
## train data has low variance, better choose from test data
####

# Previously computed per-feature importance statistics (pickled by the
# pairwise experiment); returns mean- and variance-ordered summaries.
path_features_imp = '/home/catalin/git_workspace/disertatie/dict_perf_feats.pkl'
ordered_values_mean, ordered_values_var = md.get_feature_importances_mean(
    path_features_imp)

# Build a selection: reference group + the 30 features with the highest
# mean importance.
features = feature_list.get_feature_set()[0]
features.extend(ordered_values_mean['keys'][:30])
# NOTE(review): the next line overwrites the importance-based selection
# built just above — that work is discarded. Intentional? TODO confirm.
features = feature_list.get_features_list()
blockchain_indicators = feature_list.get_blockchain_indicators()
#features = feature_list.get_features_list()

import pickle
# Load cached train/test data. SECURITY NOTE: pickle.load is unsafe on
# untrusted input — acceptable here only because the file is local.
with open('/home/catalin/python/force_data.pickle', 'rb') as handle:
    force_data = pickle.load(handle)

# Hyper-parameter sweep over tree depths 1..49
# (equivalent to list(range(1, 50))).
max_depths = []
for i in range(1, 50):
    max_depths.append(i)

roc_curves = {}   # presumably max_depth -> ROC curve data; filled below
auc_scores = {}   # presumably max_depth -> AUC score; filled below

# Loop body continues beyond this chunk.
for max_depth in max_depths: