def predict_n_steps_ahead(self, df, idx_target, n_steps, do_debug=False):
    """Train on ``df`` and forecast ``n_steps`` values beyond its last row.

    Args:
        df: DataFrame holding the target column and the feature columns.
        idx_target: Positional index of the target column in ``df``.
        n_steps: Number of future steps requested from the classifier.
        do_debug: When True, also render a PNG that shows the historical
            target followed by the forecast and its min/max band.

    Returns:
        DataFrame with ``n_steps`` rows and the columns ``predict_ahead``,
        ``predict_ahead_min`` and ``predict_ahead_max``. When ``do_debug`` is
        True the same frame additionally carries all-NaN ``GT`` and
        ``predict`` columns (they are added in place to align it with the
        historical frame before plotting).
    """
    X, Y = tools_DF.df_to_XY(df, idx_target)

    # do_train must run before forecasting; its return value (bound to
    # Y_pred_train in the earlier code) is not needed here.
    self.do_train(X, Y)

    Y_pred_ahead, CI_pred_ahead = self.classifier.predict_n_steps_ahead(n_steps)
    df_step = pd.DataFrame({
        'predict_ahead': Y_pred_ahead,
        'predict_ahead_min': CI_pred_ahead[:, 0],
        'predict_ahead_max': CI_pred_ahead[:, 1],
    })

    if do_debug:
        n_hist = df.shape[0]
        # Historical part: only the ground truth is known, everything else is NaN.
        df_retro = pd.DataFrame({
            'GT': df.iloc[:, idx_target],
            'predict': numpy.full(n_hist, numpy.nan),
            'predict_ahead': numpy.full(n_hist, numpy.nan),
            'predict_ahead_min': numpy.full(n_hist, numpy.nan),
            'predict_ahead_max': numpy.full(n_hist, numpy.nan),
        })
        # Forecast part: no ground truth and no in-sample prediction.
        df_step['GT'] = numpy.full(n_steps, numpy.nan)
        df_step['predict'] = numpy.full(n_steps, numpy.nan)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # replacement. Column order follows df_retro (the first frame), so the
        # positional indices passed to the plotter below remain valid.
        df_retro = pd.concat([df_retro, df_step], ignore_index=True)

        # Show the forecast plus at most 20x as many preceding rows.
        x_range = [max(0, df_retro.shape[0] - n_steps * 20), df_retro.shape[0]]
        self.Plotter.TS_matplotlib(df_retro, [0, 2, 1], None, idxs_fill=[3, 4], x_range=x_range,
                                   filename_out='pred_ahead_%s.png' % (self.classifier.name))

    return df_step
# ---------------------------------------------------------------------------------------------------------------------
def E2E_train_test(self, df, idx_target, ratio=0.5, do_debug=False):
    """Run a train/test split experiment and optionally dump diagnostics.

    The first ``int(len(Y) * ratio)`` samples are passed to
    ``self.do_train_test`` as the training size; the remainder is the test
    part. Nothing is returned.

    Args:
        df: DataFrame holding the target column and the feature columns.
        idx_target: Positional index of the target column in ``df``.
        ratio: Fraction of the samples used for training.
        do_debug: When True, write tab-separated CSV files with fact/pred
            pairs under ``self.folder_out`` and ask ``self.Plotter`` for
            time-series, fact-vs-prediction and error-histogram plots.
    """
    X, Y = tools_DF.df_to_XY(df, idx_target)
    train_size = int(Y.shape[0] * ratio)
    Y_train_pred, Y_test_pred = self.do_train_test(X, Y, train_size)

    if not do_debug:
        return

    fact_train = Y[:train_size]
    fact_test = Y[train_size:]
    name = self.classifier.name

    # Fact/prediction tables, saved as tab-separated text.
    df_train = pd.DataFrame({'fact': fact_train, 'pred': Y_train_pred})
    df_test = pd.DataFrame({'fact': fact_test, 'pred': Y_test_pred})
    df_train.to_csv(self.folder_out + 'train.csv', index=False, sep='\t')
    df_test.to_csv(self.folder_out + 'test_%s.csv' % name, index=False, sep='\t')

    # Time-series view of fact vs prediction.
    self.Plotter.TS_seaborn(df_train, idxs_target=[0, 1], idx_feature=None, filename_out='train_%s.png' % name)
    self.Plotter.TS_seaborn(df_test, idxs_target=[0, 1], idx_feature=None, filename_out='test_%s.png' % name)

    # Fact-vs-prediction plots.
    self.Plotter.plot_fact_predict(fact_train, Y_train_pred, filename_out='train_fact_pred_%s.png' % name)
    self.Plotter.plot_fact_predict(fact_test, Y_test_pred, filename_out='test_fact_pred_%s.png' % name)

    # Error histograms share one symmetric range: 1.2x the largest absolute error.
    err_train = fact_train - Y_train_pred
    err_test = fact_test - Y_test_pred
    x_max = 1.2 * max(numpy.abs(err_train).max(), numpy.abs(err_test).max())
    hist_range = [-x_max, +x_max]
    self.Plotter.plot_hist(err_train, x_range=hist_range, filename_out='train_err_%s.png' % name)
    self.Plotter.plot_hist(err_test, x_range=hist_range, filename_out='test_err_%s.png' % name)
    return
def plot_ISOMAP(self, df, idx_target, palette='tab10', filename_out=None):
    """Project the features to 2D with Isomap and plot them by target value.

    Args:
        df: DataFrame holding the target column and the feature columns.
        idx_target: Positional index of the target column in ``df``.
        palette: Palette name forwarded to ``plot_2D_features_v3``.
        filename_out: Output file name forwarded to ``plot_2D_features_v3``.
    """
    X, Y = tools_DF.df_to_XY(df, idx_target)

    # Two-component embedding of the feature matrix.
    embedding = Isomap(n_components=2).fit_transform(X)

    # Target goes first (column 'ISOMAP'), then the two embedding axes.
    table = numpy.hstack((Y.reshape(-1, 1), embedding))
    df_embedded = pd.DataFrame(table, columns=['ISOMAP', 'x0', 'x1'])
    df_embedded = df_embedded.astype({'ISOMAP': 'int32'})

    self.plot_2D_features_v3(df_embedded, remove_legend=True, palette=palette, filename_out=filename_out)
    return
def plot_2D_features_cumul(self, df, figsize=(3.5, 3.5), remove_legend=False, filename_out=None):
    """Scatter-plot two features with marker size proportional to point multiplicity.

    Column 0 of ``df`` is used as the target and the remaining columns as the
    features; the first two feature values of every row give the marker
    position. Rows with target > 0 count as positive, rows with target <= 0 as
    negative. Coordinates seen in only one class get that class' colour.
    Coordinates seen in both classes get a gray marker sized by the total
    count, overlaid with a marker in the majority colour sized by the count
    difference (positive colour on a tie).

    Args:
        df: DataFrame with the target in column 0 and the features after it.
        figsize: Figure size passed to ``plt.figure``.
        remove_legend: When True, install an empty frameless legend.
        filename_out: If not None, the figure is saved to
            ``self.folder_out + filename_out``.

    Returns:
        The matplotlib figure.
    """
    from collections import Counter

    fig = plt.figure(figsize=figsize)
    fig = self.turn_light_mode(fig)

    X, Y = tools_DF.df_to_XY(df, idx_target=0)

    # Multiplicity of every distinct feature tuple, per class.
    count_pos = Counter(map(tuple, X[Y > 0]))
    count_neg = Counter(map(tuple, X[Y <= 0]))

    col_neg = (0, 0.5, 1, 1)
    col_pos = (1, 0.5, 0, 1)
    col_gray = (0.5, 0.5, 0.5, 1)
    min_size = 4
    max_size = 20

    # Scale so the most frequent point gets max_size. default=0 keeps this
    # working when one class has no samples (max() on an empty dict raised).
    # norm is only 0 when both classes are empty, and then nothing is drawn.
    norm = max(max(count_pos.values(), default=0), max(count_neg.values(), default=0)) / max_size

    def draw(point, color, count):
        # Marker-only format 'o': the colour comes from the keyword, which
        # avoids matplotlib's "color is redundantly defined" warning for 'ro'.
        plt.plot(point[0], point[1], 'o', color=color, markeredgewidth=0,
                 markersize=max(min_size, count / norm))

    for point, n_pos in count_pos.items():
        if point not in count_neg:
            draw(point, col_pos, n_pos)
            continue
        n_neg = count_neg[point]
        # Shared coordinate: gray disc for the total, majority colour on top.
        draw(point, col_gray, n_pos + n_neg)
        if n_pos < n_neg:
            draw(point, col_neg, n_neg - n_pos)
        else:
            draw(point, col_pos, n_pos - n_neg)

    for point, n_neg in count_neg.items():
        if point not in count_pos:
            draw(point, col_neg, n_neg)

    plt.grid(color=self.clr_grid)
    if remove_legend:
        plt.legend([], [], frameon=False)

    # Column 0 is the target, so the plotted axes are columns 1 and 2.
    # (The y label previously repeated columns[1].)
    plt.xlabel(df.columns[1])
    plt.ylabel(df.columns[2])
    plt.tight_layout()

    if filename_out is not None:
        plt.savefig(self.folder_out + filename_out, facecolor=fig.get_facecolor())

    return fig
def ex_view_tree(df, idx_target):
    """Fit a depth-3 decision tree on ``df`` with debug output enabled.

    Rows containing NaN are dropped and the frame is passed through
    ``tools_DF.hash_categoricals`` before it is split into features and
    target. The names of all non-target columns are handed to the classifier
    together with the data.

    Args:
        df: Input DataFrame.
        idx_target: Positional index of the target column in ``df``.
    """
    df_clean = tools_DF.hash_categoricals(df.dropna())
    X, Y = tools_DF.df_to_XY(df_clean, idx_target, keep_categoirical=False)

    # Column names with the target removed.
    feature_names = numpy.delete(df_clean.columns.to_numpy(), idx_target)

    # folder_out is resolved from the enclosing module scope.
    tree = classifier_DTree.classifier_DT(max_depth=3, folder_out=folder_out)
    tree.learn(X, Y, feature_names, do_debug=True)
    return