示例#1
0
    def predict_n_steps_ahead(self,df, idx_target,n_steps,do_debug=False):
        """Fit on the whole frame and forecast ``n_steps`` values past its end.

        Args:
            df: DataFrame holding the target column and the feature columns.
            idx_target: positional index of the target column in ``df``.
            n_steps: number of future steps requested from the classifier.
            do_debug: when True, additionally plot the tail of the history
                followed by the forecast and its interval via
                ``self.Plotter.TS_matplotlib``.

        Returns:
            DataFrame with columns ``predict_ahead``, ``predict_ahead_min`` and
            ``predict_ahead_max`` (one row per forecast value). The frame has
            the same columns whether or not ``do_debug`` is set.
        """
        X, Y = tools_DF.df_to_XY(df, idx_target)
        # Only the fitting side effect is needed; in-sample predictions are unused.
        self.do_train(X, Y)
        Y_pred_ahead, CI_pred_ahead = self.classifier.predict_n_steps_ahead(n_steps)

        df_step = pd.DataFrame({'predict_ahead': Y_pred_ahead,
                                'predict_ahead_min': CI_pred_ahead[:, 0],
                                'predict_ahead_max': CI_pred_ahead[:, 1]})

        if do_debug:
            n_history = df.shape[0]
            # Column order matters: the plot call below addresses columns by position.
            df_retro = pd.DataFrame({'GT': df.iloc[:, idx_target],
                                     'predict': numpy.full(n_history, numpy.nan),
                                     'predict_ahead': numpy.full(n_history, numpy.nan),
                                     'predict_ahead_min': numpy.full(n_history, numpy.nan),
                                     'predict_ahead_max': numpy.full(n_history, numpy.nan),
                                     })

            # Pad a copy of the forecast so the returned frame is left untouched.
            df_future = df_step.assign(GT=numpy.nan, predict=numpy.nan)

            # DataFrame.append was removed in pandas 2.0; concat aligns columns by
            # name and keeps the column order of the first frame.
            df_retro = pd.concat([df_retro, df_future], ignore_index=True, sort=False)

            n_total = df_retro.shape[0]
            x_range = [max(0, n_total - n_steps * 20), n_total]
            self.Plotter.TS_matplotlib(df_retro, [0, 2, 1], None, idxs_fill=[3, 4], x_range=x_range,filename_out='pred_ahead_%s.png' % (self.classifier.name))

        return df_step
# ---------------------------------------------------------------------------------------------------------------------
示例#2
0
    def E2E_train_test(self, df, idx_target, ratio=0.5,do_debug=False):
        """Train on the leading part of the data and predict both parts.

        The first ``int(len(Y) * ratio)`` samples form the train split and the
        remainder the test split. With ``do_debug`` set, the fact/prediction
        pairs are written to tab-separated CSV files under ``self.folder_out``
        and several diagnostic plots are produced through ``self.Plotter``.

        Args:
            df: DataFrame holding the target column and the feature columns.
            idx_target: positional index of the target column in ``df``.
            ratio: fraction of samples assigned to the train split.
            do_debug: enable the CSV dumps and plots.

        Returns:
            None.
        """
        X, Y = tools_DF.df_to_XY(df,idx_target)
        train_size = int(Y.shape[0]*ratio)

        Y_train_pred,Y_test_pred = self.do_train_test(X,Y,train_size)

        if not do_debug:
            return

        fact_train = Y[:train_size]
        fact_test = Y[train_size:]
        name = self.classifier.name

        # Tabular dumps of fact vs. prediction.
        df_train = pd.DataFrame({'fact': fact_train, 'pred': Y_train_pred})
        df_test = pd.DataFrame({'fact': fact_test, 'pred': Y_test_pred})
        df_train.to_csv(self.folder_out + 'train.csv', index=False,sep='\t')
        df_test.to_csv(self.folder_out + 'test_%s.csv' % name, index=False,sep='\t')

        # Time-series view, then fact-vs-prediction scatter, for each split.
        for frame, pattern in ((df_train, 'train_%s.png'), (df_test, 'test_%s.png')):
            self.Plotter.TS_seaborn(frame, idxs_target=[0, 1], idx_feature=None, filename_out=pattern % name)

        self.Plotter.plot_fact_predict(fact_train, Y_train_pred,filename_out='train_fact_pred_%s.png' % name)
        self.Plotter.plot_fact_predict(fact_test, Y_test_pred,filename_out='test_fact_pred_%s.png' % name)

        # Error histograms share one symmetric x-range so the splits are comparable.
        err_train = fact_train - Y_train_pred
        err_test = fact_test - Y_test_pred
        x_max = 1.2*max(numpy.abs(err_train).max(), numpy.abs(err_test).max())
        shared_range = [-x_max,+x_max]

        self.Plotter.plot_hist(err_train, x_range=shared_range,filename_out='train_err_%s.png' % name)
        self.Plotter.plot_hist(err_test, x_range=shared_range,filename_out='test_err_%s.png' % name)

        return
示例#3
0
 def plot_ISOMAP(self,df, idx_target,palette='tab10',filename_out=None):
     """Project the features to 2D with Isomap and plot them by target value.

     Args:
         df: DataFrame holding the target column and the feature columns.
         idx_target: positional index of the target column in ``df``.
         palette: palette name forwarded to ``plot_2D_features_v3``.
         filename_out: output file name forwarded to ``plot_2D_features_v3``.

     Returns:
         None.
     """
     X, Y = tools_DF.df_to_XY(df, idx_target)

     # Two-component embedding of the feature matrix.
     embedding = Isomap(n_components=2).fit_transform(X)

     # Target goes first, followed by the two embedding coordinates.
     target_column = Y.reshape(-1, 1)
     table = numpy.hstack((target_column, embedding))
     df_embedded = pd.DataFrame(table, columns=['ISOMAP', 'x0', 'x1'])

     # The stacked array has one common dtype; cast the target back to integers.
     df_embedded = df_embedded.astype({'ISOMAP': 'int32'})

     self.plot_2D_features_v3(df_embedded, remove_legend=True,palette=palette, filename_out=filename_out)
     return
示例#4
0
    def plot_2D_features_cumul(self, df, figsize=(3.5,3.5),remove_legend=False,filename_out=None):
        """Plot 2D feature points with marker size proportional to multiplicity.

        Column 0 of ``df`` is used as the target. Samples with target > 0 count
        as positive, all others as negative. Every distinct feature point is
        drawn once:

        * points seen in one class only get that class colour, sized by count;
        * points seen in both classes get a gray disc sized by the total count
          with a class-coloured disc on top, sized by the count difference and
          coloured by the majority class (positive on a tie).

        Args:
            df: DataFrame with the target in column 0; the first two feature
                values of each row are used as x and y coordinates.
            figsize: matplotlib figure size.
            remove_legend: when True, install an empty frameless legend.
            filename_out: if given, the figure is saved under
                ``self.folder_out + filename_out``.

        Returns:
            The matplotlib figure.
        """
        # Local import: the file's import block is not visible from this method.
        from collections import Counter

        fig = plt.figure(figsize=figsize)
        fig = self.turn_light_mode(fig)

        X,Y = tools_DF.df_to_XY(df,idx_target=0)

        # Multiplicity of every distinct point per class (insertion-ordered).
        count_pos = Counter(map(tuple, X[Y > 0]))
        count_neg = Counter(map(tuple, X[Y <= 0]))

        col_neg  = (0, 0.5, 1, 1)
        col_pos  = (1, 0.5, 0, 1)
        col_gray = (0.5, 0.5, 0.5, 1)

        min_size = 4
        max_size = 20
        # default=0 keeps this working when one class has no samples at all.
        largest_count = max(max(count_pos.values(), default=0), max(count_neg.values(), default=0))
        # When both classes are empty norm is 0, but then nothing is drawn and
        # it is never used as a divisor.
        norm = largest_count / max_size

        def draw_point(point, color, count):
            # Marker-only format: the colour comes solely from the keyword, which
            # avoids matplotlib's "color is redundantly defined" warning.
            plt.plot(point[0], point[1], 'o', color=color, markeredgewidth=0, markersize=max(min_size, count / norm))

        for point, n_pos in count_pos.items():
            n_neg = count_neg.get(point, 0)
            if n_neg == 0:
                draw_point(point, col_pos, n_pos)
                continue

            # Shared point: total in gray, then the majority surplus on top.
            draw_point(point, col_gray, n_pos + n_neg)
            if n_pos < n_neg:
                draw_point(point, col_neg, n_neg - n_pos)
            else:
                draw_point(point, col_pos, n_pos - n_neg)

        for point, n_neg in count_neg.items():
            if point not in count_pos:
                draw_point(point, col_neg, n_neg)

        plt.grid(color=self.clr_grid)
        if remove_legend:
            plt.legend([], [], frameon=False)

        # Column 0 is the target, so the plotted features are columns 1 and 2
        # (assumes df_to_XY keeps the remaining columns in order - TODO confirm).
        plt.xlabel(df.columns[1])
        plt.ylabel(df.columns[2])

        plt.tight_layout()
        if filename_out is not None:
            plt.savefig(self.folder_out + filename_out, facecolor=fig.get_facecolor())
        return fig
示例#5
0
def ex_view_tree(df,idx_target):
    """Fit a depth-3 decision-tree classifier on ``df`` with debug output enabled.

    Rows containing missing values are dropped and categorical columns are
    hashed before the frame is split into features and target.

    Args:
        df: input DataFrame.
        idx_target: positional index of the target column in ``df``.

    Returns:
        None.
    """
    df_clean = tools_DF.hash_categoricals(df.dropna())
    X, Y = tools_DF.df_to_XY(df_clean, idx_target, keep_categoirical=False)

    # Feature names are all column names except the target's.
    feature_names = numpy.delete(df_clean.columns.to_numpy(), idx_target)

    tree = classifier_DTree.classifier_DT(max_depth=3,folder_out=folder_out)
    tree.learn(X, Y,feature_names,do_debug=True)

    return