示例#1
0
    def _get_dataset_obj(self):
        """Load dirty train/test splits plus clean counterparts; return the dataset object.

        Populates ``self.train_loader``, ``self.X_train``, ``self.target_errors_train``,
        ``self.attributes``, ``self.test_loader``, ``self.X_test``,
        ``self.target_errors_test``, ``self.X_train_clean`` and ``self.X_test_clean``.
        """

        # Dirty training split (also yields the attribute list and dataset object)
        (self.train_loader,
         self.X_train,
         self.target_errors_train,
         dataset_obj,
         self.attributes) = utils.load_data(self.args.data_path,
                                            self.args.batch_size,
                                            is_train=True,
                                            get_data_idxs=False)

        # Dirty test split
        (self.test_loader,
         self.X_test,
         self.target_errors_test,
         _,
         _) = utils.load_data(self.args.data_path,
                              self.args.batch_size,
                              is_train=False)

        # -- clean versions for evaluation (standardized with the dirty-data statistics)
        _, self.X_train_clean, _, _, _ = utils.load_data(self.args.data_path,
                                                         self.args.batch_size,
                                                         is_train=True,
                                                         is_clean=True,
                                                         stdize_dirty=True)
        _, self.X_test_clean, _, _, _ = utils.load_data(self.args.data_path,
                                                        self.args.batch_size,
                                                        is_train=False,
                                                        is_clean=True,
                                                        stdize_dirty=True)

        return dataset_obj
示例#2
0
def main(args):
    """Fit a one-class SVM on the dirty training set and report row-level
    outlier-detection metrics (AUC / AVPR), optionally saving them to CSV."""

    # Load the dirty training split
    _, X_train, target_errors_train, _, _ = utils.load_data(
        args.data_folder, args.batch_size, is_train=True, is_one_hot=args.is_one_hot)
    # _, X_test, target_errors_test, _, _ = utils.load_data(folder_path, args.batch_size, is_train=False) # NOTE: used in hyper-parameter selection

    # Best parameters from CV
    clf = svm.OneClassSVM(nu=0.2, kernel="rbf", gamma=0.1)
    clf.fit(X_train)

    # A row counts as an outlier if any of its cells is flagged
    target_row = (target_errors_train.sum(dim=1) > 0).numpy()

    # Negate score_samples so that higher == more anomalous
    outlier_score_row = -clf.score_samples(X_train)

    auc_row = auc_compute(target_row, outlier_score_row)
    avpr_row = avpr_compute(target_row, outlier_score_row)

    print('OC-SVM Train - AUC: ' + str(auc_row) + ', AVPR: ' + str(avpr_row))

    # Save results into csv
    if not args.save_on:
        return

    # create folder for saving experiment data (if necessary)
    folder_output = args.output_folder + "/" + args.outlier_model
    try:
        os.makedirs(folder_output)
    except OSError as e:
        # tolerate a pre-existing folder; re-raise anything else
        if e.errno != errno.EEXIST:
            raise

    # Dataframe with a single row of metrics
    results = {'AUC': [auc_row], 'AVPR': [avpr_row]}
    df_out = pd.DataFrame(data=results, columns=['AUC', 'AVPR'])
    df_out.index.name = "Epochs"
    df_out.to_csv(folder_output + "/train_epochs_data.csv")
示例#3
0
def main(args_in):
    """Train and evaluate the conditional-prediction (CondPred) outlier model.

    Loads dirty and clean train/test splits, builds one conditional model per
    feature with its own optimizer, runs training/evaluation epochs, and, when
    ``args_in.save_on`` is set, stores final metrics, model parameters and the
    run arguments under ``args_in.output_folder``.
    """

    #### MAIN ####

    # structs for saving per-epoch losses.  evaluation_phase() consumes this on
    # EVERY run, so it must exist unconditionally -- the original code created
    # it only under save_on, which raised NameError when save_on was False.
    losses_save = {"train": {}, "test": {}}

    # saving data of experiment to folder is on
    if args_in.save_on:
        # create folder for saving experiment data (if necessary)
        folder_output = args_in.output_folder + "/"

        try:
            os.makedirs(folder_output)
        except OSError as e:
            # tolerate a pre-existing folder; re-raise anything else
            if e.errno != errno.EEXIST:
                raise

    # dtype definitions for running (CPU vs. CUDA tensors)
    if args_in.cuda_on:
        dtype_float = torch.cuda.FloatTensor
        dtype_byte = torch.cuda.ByteTensor
    else:
        dtype_float = torch.FloatTensor
        dtype_byte = torch.ByteTensor

    print(args_in)

    # Load datasets (dirty train / test splits)
    train_loader, X_train, target_errors_train, dataset_obj_train, attributes = utils.load_data(
        args_in.data_folder, args_in.batch_size, is_train=True, get_data_idxs=True)
    args_in.dataset_defs = dataset_obj_train
    X_train = X_train.type(dtype_float)

    test_loader, X_test, target_errors_test, dataset_obj_test, _ = utils.load_data(
        args_in.data_folder, args_in.batch_size, is_train=False)
    X_test = X_test.type(dtype_float)

    # -- clean versions for data repair evaluation (standardized according to the dirty data statistics)
    train_loader_clean, X_train_clean, _, dataset_obj_clean, _ = utils.load_data(
        args_in.data_folder, args_in.batch_size, is_train=True, is_clean=True, stdize_dirty=True)
    X_train_clean = X_train_clean.type(dtype_float)

    test_loader_clean, X_test_clean, _, _, _ = utils.load_data(
        args_in.data_folder, args_in.batch_size, is_train=False, is_clean=True, stdize_dirty=True)
    X_test_clean = X_test_clean.type(dtype_float)

    ### Run CondPred Model ###
    model = CondPred(args_in.dataset_defs, args_in)

    if args_in.cuda_on:
        model.cuda()

    # define one optimizer per conditional (per-feature) model
    optimizer_dict = OrderedDict()
    for col_name, col_type, col_size in args_in.dataset_defs.feat_info:
        if args_in.base_type == 'linear':
            optimizer_dict[col_name] = optim.SGD(model.cond_models[col_name].parameters(),
                                                 lr=args_in.lr,
                                                 weight_decay=args_in.l2_reg,
                                                 nesterov=args_in.nest_mom,  # default: False
                                                 momentum=args_in.mom_val)
        else:
            optimizer_dict[col_name] = optim.Adam(model.cond_models[col_name].parameters(),
                                                  lr=args_in.lr,
                                                  weight_decay=args_in.l2_reg)

    # Run epochs: one training pass, then evaluation on train and test sets
    for epoch in range(1, args_in.number_epochs + 1):

        training_phase(args_in, model, optimizer_dict, train_loader, epoch)

        # Train set evaluation
        evaluation_phase(args_in, model, X_train, X_train_clean, target_errors_train,
                         losses_save, epoch, mode='train')
        # Test set evaluation
        evaluation_phase(args_in, model, X_test, X_test_clean, target_errors_test,
                         losses_save, epoch, mode='test')

    if args_in.save_on:

        ### Train Data -- final metrics pass (epoch=-1 marks the post-training evaluation)
        outlier_metrics_train, repair_metrics_train, outlier_scores_train = \
            evaluation_phase(args_in, model, X_train, X_train_clean, target_errors_train, [], -1, mode='train')
            # (outlier_score_cells_train, outlier_score_rows_train)

        store_metrics_final('train', outlier_scores_train, args_in.dataset_defs, attributes, outlier_metrics_train, repair_metrics_train,
                            target_errors_train, losses_save,
                            folder_output)

        ### Test Data
        outlier_metrics_test, repair_metrics_test, outlier_scores_test = \
            evaluation_phase(args_in, model, X_test, X_test_clean, target_errors_test, [], -1, mode='test')

        store_metrics_final('test', outlier_scores_test, args_in.dataset_defs, attributes, outlier_metrics_test, repair_metrics_test,
                            target_errors_test, losses_save,
                            folder_output)

        # save model parameters (moved to CPU so the checkpoint loads anywhere)
        model.cpu()
        torch.save(model.state_dict(), folder_output + "/model_params.pth")

        # remove non-serializable stuff before dumping args as JSON
        del args_in.dataset_defs

        # save to .json file the args that were used for running the model
        with open(folder_output + "/args_run.json", "w") as outfile:
            json.dump(vars(args_in), outfile, indent=4, sort_keys=True)
示例#4
0
def main(args):
    """Marginals baseline: scores cell/row outliers with per-cell marginal
    probabilities and evaluates repair quality; optionally writes all metrics
    to CSV under <output_folder>/<outlier_model>."""

    # Load datasets: dirty training split plus its clean counterpart
    # (clean data is standardized with the dirty-data statistics)
    train_loader, X_train, target_errors_train, dataset_obj_train, attributes = utils.load_data(args.data_folder, args.batch_size, 
                                                                                                is_train=True)
    train_loader_clean, X_train_clean, _, dataset_obj_clean, _ = utils.load_data(args.data_folder, args.batch_size,
                                                                        is_train=True, is_clean=True, stdize_dirty=True)

    dataset_obj = dataset_obj_train
    df_data_train = dataset_obj_train.df_dataset_instance

    # Fit the marginal model; warnings (presumably convergence chatter from the
    # density fits -- TODO confirm) are suppressed for this call only.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p_mat_train, dict_densities, _, repair_mat = get_prob_matrix(df_data_train, dataset_obj.cat_cols, n_comp_max=40)

    # Repair error measured separately on dirty-cell positions and on clean-cell
    # positions (mask inverted via 1 - target_errors_train)
    mean_error_dirty, features_errors_dirty = error_computation(dataset_obj_clean, X_train_clean.detach().numpy(),
                                                    repair_mat, dict_densities, target_errors_train.detach().numpy())
    mean_error_clean, features_errors_clean = error_computation(dataset_obj_clean, X_train_clean.detach().numpy(),
                                                    repair_mat, dict_densities, (1-target_errors_train).detach().numpy())

    #print(features_errors)
    # log-probabilities; 1e-9 guards against log(0)
    logp_mat_train = np.log(p_mat_train + 1e-9)

    # A row counts as an outlier if any of its cells is flagged
    target_row_train = (target_errors_train.sum(dim=1)>0).numpy()

    # Uses the NLL score as outlier score (just like VAE outlier score)
    outlier_score_cell_train = -logp_mat_train
    outlier_score_row_train = -logp_mat_train.sum(axis=1)


    ## Cell metrics (overall and per feature)
    auc_cell_train, auc_feats = get_auc_metrics(target_errors_train, outlier_score_cell_train)
    avpr_cell_train, avpr_feats = get_avpr_metrics(target_errors_train, outlier_score_cell_train)

    print("AVPR per feature")
    print(avpr_feats)
    print("AUC per feature")
    print(auc_feats)

    ## Row metrics
    auc_row_train = auc_compute(target_row_train, outlier_score_row_train)
    avpr_row_train = avpr_compute(target_row_train, outlier_score_row_train)


    print('Marginals Prob. Train - Cell AUC: {}, Cell AVPR: {}, Row AUC: {}, Row AVPR: {}'.format(
                                    auc_cell_train, avpr_cell_train, auc_row_train, avpr_row_train))

    #Save results into csv
    if args.save_on:

        # create folder for saving experiment data (if necessary)
        folder_output = args.output_folder + "/" + args.outlier_model

        try:
            os.makedirs(folder_output)
        except OSError as e:
            # tolerate a pre-existing folder; re-raise anything else
            if e.errno != errno.EEXIST:
                raise

        columns = ['AUC row','AVPR row','AUC cell','AVPR cell','Error repair on dirty pos', 'Error repair on clean pos']
        results = {'AUC row': [auc_row_train], 'AVPR row': [avpr_row_train],
                   'AUC cell': [auc_cell_train], 'AVPR cell': [avpr_cell_train],
                   'Error repair on dirty pos': [mean_error_dirty], 'Error repair on clean pos': [mean_error_clean]}


        #Dataframe with a single row of summary metrics
        df_out = pd.DataFrame(data=results, columns=columns)
        df_out.index.name = "Epochs"
        df_out.to_csv(folder_output + "/train_epochs_data.csv")

        # store AVPR for features (cell only)
        df_avpr_feat_cell = pd.DataFrame([], index=['AVPR'], columns=attributes)
        df_avpr_feat_cell.loc['AVPR'] = avpr_feats
        df_avpr_feat_cell.to_csv(folder_output + "/train_avpr_features.csv")

        # store AUC for features (cell only)
        df_auc_feat_cell = pd.DataFrame([], index=['AUC'], columns=attributes)
        df_auc_feat_cell.loc['AUC'] = auc_feats
        df_auc_feat_cell.to_csv(folder_output + "/train_auc_features.csv")

        # per-feature repair errors, split by dirty vs. clean cell positions
        df_errors_repair = pd.DataFrame([], index=['error_repair_dirtycells','error_repair_cleancells'], columns=attributes)
        df_errors_repair.loc['error_repair_dirtycells'] = features_errors_dirty
        df_errors_repair.loc['error_repair_cleancells'] = features_errors_clean
        df_errors_repair.to_csv(folder_output + "/train_error_repair_features.csv")
示例#5
0
def get_dataset(data_pars, task_type="train"):
    """Return data for the requested task.

    :param data_pars: dict with a nested "data_pars" dict holding
        ``data_path``, ``batch_size`` and optionally ``clean`` (default True).
    :param task_type: one of 'pred_encode', 'pred_decode', 'train', 'test',
        'predict'.
    :return: task-dependent -- see the dispatch below; ``None`` for an
        unrecognized task_type (matches the original fall-through behavior).
    """
    pars = data_pars["data_pars"]
    clean = pars.get('clean', True)
    data_path = pars["data_path"]
    batch_size = pars["batch_size"]

    def _load_dirty_train():
        # (train_loader, X_train, target_errors_train, dataset_obj, attributes)
        return utils.load_data(data_path, batch_size,
                               is_train=True,
                               get_data_idxs=False)

    if task_type == 'pred_encode':
        _, X_train, _, _, _ = _load_dirty_train()
        return X_train

    if task_type == 'pred_decode':
        _, _, target_errors_train, _, _ = _load_dirty_train()
        return target_errors_train

    if not clean:
        # dirty versions
        if task_type == 'train':
            train_loader, X_train, target_errors_train, dataset_obj, attributes = _load_dirty_train()
            return train_loader, X_train, target_errors_train, dataset_obj, attributes

        if task_type == 'test':
            test_loader, X_test, target_errors_test, _, _ = utils.load_data(
                data_path, batch_size, is_train=False)
            return test_loader, X_test, target_errors_test

        if task_type == 'predict':
            train_loader, _, _, _, _ = _load_dirty_train()
            return train_loader
    else:
        # -- clean versions for evaluation (standardized with dirty-data stats)
        if task_type == 'train':
            _, X_train_clean, _, _, _ = utils.load_data(
                data_path, batch_size, is_train=True, is_clean=True, stdize_dirty=True)
            return X_train_clean

        if task_type == 'test':
            _, X_test_clean, _, _, _ = utils.load_data(
                data_path, batch_size, is_train=False, is_clean=True, stdize_dirty=True)
            return X_test_clean
示例#6
0
def main(args_in):
    """Train and evaluate the RAE (robust autoencoder) outlier model with an
    ADMM outer loop; when ``args_in.save_on`` is set, final metrics, model
    parameters and run args are stored under ``args_in.output_folder``.

    Note: several loaders/tensors are attached to ``args_in`` because the
    downstream train/test/evaluation helpers read them from there; they are
    deleted again before the JSON dump.
    """

    #### MAIN ####

    # saving data of experiment to folder is on
    if args_in.save_on:
        # create folder for saving experiment data (if necessary)
        folder_output = args_in.output_folder
        args_in.folder_output = folder_output

        # structs for saving data
        args_in.losses_save = {"train": {}, "test": {}}

        try:
            os.makedirs(folder_output + '/')
        except OSError as e:
            # tolerate a pre-existing folder; re-raise anything else
            if e.errno != errno.EEXIST:
                raise

    # dtype definitions for running (CPU vs. CUDA tensors)
    if args_in.cuda_on:
        dtype_float = torch.cuda.FloatTensor
        dtype_byte = torch.cuda.ByteTensor
    else:
        dtype_float = torch.FloatTensor
        dtype_byte = torch.ByteTensor

    # only one type of prior assumption on errors / outliers:
    # default to l21 when neither flag is set; prefer l1 when both are set
    if (not args_in.l1_method) and (not args_in.l21_method):
        args_in.l21_method = True
    elif args_in.l1_method and args_in.l21_method:
        args_in.l21_method = False

    # Choose dataset to run on
    folder_path = args_in.data_folder

    # Load datasets (dirty train / test splits, one-hot encoded)
    train_loader, X_train, target_errors_train, dataset_obj_train, attributes = utils.load_data(
        folder_path,
        args_in.batch_size,
        is_train=True,
        get_data_idxs=True,
        is_one_hot=True)
    args_in.dataset_defs = dataset_obj_train
    args_in.train_loader = train_loader
    args_in.target_errors_train = target_errors_train.type(dtype_byte)
    X_train = X_train.type(dtype_float)

    test_loader, X_test, target_errors_test, dataset_obj_test, _ = utils.load_data(
        folder_path,
        args_in.batch_size,
        is_train=False,
        get_data_idxs=True,
        is_one_hot=True)
    args_in.test_loader = test_loader
    args_in.target_errors_test = target_errors_test.type(dtype_byte)
    X_test = X_test.type(dtype_float)

    # -- clean versions for data repair evaluation (standardized according to the dirty data statistics)
    train_loader_clean, X_train_clean, _, dataset_obj_clean, _ = utils.load_data(
        args_in.data_folder,
        args_in.batch_size,
        is_train=True,
        is_clean=True,
        is_one_hot=True,
        stdize_dirty=True)

    args_in.train_loader_clean = train_loader_clean
    X_train_clean = X_train_clean.type(dtype_float)

    test_loader_clean, X_test_clean, _, _, _ = utils.load_data(
        args_in.data_folder,
        args_in.batch_size,
        is_train=False,
        is_clean=True,
        is_one_hot=True,
        stdize_dirty=True)

    args_in.test_loader_clean = test_loader_clean
    X_test_clean = X_test_clean.type(dtype_float)

    # RAE model matrices for the ADMM decomposition X = LD + S:
    #   'LD' low-dimensional (reconstruction) part, 'S' sparse outlier part,
    #   'LS' running estimate initialized to the data itself
    #   (presumably; exact roles live in train()/test() -- TODO confirm)
    rae_data_train = dict()
    rae_data_test = dict()

    rae_data_train['LD'] = torch.zeros_like(X_train).type(dtype_float)
    rae_data_test['LD'] = torch.zeros_like(X_test).type(dtype_float)

    rae_data_train['LS'] = X_train.clone()
    rae_data_test['LS'] = X_test.clone()

    rae_data_train['S'] = torch.zeros_like(X_train).type(dtype_float)
    rae_data_test['S'] = torch.zeros_like(X_test).type(dtype_float)

    # Run RAE model
    model = RAE(args_in.dataset_defs, args_in)

    if args_in.cuda_on:
        model.cuda()

    optimizer = optim.Adam(model.parameters(),
                           lr=args_in.lr,
                           weight_decay=args_in.l2_reg)

    # ADMM outer iterations; train() reports convergence of the train split
    for admm_iter in range(1, args_in.number_ADMM_iters + 1):

        # train
        converged_train = train(args_in, model, optimizer, rae_data_train,
                                X_train, X_train_clean, admm_iter)

        if converged_train:
            print("--> RAE for train data has converged!")

        # validation
        if args_in.turn_on_validation:
            test(args_in, model, optimizer, rae_data_test, X_test,
                 X_test_clean, admm_iter)

    if args_in.save_on:

        ### Train Data: final metrics + storage
        outlier_metrics_train, repair_metrics_train = evaluation_phase(
            args_in, model, rae_data_train, X_train, X_train_clean,
            target_errors_train, train_loader)

        store_metrics_final('train', args_in.dataset_defs, attributes,
                            outlier_metrics_train, repair_metrics_train,
                            target_errors_train, rae_data_train['S'], args_in)

        ### Test Data
        outlier_metrics_test, repair_metrics_test = evaluation_phase(
            args_in, model, rae_data_test, X_test, X_test_clean,
            target_errors_test, test_loader)

        store_metrics_final('test', args_in.dataset_defs, attributes,
                            outlier_metrics_test, repair_metrics_test,
                            target_errors_test, rae_data_test['S'], args_in)

        # save model parameters (moved to CPU so the checkpoint loads anywhere)
        model.cpu()
        torch.save(model.state_dict(), folder_output + "/model_params.pth")

        # remove non-serializable stuff so vars(args_in) is JSON-serializable
        del args_in.dataset_defs  # = []
        del args_in.train_loader  # = []
        del args_in.target_errors_train  # = []
        del args_in.test_loader  # = []
        del args_in.target_errors_test  # = []
        del args_in.train_loader_clean
        del args_in.test_loader_clean
        del args_in.folder_output
        del args_in.losses_save

        # save to .json file the args that were used for running the model
        with open(folder_output + "/args_run.json", "w") as outfile:
            json.dump(vars(args_in), outfile, indent=4, sort_keys=True)
def main(args):
    """Hybrid baseline: combines Platt-calibrated OCSVM row scores with
    per-cell marginal NLLs to score cell-level outliers; optionally dumps
    metrics to CSV under <output_folder>/<outlier_model>."""

    # Load datasets (dirty train and test splits)
    train_loader, X_train, target_errors_train, dataset_obj_train, attributes = utils.load_data(
        args.data_folder,
        args.batch_size,
        is_train=True,
        is_one_hot=args.is_one_hot)
    test_loader, X_test, target_errors_test, _, _ = utils.load_data(
        args.data_folder, args.batch_size, is_train=False)

    df_data_train = dataset_obj_train.df_dataset_instance

    # Run Marginals to obtain cell log probs (warnings suppressed for this call)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p_mat_train, _, _, _ = get_prob_matrix(df_data_train,
                                               dataset_obj_train.cat_cols,
                                               n_comp_max=40)
    # per-cell negative log-likelihood; 1e-8 guards against log(0)
    nll_marginal_cell = -np.log(p_mat_train + 1e-8)

    # A row counts as an outlier if any of its cells is flagged
    target_errors_row_train = (target_errors_train.sum(dim=1) > 0)
    target_row_train = target_errors_row_train.numpy()

    target_errors_row_test = (target_errors_test.sum(dim=1) > 0)
    target_row_test = target_errors_row_test.numpy()

    # Run OCSVM row outlier detection (negated scores: higher == more anomalous)
    clf = svm.OneClassSVM(nu=0.2, kernel="rbf", gamma=0.1)
    clf.fit(X_train)

    outlier_score_row_train = -clf.score_samples(X_train)
    outlier_score_row_test = -clf.score_samples(X_test)

    # Platt Scaling (uses Logistic Regression) of OCSVM scores.
    # NOTE(review): the calibrator is fit on TEST scores/labels and applied to
    # the train split -- looks like deliberate cross-split calibration, but
    # verify this isn't unintended label leakage.
    lr_calib = LogisticRegression(solver='lbfgs')
    lr_calib.fit(outlier_score_row_test.reshape(-1, 1), target_row_test)
    # column 0 of predict_proba = probability of the negative (inlier) class
    p_inlier_train = lr_calib.predict_proba(
        outlier_score_row_train.reshape(-1, 1))[:, 0]
    nll_inlier_row_train = -np.log(p_inlier_train + 1e-8)  # -log (p_inlier)

    # Row metrics (pre-calibration).
    # NOTE(review): log_loss is fed raw, unbounded SVM scores here; sklearn's
    # log_loss expects probabilities in [0, 1] -- confirm this runs on the
    # pinned sklearn version / is intentional.
    auc_row_train = auc_compute(target_row_train, outlier_score_row_train)
    avpr_row_train = avpr_compute(target_row_train, outlier_score_row_train)
    ll_row_train = log_loss(target_row_train, outlier_score_row_train)

    # Row metrics (post-calibration)
    auc_row_train_calibed = auc_compute(target_row_train, nll_inlier_row_train)
    avpr_row_train_calibed = avpr_compute(target_row_train,
                                          nll_inlier_row_train)
    ll_row_train_calibed = log_loss(target_row_train, 1. - p_inlier_train)

    print("AUC Prev. Calib.: {}".format(auc_row_train))
    print("AVPR Prev. Calib.: {}".format(avpr_row_train))
    print("Cross-Entropy Prev. Calib. {}".format(ll_row_train))

    # Re-check score is still good after calibration (AVPR and AUC should be same);
    # then Cross-Entropy should drop !!
    print("AUC Post. Calib.: {}".format(auc_row_train_calibed))
    print("AVPR Post. Calib.: {}".format(avpr_row_train_calibed))
    print("Cross-Entropy Post. Calib. {}".format(ll_row_train_calibed))

    # combine calibrated OCSVM and Marginals for cell outlier detection:
    # broadcast the per-row NLL over that row's cells and add the marginal NLLs
    nll_cells_final_train = nll_inlier_row_train.reshape(-1,
                                                         1) + nll_marginal_cell

    # Cell metrics (overall and per feature)
    auc_cell_train, auc_feats = get_auc_metrics(target_errors_train,
                                                nll_cells_final_train)
    avpr_cell_train, avpr_feats = get_avpr_metrics(target_errors_train,
                                                   nll_cells_final_train)

    print(
        'Combined: OCSVM + Marginals Train -- Cell AUC: {}, Cell AVPR: {}, Row AUC: {}, Row AVPR: {}'
        .format(auc_cell_train, avpr_cell_train, auc_row_train,
                avpr_row_train))

    #Save results into csv
    if args.save_on:

        # create folder for saving experiment data (if necessary)
        folder_output = args.output_folder + "/" + args.outlier_model

        try:
            os.makedirs(folder_output)
        except OSError as e:
            # tolerate a pre-existing folder; re-raise anything else
            if e.errno != errno.EEXIST:
                raise

        columns = ['AUC row', 'AVPR row', 'AUC cell', 'AVPR cell']
        results = {
            'AUC row': [auc_row_train],
            'AVPR row': [avpr_row_train],
            'AUC cell': [auc_cell_train],
            'AVPR cell': [avpr_cell_train]
        }

        #Dataframe with a single row of summary metrics
        df_out = pd.DataFrame(data=results, columns=columns)
        df_out.index.name = "Epochs"
        df_out.to_csv(folder_output + "/train_epochs_data.csv")

        # store AVPR for features (cell only)
        df_avpr_feat_cell = pd.DataFrame([],
                                         index=['AVPR'],
                                         columns=attributes)
        df_avpr_feat_cell.loc['AVPR'] = avpr_feats
        df_avpr_feat_cell.to_csv(folder_output + "/train_avpr_features.csv")

        # store AUC for features (cell only)
        df_auc_feat_cell = pd.DataFrame([], index=['AUC'], columns=attributes)
        df_auc_feat_cell.loc['AUC'] = auc_feats
        df_auc_feat_cell.to_csv(folder_output + "/train_auc_features.csv")
示例#8
0
def main(args):
    """Train a VAE-based outlier model (class imported dynamically from the
    module named by ``args.outlier_model``), evaluating on the train and test
    splits every epoch; when ``args.save_on`` is set, results, per-feature
    metrics and model parameters are written under the output folder."""

    # Load datasets (dirty train / test splits)
    train_loader, X_train, target_errors_train, dataset_obj, attributes = utils.load_data(
        args.data_folder, args.batch_size, is_train=True, get_data_idxs=False)

    test_loader, X_test, target_errors_test, _, _ = utils.load_data(
        args.data_folder, args.batch_size, is_train=False)
    # -- clean versions for evaluation (standardized with dirty-data statistics)
    _, X_train_clean, _, _, _ = utils.load_data(args.data_folder,
                                                args.batch_size,
                                                is_train=True,
                                                is_clean=True,
                                                stdize_dirty=True)
    _, X_test_clean, _, _, _ = utils.load_data(args.data_folder,
                                               args.batch_size,
                                               is_train=False,
                                               is_clean=True,
                                               stdize_dirty=True)

    # if running on gpu, then load data there
    if args.cuda_on:
        X_test = X_test.cuda()
        target_errors_test = target_errors_test.cuda()
        X_train_clean = X_train_clean.cuda()
        X_test_clean = X_test_clean.cuda()
        target_errors_train = target_errors_train.cuda()
        X_train = X_train.cuda()

    # for checking w (pi) raw convergence; compute_metrics presumably compares
    # against the previous epoch's logits -- TODO confirm
    logit_pi_prev_train = torch.tensor([])
    logit_pi_prev_test = torch.tensor([])

    # Import the model from the correct file (module name == args.outlier_model)
    outlier_model = __import__(args.outlier_model)
    model = outlier_model.VAE(dataset_obj, args)
    if args.load_model:
        # warm-start from a previously saved checkpoint
        model.load_state_dict(torch.load(args.load_model_path))

    print(args)

    if args.cuda_on:
        model.cuda()

    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=args.lr,
        weight_decay=args.l2_reg)  # excludes frozen params / layers

    # structs for saving data (per-epoch losses, overall and per feature)
    losses_save = {
        "train": {},
        "test": {},
        "train_per_feature": {},
        "test_per_feature": {}
    }

    # Run epochs
    for epoch in range(1, args.number_epochs + 1):

        # Training Phase (aliases kept as-is; they reference the same objects)
        _train_loader, _dataset_obj = train_loader, dataset_obj

        training_phase(model, optimizer, _train_loader, args, epoch)

        #Compute all the losses and metrics per epoch (Train set)
        compute_metrics(model,
                        X_train,
                        _dataset_obj,
                        args,
                        epoch,
                        losses_save,
                        logit_pi_prev_train,
                        X_train_clean,
                        target_errors_train,
                        mode="train")

        #Test Phase
        compute_metrics(model,
                        X_test,
                        dataset_obj,
                        args,
                        epoch,
                        losses_save,
                        logit_pi_prev_test,
                        X_test_clean,
                        target_errors_test,
                        mode="test")

    # save to folder AVPR / AUC per feature
    # NOTE(review): `epoch` below is the last loop value; this block assumes
    # args.number_epochs >= 1, otherwise `epoch` is undefined here.
    if args.save_on:

        # create folder for saving experiment data (if necessary)
        folder_output = args.output_folder + "/" + args.outlier_model

        ### Train Data
        save_to_csv(model,
                    X_train,
                    X_train_clean,
                    target_errors_train,
                    attributes,
                    losses_save,
                    dataset_obj,
                    folder_output,
                    args,
                    epoch,
                    mode='train')

        ### Test Data
        save_to_csv(model,
                    X_test,
                    X_test_clean,
                    target_errors_test,
                    attributes,
                    losses_save,
                    dataset_obj,
                    folder_output,
                    args,
                    epoch,
                    mode='test')

        # save model parameters (moved to CPU so the checkpoint loads anywhere)
        model.cpu()
        torch.save(model.state_dict(), folder_output + "/model_params.pth")

        # save to .json file the args that were used for running the model
        with open(folder_output + "/args_run.json", "w") as outfile:
            json.dump(vars(args), outfile, indent=4, sort_keys=True)