Exemplo n.º 1
0
# Persist the label array as 'labels_bin.np' in the training-labels folder.
# NOTE(review): presumably `labels` is a NumPy array, in which case dump()
# writes a pickle of it -- confirm against the code that builds `labels`.
labels.dump(os.path.join(labels_train_folder, 'labels_bin.np'))

# Drop columns 4 and 14 (axis 1): the redundant education-number feature
# and the labels column.
x = delete(x, (4, 14), 1)

# Perturbation settings: one pass per ratio in 0.0, 0.1, ..., 0.4.
monotone = True
ratios = np.arange(0, .5, .1)

for ratio in ratios:
    # `ratio` is a fraction, so scale it before labelling it with '%';
    # printing it raw reported e.g. "0.1%" for a 10% perturbation.
    print('\nPerturbing {:g}% of data'.format(ratio * 100))
    if ratio > 0:
        pert_data, _ = perturbate_data(x, adult_params['cat_cols'], ratio,
                                       monotone,
                                       adult_params['miss_data_symbol'],
                                       adult_params['mnar_values'])
    else:
        # Zero ratio: use the data unchanged as the baseline.
        pert_data = x
    # Report the number of cells equal to the missing-data symbol against
    # rows * number of categorical columns.
    print("\tRatio is {} of {}".format(
        np.sum(pert_data == adult_params['miss_data_symbol']),
        len(pert_data) * len(adult_params['cat_cols'])))

    # The output file name encodes the monotone flag and the ratio.
    path = os.path.join(
        perturb_folder,
        'adult_train_pert_mono_{}_ratio_{}.csv'.format(monotone, ratio))
    # Save perturbed data to disk as csv (every cell written via '%s').
    print('\tSaving perturbed data to {}'.format(path))
    np.savetxt(path, pert_data, delimiter=",", fmt="%s")
    for imp_method in adult_params['imp_methods']:
Exemplo n.º 2
0
# Save the votes training data as csv (every cell written via '%s').
np.savetxt('data/votes_train.csv', votes_train, delimiter=",", fmt="%s")

# For training data
print('Preparing train data for {}'.format(dataname))

# Perturbation settings: one pass per ratio in 0.0, 0.1, ..., 0.4.
monotone = True
ratios = np.arange(0, .5, .1)

for ratio in ratios:
    # `ratio` is a fraction, so scale it before labelling it with '%';
    # printing it raw reported e.g. "0.1%" for a 10% perturbation.
    print('\nPerturbing {:g}% of data'.format(ratio * 100))
    if ratio > 0:
        pert_data, _ = perturbate_data(
            votes_train, votes_params['cat_cols'], ratio, monotone,
            votes_params['miss_data_symbol'], votes_params['mnar_values'])
    else:
        # Zero ratio: use the training data unchanged as the baseline.
        pert_data = votes_train
    # The output file name encodes the dataset name, the monotone flag and
    # the ratio.
    path = os.path.join(
        perturb_folder,
        '{}_train_pert_mono_{}_ratio_{}.csv'.format(dataname, monotone,
                                                    ratio))
    # Save perturbed data to disk as csv.
    print('\tSaving perturbed data to {}'.format(path))
    np.savetxt(path, pert_data, delimiter=",", fmt="%s")
    # Impute the perturbed data once per method listed under
    # votes_params['imp_methods'] (original note: defined in params.py).
    for imp_method in votes_params['imp_methods']:
        print('\tImputing with {}'.format(imp_method))
        # A new Imputer instance is created for each method.
        imp = Imputer()
        data = impute(pert_data, imp, imp_method, votes_params)