# save labels in binary and one-hot representations labels.dump(os.path.join(labels_train_folder, 'labels_bin.np')) # remove redundant education-number and labels features x = delete(x, (4, 14), 1) # enumerate parameters monotone = True ratios = np.arange(0, .5, .1) for ratio in ratios: print '\nPerturbing {}% of data'.format(ratio) if ratio > 0: pert_data, _ = perturbate_data(x, adult_params['cat_cols'], ratio, monotone, adult_params['miss_data_symbol'], adult_params['mnar_values']) else: pert_data = x print "\tRatio is {} of {}".format( np.sum(pert_data == adult_params['miss_data_symbol']), len(pert_data) * len(adult_params['cat_cols'])) path = os.path.join(perturb_folder, 'adult_train_pert_mono_{}_ratio_{}.csv'.format(monotone, ratio)) # save perturbed data to disk as csv print '\tSaving perturbed data to {}'.format(path) np.savetxt(path, pert_data, delimiter=",", fmt="%s") for imp_method in adult_params['imp_methods']:
# Fragment of the votes-dataset preparation script: stores the training
# split, then for every ratio writes a (possibly perturbed) copy to disk and
# runs each configured imputation method on it.
# NOTE(review): this excerpt was whitespace-collapsed; the loop nesting is
# reconstructed from the statements themselves -- confirm against the file.
# Single-argument parenthesised print is used so that the statements print
# the same text on Python 2 and also parse on Python 3.

# save votes training data
np.savetxt('data/votes_train.csv', votes_train, delimiter=",", fmt="%s")

# For training data
print('Preparing train data for {}'.format(dataname))

# enumerate parameters: ratios run from 0 up to (excluding) 0.5, step 0.1
monotone = True
ratios = np.arange(0, .5, .1)

for ratio in ratios:
    # NOTE(review): ratio looks like a fraction (0.1 .. 0.4) yet is printed
    # with a '%' sign -- confirm the intended unit.
    print('\nPerturbing {}% of data'.format(ratio))

    # ratio 0 skips perturbation and uses the training data unchanged
    if ratio > 0:
        pert_data, _ = perturbate_data(
            votes_train,
            votes_params['cat_cols'],
            ratio,
            monotone,
            votes_params['miss_data_symbol'],
            votes_params['mnar_values'],
        )
    else:
        pert_data = votes_train

    # output file name encodes the dataset name, monotone flag and ratio
    path = os.path.join(
        perturb_folder,
        '{}_train_pert_mono_{}_ratio_{}.csv'.format(dataname, monotone, ratio),
    )

    # save perturbed data to disk as csv
    print('\tSaving perturbed data to {}'.format(path))
    np.savetxt(path, pert_data, delimiter=",", fmt="%s")

    # impute data given imp_methods in params.py
    for imp_method in votes_params['imp_methods']:
        print('\tImputing with {}'.format(imp_method))
        # a fresh Imputer is created for every method, as in the original
        imp = Imputer()
        data = impute(pert_data, imp, imp_method, votes_params)