def mae2(data_raw, privacy_budget, best_f):
    """Return three noise-count errors for the columns picked by ``select_ci2``.

    The column selection comes from
    ``select_ci2(1, backup_solutions, best_f, mcd, u2_dict)`` and is used to
    slice ``data_raw``.  The slice is reported twice through ``print_cs``
    (once with its default threshold, once with ``float(mcd)``) and is then
    handed to ``algo_2_count.noise_count_error`` three times, each time with
    a different value read from the result of ``funcs.cs``.

    Reads the module-level names ``backup_solutions``, ``mcd`` and
    ``u2_dict``.

    Args:
        data_raw: Data object that is indexed with the selected columns.
        privacy_budget: Forwarded unchanged to ``noise_count_error``.
        best_f: Forwarded unchanged to ``select_ci2``.

    Returns:
        Tuple ``(u2, mae_cs, mae_gs)``:
        ``u2`` uses the 'CS_i' entry of ``funcs.cs`` called with ``mcd`` as
        threshold; ``mae_cs`` uses the 'CS_i' entry and ``mae_gs`` the 'GS'
        entry of ``funcs.cs`` called without a threshold argument.
    """
    chosen = select_ci2(1, backup_solutions, best_f, mcd, u2_dict)

    label = 'best+u2选出来的特征'
    print_cs(label, data_raw[chosen])
    print_cs(label, data_raw[chosen], float(mcd))

    def error_for(key, *threshold):
        # A fresh slice is taken for each argument so that no object is
        # shared between the individual calls.
        subset = data_raw[chosen]
        sensitivity = funcs.cs(data_raw[chosen], *threshold)[key]
        return algo_2_count.noise_count_error(
            subset, sensitivity, privacy_budget)

    # Our algorithm: mcd is the threshold.
    u2 = error_for('CS_i', mcd)
    # CS with the threshold left at funcs.cs's default.
    mae_cs = error_for('CS_i')
    # GS with the threshold left at funcs.cs's default.
    mae_gs = error_for('GS')

    return u2, mae_cs, mae_gs
def MAE1(privacy_budget):
    """Print and return the noise-count error for the ``selectCi1`` selection.

    Reads the module-level names ``adjustment_features``, ``best_features``,
    ``data`` and ``mcd``.

    Args:
        privacy_budget: Forwarded unchanged to
            ``algo_2_count.noise_count_error``.

    Returns:
        The value returned by ``noise_count_error`` for the selected columns,
        using the 'CS_i' entry of ``funcs.cs(..., mcd)``.
    """
    candidates = partitionC(adjustment_features)
    picked = selectCi1(1, candidates, best_features)

    subset = data[picked]
    sensitivity = funcs.cs(data[picked], mcd)['CS_i']
    u1 = algo_2_count.noise_count_error(subset, sensitivity, privacy_budget)

    print(u1)
    return u1
def MAE2(privacy_budget):
    """Print and return the noise-count error for the ``selectCi2`` selection.

    Reads the module-level names ``adjustment_features``, ``best_features``,
    ``data`` and ``mcd``.

    Args:
        privacy_budget: Forwarded unchanged to
            ``algo_2_count.noise_count_error``.

    Returns:
        The value returned by ``noise_count_error`` for the selected columns,
        using the 'CS_i' entry of ``funcs.cs(..., mcd)``.
    """
    candidates = partitionC(adjustment_features)
    picked = selectCi2(1, candidates, best_features)

    subset = data[picked]
    sensitivity = funcs.cs(data[picked], mcd)['CS_i']
    u2 = algo_2_count.noise_count_error(subset, sensitivity, privacy_budget)

    print(u2)
    return u2
def mae1(privacy_budget):
    """Print and return three noise-count errors for the ``select_ci1`` pick.

    The column selection comes from
    ``select_ci1(1, backup_solutions, best_f)`` and is used to slice
    ``data_raw``.  The slice is passed to
    ``algo_2_count.noise_count_error`` three times, each time with a
    different value read from the result of ``funcs.cs``.

    Reads the module-level names ``backup_solutions``, ``best_f``,
    ``data_raw`` and ``mcd``.

    Args:
        privacy_budget: Forwarded unchanged to ``noise_count_error``.

    Returns:
        Tuple ``(u1, mae_cs, mae_gs)``:
        ``u1`` uses the 'CS_i' entry of ``funcs.cs`` called with ``mcd`` as
        threshold; ``mae_cs`` uses the 'CS_i' entry and ``mae_gs`` the 'GS'
        entry of ``funcs.cs`` called without a threshold argument.
    """
    chosen = select_ci1(1, backup_solutions, best_f)

    def error_for(key, *threshold):
        # A fresh slice is taken for each argument so that no object is
        # shared between the individual calls.
        subset = data_raw[chosen]
        sensitivity = funcs.cs(data_raw[chosen], *threshold)[key]
        return algo_2_count.noise_count_error(
            subset, sensitivity, privacy_budget)

    # Our algorithm: mcd is the threshold.
    u1 = error_for('CS_i', mcd)
    # CS with the threshold left at funcs.cs's default.
    mae_cs = error_for('CS_i')
    # GS with the threshold left at funcs.cs's default.
    mae_gs = error_for('GS')

    print('u1' + str(u1))
    print('u1cs' + str(mae_cs))
    print('u1gs' + str(mae_gs))

    return u1, mae_cs, mae_gs
import numpy as np import pandas as pd import funcs # 读取数据 d1 = pd.read_csv('data/d1.csv') d2 = pd.read_csv('data/d2.csv') d = pd.read_csv('data/d_new.csv') cs_1 = funcs.cs(d1) print('对D1运行:\n', cs_1) print('------------------') cs_2 = funcs.cs(d2) print('对D2运行:\n', cs_2) print('------------------') mcd = np.mean([cs_1['CS_mean'], cs_2['CS_mean']]) print('MCD为', mcd) print('------------------') cs_3 = funcs.cs(d) print('对D运行,阈值为0.5:\n', cs_3) print('------------------') cs_3_mcd = funcs.cs(d, threshold=mcd) print('对D运行,阈值为mcd:\n', cs_3_mcd) print('------------------') # 清洗数据 ad_x, ad_y, names = funcs.data_clean(d) x = ad_x.values y = list(ad_y) # 特征选择 best_feature_set_names, adjusted_feature_set_names = funcs.randomized_lasso(
def MCD():
    """Return the mean of the 'CS_mean' values of ``data1`` and ``data2``.

    ``funcs.cs`` is called once per data set (module-level ``data1`` and
    ``data2``) with no threshold argument, and the 'CS_mean' entries of the
    two results are averaged with ``np.mean``.
    """
    cs_means = [funcs.cs(party)['CS_mean'] for party in (data1, data2)]
    return np.mean(cs_means)
def utilityFunction2(feature_names):
    """Return ``MCD()`` divided by the 'CS_i' value of the given columns.

    Args:
        feature_names: Column selection used to index the module-level
            ``data``.

    Returns:
        ``MCD() / funcs.cs(data[feature_names], mcd)['CS_i']``.
    """
    # The denominator is computed first, then MCD() is evaluated.
    denominator = funcs.cs(data[feature_names], mcd)['CS_i']
    numerator = MCD()
    return numerator / denominator
def print_cs(name, data, threshold=0.5):
    """Print a labelled ``funcs.cs`` report for ``data``.

    Three lines of output are produced, in this order: ``name`` followed by
    a suffix showing the threshold, ``data.columns``, and the result of
    ``funcs.cs(data, threshold)``.

    Args:
        name: Label string placed at the start of the header line.
        data: Object exposing ``.columns`` that ``funcs.cs`` accepts.
        threshold: Threshold shown in the header and passed to ``funcs.cs``;
            defaults to 0.5.
    """
    suffix = ':(阈值为 %s )' % threshold
    print(name + suffix)

    columns = data.columns
    print(columns)

    cs_result = funcs.cs(data, threshold)
    print(cs_result)
def u2_initialize(c):
    """Map every candidate column selection in ``c`` to its 'CS_i' value.

    For each element ``x`` of ``c`` the module-level ``data_raw`` is indexed
    with ``x`` and passed to ``funcs.cs`` together with the module-level
    ``mcd``; the 'CS_i' entry of the result becomes the dictionary value.

    ``c`` is traversed exactly once, so any iterable works (including
    one-shot iterators, for which a two-pass implementation would silently
    produce an empty mapping).

    Args:
        c: Iterable of column selections (sub-lists, going by the original
            author's comment).

    Returns:
        dict whose keys are ``tuple(x)`` for each ``x`` in ``c`` and whose
        values are the corresponding 'CS_i' entries.  If two elements
        produce the same tuple, the later one wins.
    """
    # Keys are converted to tuples because lists are unhashable; data_raw is
    # still indexed with the original element x.
    return {tuple(x): funcs.cs(data_raw[x], mcd)['CS_i'] for x in c}
def get_mcd(subdatas):
    """Return the mean of the 'CS_mean' values over ``subdatas``.

    Args:
        subdatas: Iterable of data sets, each accepted by ``funcs.cs``.

    Returns:
        ``np.mean`` of the 'CS_mean' entry of ``funcs.cs(part)`` for every
        ``part`` in ``subdatas``.
    """
    cs_means = [funcs.cs(part)['CS_mean'] for part in subdatas]
    return np.mean(cs_means)
def get_mcd(data_raw):
    """Return the mean 'CS_mean' over the parts of ``funcs.split(data_raw, 3)``.

    Args:
        data_raw: Data set passed to ``funcs.split`` with a second argument
            of 3.

    Returns:
        ``np.mean`` of the 'CS_mean' entry of ``funcs.cs(part)`` for every
        ``part`` yielded by the split.
    """
    parts = funcs.split(data_raw, 3)
    cs_means = [funcs.cs(part)['CS_mean'] for part in parts]
    return np.mean(cs_means)