def get_result_dataset(att_trees, data, l=5, num_test=10):
    """
    Fix l and the QI set while changing the size of the dataset.

    For every size that is a multiple of 5000 and not larger than
    len(data), draw num_test random samples of that size, run
    mondrian_l_diversity on each one and print the averaged NCP and
    running time.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records to sample from; not modified here
    l         -- l value passed through to mondrian_l_diversity
    num_test  -- number of random samples averaged per dataset size
    """
    joint = 5000
    length = len(data)
    print("L=%d" % l)
    # Walk the multiples of joint that fit into the data. The former
    # "length / joint" count is a float under Python 3 and cannot be given
    # to range(); stepping the range needs no division at all.
    for pos in range(joint, length + 1, joint):
        ncp = rtime = 0
        print("#" * 30)
        print("size of dataset %d" % pos)
        for _trial in range(num_test):
            # Copy only the sampled records. The sample shares its row
            # objects with data, and the algorithm may rewrite rows in place
            # (the previous code re-copied the whole dataset after each run).
            temp = copy.deepcopy(random.sample(data, pos))
            _, eval_result = mondrian_l_diversity(att_trees, temp, l)
            ncp += eval_result[0]
            rtime += eval_result[1]
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print("#" * 30)
def test_mondrian_l_diversity(self):
    """Run mondrian_l_diversity with l=2 on seven records; NCP must be ~0."""
    init_tree()
    trees = [ATT_TREE]
    records = [
        ['a1', 'a1'],
        ['b2', 'a1'],
        ['b1', 'b1'],
        ['b2', 'b1'],
        ['b1', 'b2'],
        ['b2', 'b2'],
        ['a1', 'a2'],
    ]
    _, evaluation = mondrian_l_diversity(trees, records, 2)
    # first entry of the evaluation tuple is compared against zero
    deviation = abs(evaluation[0] - 0)
    self.assertTrue(deviation <= 0.001)
def Separation_Gen(att_trees, data, k=10, l=5):
    """
    Generalize the last field of every record with partition, then run
    mondrian_l_diversity over the rewritten records.

    Side effects: the module globals ATT_TREES and DATA are rebound to the
    arguments, and the last field of each record listed by partition is
    overwritten in place (data and DATA are the same list).

    return (result, eval_result)
    result is the first value returned by mondrian_l_diversity
    eval_result is a tuple (qid_ncp, sa_ncp, rtime): the second value
    returned by mondrian_l_diversity, the second value returned by
    partition, and the elapsed wall-clock seconds
    """
    global ATT_TREES, DATA
    ATT_TREES, DATA = att_trees, data
    started = time.time()
    if _DEBUG:
        print("size of dataset %d" % len(data))
    # the last field of each record is handed to partition, together with
    # the last attribute tree
    transactions = [record[-1] for record in data]
    groups, sa_ncp = partition(att_trees[-1], transactions, k)
    partition_data = []
    for members, generalized in groups:
        for idx in members:
            # every record gets its own copy of the generalized value
            DATA[idx][-1] = generalized[:]
            partition_data.append(DATA[idx][:])
    if _DEBUG:
        print("Begin Mondrian")
    result, qid_ncp = mondrian_l_diversity(ATT_TREES, partition_data, l)
    rtime = float(time.time() - started)
    if _DEBUG:
        print("Total running time = %.2f seconds" % rtime)
    # NOTE(review): older comments described an output row format
    # (QID1,.., QIDn, SA set, GroupID, 1/|group size|, Group SA domain);
    # nothing in this function builds it -- confirm where it is produced.
    return (result, (qid_ncp, sa_ncp, rtime))
def m_generalization(att_trees, data, k=10, l=5):
    """
    Generalize the last field of every record with partition, then run
    mondrian_l_diversity over the rewritten records.

    Side effects: the module globals ATT_TREES and DATA are rebound to the
    arguments, and the last field of each record listed by partition is
    overwritten in place (data and DATA are the same list).

    return (result, eval_result)
    result is the first value returned by mondrian_l_diversity
    eval_result is a tuple (rncp, tncp, rtime): the second value returned
    by mondrian_l_diversity, the second value returned by partition, and
    the elapsed wall-clock seconds
    """
    global ATT_TREES, DATA
    ATT_TREES, DATA = att_trees, data
    t0 = time.time()
    if _DEBUG:
        print("size of dataset %d" % len(data))
    # the last field of each record is handed to partition, together with
    # the last attribute tree
    last_fields = [row[-1] for row in data]
    trans_set, tncp = partition(att_trees[-1], last_fields, k)
    partition_data = []
    for row_ids, new_value in trans_set:
        for row_id in row_ids:
            # every record gets its own copy of the generalized value
            DATA[row_id][-1] = new_value[:]
            partition_data.append(DATA[row_id][:])
    if _DEBUG:
        print("Begin Mondrian")
    result, rncp = mondrian_l_diversity(ATT_TREES, partition_data, l)
    rtime = float(time.time() - t0)
    if _DEBUG:
        print("Total running time = %.2f seconds" % rtime)
    # NOTE(review): older comments described an output row format
    # (QID1,.., QIDn, SA set, GroupID, 1/|group size|, Group SA domain);
    # nothing in this function builds it -- confirm where it is produced.
    return (result, (rncp, tncp, rtime))
def get_result_dataset(att_trees, data, l=5, num_test=10):
    """
    Fix l and the QI set while changing the size of the dataset.

    Dataset sizes are the multiples of 5000 up to len(data). For each size
    num_test random samples are anonymized with mondrian_l_diversity and
    the mean NCP and mean running time are printed.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records to sample from; not modified here
    l         -- l value passed through to mondrian_l_diversity
    num_test  -- number of random samples averaged per dataset size
    """
    joint = 5000
    print("L=%d" % l)
    # A stepped range replaces the old "length / joint" counter, which is a
    # float on Python 3 (range() rejects it) and needed an extra
    # "pos > length" guard to skip the iteration it over-counted.
    for pos in range(joint, len(data) + 1, joint):
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _trial in range(num_test):
            # Work on a private copy of the sample so the caller's records
            # are never touched; this replaces the full-dataset deep copy
            # that used to follow every run.
            temp = copy.deepcopy(random.sample(data, pos))
            _, eval_result = mondrian_l_diversity(att_trees, temp, l)
            ncp += eval_result[0]
            rtime += eval_result[1]
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
def test_group(self):
    """Run mondrian_l_diversity with l=2 on records whose last field is a list."""
    init_tree()
    trees = [ATT_TREE]
    records = [
        ['a1', ['female', 'diabetes']],
        ['a1', ['female', 'cold']],
        ['b1', ['male', 'diabetes']],
        ['a2', ['male', 'cold']],
    ]
    anonymized, evaluation = mondrian_l_diversity(trees, records, 2)
    print(anonymized)
    # NOTE(review): this passes for ANY non-zero first evaluation value; if
    # a specific NCP is expected, compare against it with a tolerance.
    distance_from_zero = abs(evaluation[0] - 0)
    self.assertTrue(distance_from_zero)
def get_result_one(att_trees, data, l=5):
    """
    run mondrian_l_diversity for one time (l defaults to 5) and print the
    NCP and the running time

    The algorithm is given a deep copy of data, so the caller's records are
    left untouched. The previous version ran on data itself and then
    rebound the local name to a backup copy, which restored nothing for the
    caller and cost two full deep copies.
    """
    print("L=%d" % l)
    working_copy = copy.deepcopy(data)
    _, eval_result = mondrian_l_diversity(att_trees, working_copy, l)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
def test_single(self):
    """Run mondrian_l_diversity with l=2 on six three-field records and two trees."""
    init_tree()
    trees = [ATT_TREE] * 2
    records = [
        ['a1', 'b1', 'cold'],
        ['a2', 'b1', 'cold'],
        ['a1', 'b2', 'cold'],
        ['a1', 'b1', 'cold'],
        ['a1', 'b2', 'cancer'],
        ['a1', 'b1', 'cancer'],
    ]
    anonymized, evaluation = mondrian_l_diversity(trees, records, 2)
    print(anonymized)
    # NOTE(review): this passes for ANY non-zero first evaluation value; if
    # a specific NCP is expected, compare against it with a tolerance.
    distance_from_zero = abs(evaluation[0] - 0)
    self.assertTrue(distance_from_zero)
def get_result_one(att_trees, data, l=5):
    """
    run mondrian_l_diversity for one time (l defaults to 5), pass the
    result to write_to_file and print the NCP and the running time

    The algorithm is given a deep copy of data, so the caller's records are
    left untouched. The previous version ran on data itself and then
    rebound the local name to a backup copy, which restored nothing for the
    caller and cost two full deep copies.
    """
    print("L=%d" % l)
    working_copy = copy.deepcopy(data)
    result, eval_result = mondrian_l_diversity(att_trees, working_copy, l)
    write_to_file(result)
    print("NCP= %0.2f %%" % eval_result[0])
    print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_one(att_trees, data, l=5):
    """
    run mondrian_l_diversity for one time (l defaults to 5) and print the
    NCP and the running time

    The run operates on a deep copy of data so the caller's records stay
    as they were. Before, the algorithm received data itself and the
    "restore" afterwards only rebound a local name, so it had no effect
    outside this function.
    """
    print("L=%d" % l)
    _, eval_result = mondrian_l_diversity(att_trees, copy.deepcopy(data), l)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_l(att_trees, data):
    """
    change l from 2 to 20, while fixing QI and size of dataset

    Each run gets its own deep copy of data. The previous version ran the
    first iteration on the caller's list itself and restored a backup only
    into a local name, so the caller's records could be changed, and one
    more copy was made after the last run for nothing.
    """
    for l in range(2, 21):
        print('#' * 30)
        print("L=%d" % l)
        fresh = copy.deepcopy(data)
        _, eval_result = mondrian_l_diversity(att_trees, fresh, l)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_l(att_trees, data):
    """
    change l from 2 to 20, while fixing QI and size of dataset

    Every iteration anonymizes a private deep copy of data, so the caller's
    records are never handed to the algorithm. Formerly the first run used
    the caller's list directly and an unused copy was made after the final
    run.
    """
    for l in range(2, 21):
        print("#" * 30)
        print("L=%d" % l)
        _, eval_result = mondrian_l_diversity(
            att_trees, copy.deepcopy(data), l)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_one(att_trees, data, l=5):
    """
    run mondrian_l_diversity for one time (l defaults to 5), pass the
    result to write_to_file and print the NCP and the elapsed time in
    seconds, minutes and hours

    The algorithm is given a deep copy of data, so the caller's records are
    left untouched. The previous version ran on data itself and then
    rebound the local name to a backup copy, which restored nothing for the
    caller and cost two full deep copies.
    """
    print("L=%d" % l)
    working_copy = copy.deepcopy(data)
    result, eval_result = mondrian_l_diversity(att_trees, working_copy, l)
    write_to_file(result)
    seconds = eval_result[1]
    print("")
    print("Normalized Certainty Penalty (NCP): %0.2f %%" % eval_result[0])
    print("Done in %.2f seconds (%.3f minutes (%.2f hours))" %
          (seconds, seconds / 60, seconds / 60 / 60))
def get_result_qi(att_trees, data, l=5):
    """
    change number of QI, while fixing l and size of dataset

    The QI count runs from len(data[0]) - 1 down to 1 and is passed to
    mondrian_l_diversity as its fourth argument. Each run gets its own deep
    copy of data; the previous version ran the first iteration on the
    caller's list itself and made one unused copy after the last run.
    """
    num_data = len(data[0])
    print("L=%d" % l)
    for i in range(num_data - 1, 0, -1):
        print('#' * 30)
        print("Number of QI=%d" % i)
        fresh = copy.deepcopy(data)
        _, eval_result = mondrian_l_diversity(att_trees, fresh, l, i)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_qi(att_trees, data, l=5):
    """
    change number of QI, while fixing l and size of dataset

    Counts down from len(data[0]) - 1 to 1, handing the count to
    mondrian_l_diversity as its fourth argument. Every iteration works on a
    private deep copy of data, so the caller's records are never handed to
    the algorithm (formerly the first run used the caller's list directly
    and a copy was wasted after the final run).
    """
    qi_upper = len(data[0]) - 1
    print("L=%d" % l)
    for i in range(qi_upper, 0, -1):
        print("#" * 30)
        print("Number of QI=%d" % i)
        _, eval_result = mondrian_l_diversity(
            att_trees, copy.deepcopy(data), l, i)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_l(att_trees, data):
    """
    change l from 2 to 20, while fixing QI and size of dataset

    Prints the NCP and the elapsed time (seconds, minutes, hours) of every
    run. Each run gets its own deep copy of data; the previous version ran
    the first iteration on the caller's list itself and made one unused
    copy after the last run.
    """
    for l in range(2, 21):
        print('#' * 30)
        print("L=%d" % l)
        fresh = copy.deepcopy(data)
        _, eval_result = mondrian_l_diversity(att_trees, fresh, l)
        seconds = eval_result[1]
        print("")
        print("Normalized Certainty Penalty (NCP): %0.2f %%" % eval_result[0])
        print("Done in %.2f seconds (%.3f minutes (%.2f hours))" %
              (seconds, seconds / 60, seconds / 60 / 60))
def get_result_one(att_trees, data, l=config2.l):
    """
    run mondrian_l_diversity for one time (l defaults to config2.l)

    The result is passed to write_to_file; NCP, the process USS memory in
    MB and the running time are printed, and a one-line summary is appended
    to data/result1.data.

    The algorithm is given a deep copy of data, so the caller's records are
    left untouched. The previous version ran on data itself and then
    rebound the local name to a backup copy, which restored nothing for the
    caller and kept two extra full copies alive while memory was measured.
    """
    print("L=%d" % l)
    result, eval_result = mondrian_l_diversity(att_trees, copy.deepcopy(data), l)
    write_to_file(result)
    print("NCP= %0.2f %%" % eval_result[0])
    # Sample memory once so the console line and the result file report the
    # same figure (it used to be measured separately for each).
    process = psutil.Process(os.getpid())
    memory_mb = process.memory_full_info().uss / 1024. / 1024.
    # the label below is Chinese for "memory usage:"
    print('内存使用:', memory_mb, 'MB')
    print("Running time %0.2f" % eval_result[1] + " seconds")
    # NOTE(review): the summary labels the value "k" although it is l; left
    # as is in case existing result files are parsed by that label.
    res = "k= %s, ncp = %s%%, memory = %s, runtime = %s" % (
        l, eval_result[0], memory_mb, eval_result[1])
    with open("data/result1.data", "a") as fp:
        fp.write(res + '\n')