def test_read_csv_and_anonymise(self):
    """Read the Adult data, run ``mondrian`` on it and print the result.

    ``mondrian`` is called with the loaded records, ``40`` and ``False``
    (presumably k and a relax flag -- confirm against its signature).
    The method makes no assertions; it only exercises the pipeline and
    prints the first element of what ``mondrian`` returns.
    """
    # Kept as a function-scope import, exactly like the original.
    from utils.read_adult_data import read_data as read_adult

    # read_adult() must yield exactly two items; the second is not used here.
    loaded = read_adult()
    records, _intuitive_order = loaded

    # mondrian() must likewise yield exactly two items; only the first is shown.
    outcome = mondrian(records, 40, False)
    anonymised, _evaluation = outcome
    print(anonymised)
# NOTE(review): whitespace-collapsed, truncated fragment; the statements below
# are kept exactly as found.
# First part: tail of a function whose header is outside this view. For the
# current k it calls clustering_based_k_anon, resets `data` to a deep copy of
# `data_back`, prints eval_result[0] as "NCP" and eval_result[1] as
# "Running time", and appends both (rounded to 2 decimals) to all_ncp and
# all_rtime, which are printed at the end.
# Second part: script entry point. It calls read_adult() / read_adult_tree(),
# fixes TYPE_ALG to 'oka', takes an optional FLAG from sys.argv[1] (default '')
# and dispatches: 'k' -> get_result_k, 'n' -> get_result_n, and '' with
# __DEBUG set -> get_result_one under cProfile. Anything after that is cut off.
print("K=%d" % k) _, eval_result = clustering_based_k_anon(att_trees, data, type_alg, k) data = copy.deepcopy(data_back) print("NCP %0.2f" % eval_result[0] + "%") all_ncp.append(round(eval_result[0], 2)) print("Running time %0.2f" % eval_result[1] + "seconds") all_rtime.append(round(eval_result[1], 2)) print("All NCP", all_ncp) print("All Running time", all_rtime) if __name__ == '__main__': print("Using Adult Dataset") DATA = read_adult() ATT_TREES = read_adult_tree() TYPE_ALG = 'oka' FLAG = '' try: FLAG = sys.argv[1] except IndexError: pass if FLAG == 'k': get_result_k(ATT_TREES, DATA, TYPE_ALG) elif FLAG == 'n': get_result_n(ATT_TREES, DATA, TYPE_ALG) elif FLAG == '': if __DEBUG: cProfile.run('get_result_one(ATT_TREES, DATA, TYPE_ALG)')
# NOTE(review): whitespace-collapsed Python 2 fragment (print statements); it
# is cut off after `if LEN_ARGV > 3:`, so the body of the 'one' branch is not
# visible.
# Script entry point: reads DATA_SELECT from sys.argv[1] and FLAG from
# sys.argv[2], loads the INFORMS data/trees when DATA_SELECT == 'i' and the
# Adult data/trees otherwise, prints a banner, then dispatches on FLAG
# ('k', 'qi', 'data', 'one').
# NOTE(review): the bare `except:` swallows every error, and only FLAG gets a
# default here -- with no command-line argument DATA_SELECT looks unbound at
# the first comparison unless it is defined elsewhere in the file; confirm.
# NOTE(review): the loader falls back to Adult for anything but 'i', while the
# banner prints "INFORMS data" for anything but 'a', so the two can disagree.
if __name__ == '__main__': FLAG = '' LEN_ARGV = len(sys.argv) try: DATA_SELECT = sys.argv[1] FLAG = sys.argv[2] except: pass k = 10 if DATA_SELECT == 'i': RAW_DATA = read_informs() ATT_TREES = read_informs_tree() else: RAW_DATA = read_adult() ATT_TREES = read_adult_tree() print '#' * 30 if DATA_SELECT == 'a': print "Adult data" else: print "INFORMS data" print '#' * 30 if FLAG == 'k': get_result_k(ATT_TREES, RAW_DATA) elif FLAG == 'qi': get_result_qi(ATT_TREES, RAW_DATA) elif FLAG == 'data': get_result_dataset(ATT_TREES, RAW_DATA) elif FLAG == 'one': if LEN_ARGV > 3:
# NOTE(review): whitespace-collapsed Python 2 fragment; it starts in the middle
# of an if/else that sets RELAX, whose condition is outside this view.
# Visible behaviour: prints which Mondrian variant RELAX selects, loads the
# INFORMS data when DATA_SELECT == 'i' (otherwise the Adult data together with
# INTUITIVE_ORDER), then dispatches on FLAG: 'k' -> get_result_k,
# 'qi' -> get_result_qi, 'data' -> get_result_dataset, '' -> get_result_one,
# and any other value is parsed with int() and passed to get_result_one as
# INPUT_K; a FLAG that int() rejects prints the usage text.
# NOTE(review): the try block also wraps get_result_one, so a ValueError raised
# inside that call would print the usage text as well.
RELAX = False else: RELAX = True if RELAX: print "Relax Mondrian" else: print "Strict Mondrian" if DATA_SELECT == 'i': print "INFORMS data" DATA = read_informs() else: print "Adult data" # INTUITIVE_ORDER is an intuitive order for # categorical attribute. This order is produced # by the reading (from dataset) order. DATA, INTUITIVE_ORDER = read_adult() if FLAG == 'k': get_result_k(DATA) elif FLAG == 'qi': get_result_qi(DATA) elif FLAG == 'data': get_result_dataset(DATA) elif FLAG == '': get_result_one(DATA) else: try: INPUT_K = int(FLAG) get_result_one(DATA, INPUT_K) except ValueError: print "Usage: python anonymizer [r|s] [a | i] [k | qi | data]" print "r: relax mondrian, s: strict mondrian"
# NOTE(review): whitespace-collapsed Python 2 fragment, cut at both ends.
# Visible behaviour: reads DATA_SELECT, TYPE_ALG and FLAG from sys.argv[1],
# sys.argv[2] and sys.argv[3] (a missing argument raises IndexError, which is
# ignored), sets INPUT_K to 5, loads the INFORMS data/trees when
# DATA_SELECT == 'i' and the Adult data/trees otherwise, appears to print
# sys.argv when __DEBUG is set, then dispatches on FLAG ('k' -> get_result_k,
# 'qi' -> get_result_qi). Later branches are outside this view.
# NOTE(review): no defaults for DATA_SELECT / TYPE_ALG / FLAG are visible
# here; confirm they are initialised before this point.
LEN_ARGV = len(sys.argv) try: DATA_SELECT = sys.argv[1] TYPE_ALG = sys.argv[2] FLAG = sys.argv[3] except IndexError: pass INPUT_K = 5 # read record if DATA_SELECT == 'i': print "INFORMS data" DATA = read_informs() ATT_TREES = read_informs_tree() else: print "Adult data" DATA = read_adult() ATT_TREES = read_adult_tree() if __DEBUG: # DATA = DATA[:2000] # print "Test anonymization with %d records" % len(DATA) # ec_exam_by_dim(DATA) # ec_exam_by_size_data(DATA) # print ec_distribution(DATA) # dim_ec_distribution(DATA) # size_ec_distribution(DATA) # pdb.set_trace() print sys.argv if FLAG == 'k': get_result_k(ATT_TREES, DATA, TYPE_ALG) elif FLAG == 'qi': get_result_qi(ATT_TREES, DATA, TYPE_ALG)