def compare_H1_H2_structural_attack(bG, aG, bins): cand_size, bin_size, sig_list, bucket_list = equivalence_class_H1(bG, bins) print "H1-bG", bin_size cand_size, bin_size, sig_list, bucket_list = equivalence_class_H1(aG, bins) print "H1-aG", bin_size h2_list = equivalence_class_H2_open(bG, None) # open world cand_size, bin_size, sig_list, bucket_list = bucket_H2(h2_list, bins) print "H2-open-bG", bin_size h2_list = equivalence_class_H2_open(aG, None) cand_size, bin_size, sig_list, bucket_list = bucket_H2(h2_list, bins) print "H2-open-aG", bin_size
def incorrectness_uncertain_from_file(before_file, after_file, sample_file, n_samples, bins): # compute sig_list_b, bucket_list_b ONCE ! start = time.clock() bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int) # G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True) print "read bG: DONE, elapsed :", time.clock() - start h2_list = equivalence_class_H2_open(bG, None) cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins) # print "len B:", len(sig_list_b), len(bucket_list_b) # H1 score, H2 score start = time.clock() score_H1 = 0.0 score_H2 = 0.0 count = 0 for i in range(n_samples): file_name = sample_file + str(i) aG = nx.read_edgelist(file_name, '#', '\t', create_using=nx.MultiGraph(), nodetype=int, data=False) # IMPORTANT: MultiGraph # H1 sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins) score_H1 += sum_re_prob # H2 sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins) score_H2 += sum_re_prob print "count =", count count += 1 # score_H1 = score_H1/n_samples score_H2 = score_H2/n_samples print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start # return score_H1, score_H2
def incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins):
    # Score how well the sample graph aG re-identifies nodes of the original
    # graph under the H2 (open-world) model.
    #   sig_list_b / bucket_list_b : signatures and node buckets of the
    #       original graph, as produced by bucket_H2 (computed once by callers)
    #   bins : forwarded to bucket_H2 for bucketing aG
    # Returns (sum_re_prob, re_prob_dict) where re_prob_dict[u] is the
    # re-identification probability of node u and sum_re_prob is their sum.
    h2_list = equivalence_class_H2_open(aG, None)
    cand_size, bin_size, sig_list_a, bucket_list_a = bucket_H2(h2_list, bins)
    # print "len A:", len(sig_list_a), len(bucket_list_a)

    # compute incorrectness score
    re_prob_dict = {}  # re_prob_dict[u] = reidentification probability of u
    for id_b in range(len(sig_list_b)):
        sig_b = sig_list_b[id_b]
        # 1 - binary search in sig_list_a
        # assumes sig_list_a is sorted under list_comparator -- TODO confirm
        # that bucket_H2 guarantees this ordering.
        # NOTE(review): if sig_list_a is empty, mid is -1 and sig_list_a[mid]
        # raises IndexError -- confirm callers never pass an empty sample.
        lo = 0
        hi = len(sig_list_a) - 1
        while True:
            mid = (lo + hi) / 2  # Python 2 integer division
            sig_a = sig_list_a[mid]
            #
            if list_comparator(sig_a, sig_b) == 0:
                # signature match: nodes present in the matching bucket of aG
                # are re-identified with probability 1/|bucket|, others get 0
                set_a = set(bucket_list_a[mid])
                for u in bucket_list_b[id_b]:
                    if u in set_a:
                        re_prob_dict[u] = 1.0 / len(set_a)
                    else:
                        re_prob_dict[u] = 0.0
                break
            #
            if list_comparator(sig_b, sig_a) < 0:
                hi = mid - 1
                if hi < lo:
                    # sig_b not present in aG: its nodes are simply never
                    # added to re_prob_dict (contribute 0 to the sum)
                    break
            if list_comparator(sig_b, sig_a) > 0:
                lo = mid + 1
                if lo > hi:
                    break
        # 2 - linear search in sig_list_a (superseded by the binary search above)
        # for id_a in range(len(sig_list_a)):
        #     sig_a = sig_list_a[id_a]
        #     if list_comparator(sig_a, sig_b) == 0:   # need more effective Binary Search
        #         set_a = set(bucket_list_a[id_a])
        #         for u in bucket_list_b[id_b]:
        #             if u in set_a:
        #                 re_prob_dict[u] = 1.0/len(set_a)
        #             else:
        #                 re_prob_dict[u] = 0.0
        #         break
    #
    sum_re_prob = sum(re_prob_dict.itervalues())  # Python 2 dict iterator
    return sum_re_prob, re_prob_dict
def incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins):
    # NOTE(review): duplicate definition -- an identical function of the same
    # name appears earlier in this file; this later definition is the one
    # Python keeps. Consider removing one copy.
    # Score how well the sample graph aG re-identifies nodes of the original
    # graph under the H2 (open-world) model.
    #   sig_list_b / bucket_list_b : signatures and node buckets of the
    #       original graph, as produced by bucket_H2 (computed once by callers)
    #   bins : forwarded to bucket_H2 for bucketing aG
    # Returns (sum_re_prob, re_prob_dict) where re_prob_dict[u] is the
    # re-identification probability of node u and sum_re_prob is their sum.
    h2_list = equivalence_class_H2_open(aG, None)
    cand_size, bin_size, sig_list_a, bucket_list_a = bucket_H2(h2_list, bins)
    # print "len A:", len(sig_list_a), len(bucket_list_a)

    # compute incorrectness score
    re_prob_dict = {}  # re_prob_dict[u] = reidentification probability of u
    for id_b in range(len(sig_list_b)):
        sig_b = sig_list_b[id_b]
        # 1 - binary search in sig_list_a
        # assumes sig_list_a is sorted under list_comparator -- TODO confirm
        # that bucket_H2 guarantees this ordering.
        # NOTE(review): if sig_list_a is empty, mid is -1 and sig_list_a[mid]
        # raises IndexError -- confirm callers never pass an empty sample.
        lo = 0
        hi = len(sig_list_a)-1
        while True:
            mid = (lo+hi)/2  # Python 2 integer division
            sig_a = sig_list_a[mid]
            #
            if list_comparator(sig_a, sig_b) == 0:
                # signature match: nodes present in the matching bucket of aG
                # are re-identified with probability 1/|bucket|, others get 0
                set_a = set(bucket_list_a[mid])
                for u in bucket_list_b[id_b]:
                    if u in set_a:
                        re_prob_dict[u] = 1.0/len(set_a)
                    else:
                        re_prob_dict[u] = 0.0
                break
            #
            if list_comparator(sig_b, sig_a) < 0:
                hi = mid-1
                if hi < lo:
                    # sig_b not present in aG: its nodes are simply never
                    # added to re_prob_dict (contribute 0 to the sum)
                    break
            if list_comparator(sig_b, sig_a) > 0:
                lo = mid+1
                if lo > hi:
                    break
        # 2 - linear search in sig_list_a (superseded by the binary search above)
        # for id_a in range(len(sig_list_a)):
        #     sig_a = sig_list_a[id_a]
        #     if list_comparator(sig_a, sig_b) == 0:   # need more effective Binary Search
        #         set_a = set(bucket_list_a[id_a])
        #         for u in bucket_list_b[id_b]:
        #             if u in set_a:
        #                 re_prob_dict[u] = 1.0/len(set_a)
        #             else:
        #                 re_prob_dict[u] = 0.0
        #         break
    #
    sum_re_prob = sum(re_prob_dict.itervalues())  # Python 2 dict iterator
    return sum_re_prob, re_prob_dict
def incorrectness_uncertain_from_file(before_file, after_file, sample_file, n_samples, bins):
    # NOTE(review): duplicate definition -- an identical function of the same
    # name appears earlier in this file; this later definition is the one
    # Python keeps. Consider removing one copy.
    # Average the H1 and H2 (open-world) incorrectness scores over n_samples
    # sample graphs read from files sample_file + "0", sample_file + "1", ...
    # after_file is currently unused (its read is commented out).
    # Returns (score_H1, score_H2).
    # compute sig_list_b, bucket_list_b ONCE !
    start = time.clock()
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    # G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    print "read bG: DONE, elapsed :", time.clock() - start

    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
    # print "len B:", len(sig_list_b), len(bucket_list_b)

    # H1 score, H2 score
    start = time.clock()
    score_H1 = 0.0
    score_H2 = 0.0
    count = 0
    for i in range(n_samples):
        file_name = sample_file + str(i)
        # IMPORTANT: MultiGraph
        aG = nx.read_edgelist(file_name, '#', '\t', create_using=nx.MultiGraph(), nodetype=int, data=False)
        # H1
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
        # H2 -- reuses the buckets of bG computed above
        sum_re_prob, re_prob_dict = incorrectness_H2_open(
            aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    #
    score_H1 = score_H1 / n_samples
    score_H2 = score_H2 / n_samples
    print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start

    #
    return score_H1, score_H2
def incorrectness_uncertain(before_file, after_file, bins): # compute sig_list_b, bucket_list_b ONCE ! bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int) G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True) h2_list = equivalence_class_H2_open(bG, None) cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins) # print "len B:", len(sig_list_b), len(bucket_list_b) # list of sampled graphs g_list = [] start = time.clock() for i in range(N_SAMPLES): g_list.append(generate_sample(G)) print "Sampling graphs - Elapsed ", (time.clock() - start) # H1 score start = time.clock() score_H1 = 0.0 for aG in g_list: sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins) score_H1 += sum_re_prob score_H1 = score_H1 / N_SAMPLES print "compute score_H1: DONE, elapsed :", time.clock() - start # H2 score score_H2 = 0.0 count = 0 for aG in g_list: sum_re_prob, re_prob_dict = incorrectness_H2_open( aG, sig_list_b, bucket_list_b, bins) score_H2 += sum_re_prob print "count =", count count += 1 score_H2 = score_H2 / N_SAMPLES # return score_H1, score_H2
def incorrectness_uncertain(before_file, after_file, bins):
    # NOTE(review): duplicate definition -- an identical function of the same
    # name appears earlier in this file; this later definition is the one
    # Python keeps. Consider removing one copy.
    # Average the H1 and H2 (open-world) incorrectness scores over N_SAMPLES
    # graphs sampled from the (uncertain) after-graph G.
    # Returns (score_H1, score_H2).
    # compute sig_list_b, bucket_list_b ONCE !
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
    # print "len B:", len(sig_list_b), len(bucket_list_b)

    # list of sampled graphs
    g_list = []
    start = time.clock()
    for i in range(N_SAMPLES):
        g_list.append(generate_sample(G))
    print "Sampling graphs - Elapsed ", (time.clock() - start)

    # H1 score
    start = time.clock()
    score_H1 = 0.0
    for aG in g_list:
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
    score_H1 = score_H1/N_SAMPLES
    print "compute score_H1: DONE, elapsed :", time.clock() - start

    # H2 score -- reuses the buckets of bG computed above
    score_H2 = 0.0
    count = 0
    for aG in g_list:
        sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    score_H2 = score_H2/N_SAMPLES

    #
    return score_H1, score_H2