def cmidcutd(x, y, z, slice_num=20):
    # Conditional MI of x and y under condition z; y is binned into
    # slice_num intervals by the module-level `slice` helper.
    x_vec = np.transpose(np.array([x]))
    y_vec = np.transpose(np.array([slice(y, slice_num)]))
    z_vec = np.transpose(np.array([z]))
    return ee.cmidd(x_vec, y_vec, z_vec)
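

# A minimal usage sketch for cmidcutd on synthetic data (all values below are
# hypothetical; np and ee come from this module's imports, and `slice` is the
# binning helper defined elsewhere in this file):
def _demo_cmidcutd():
    rng = np.random.default_rng(0)
    x = rng.integers(0, 2, 500)    # discrete variable
    z = rng.integers(0, 3, 500)    # discrete conditioning variable
    y = z + rng.normal(size=500)   # continuous; cmidcutd bins it into slice_num cells
    # Estimated I(x; y | z); should be near zero since x is independent of y.
    print(cmidcutd(x, y, z, slice_num=20))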
def casual_entropy(i, j, K, data):
    # Permutation test for the conditional MI I(x_i; x_j | x_K): compare the
    # observed estimate against a null distribution built by shuffling y.
    x = data[i]
    y = data[j]
    if len(K) == 0:
        return casual_entropy_empty(i, j, K, data)
    # slice_num = int(np.power(len(x), 1.0 / 2) / 2)
    slice_num = int(np.power(len(x), 0.4) / 2)
    if x.dtype == 'float64':
        x = slice(x, slice_num)  # bin continuous values before the discrete estimator
        x_vec = np.array([[s] for s in x])
    else:
        x_vec = np.array([[s] for s in x])
    if y.dtype == 'float64':
        y = slice(y, slice_num)
        y_vec = np.array([[s] for s in y])
    else:
        y_vec = np.array([[s] for s in y])
        print("index j " + str(j) + " is discrete")
    z_all = []
    for k in K:
        z = data[k]
        if z.dtype == 'float64':
            z = slice(z, slice_num)
            z_vec = np.array([[s] for s in z])
        else:
            z_vec = np.array([[s] for s in z])
        z_all.append(z_vec)
    z_combine = np.c_[tuple(z_all)]  # stack conditioning variables column-wise
    # x_clone = np.copy(x_vec)
    y_clone = np.copy(y_vec)
    ns = 200
    ci = 0.95
    outputs = []
    # outputs2 = []
    for _ in range(ns):  # '_' rather than 'i', which would shadow the argument
        np.random.shuffle(y_clone)
        outputs.append(ee.cmidd(x_vec, y_clone, z_combine, base=2))
        # outputs2.append(ee.midd(x_clone, y_vec, base=2))
    outputs.sort()
    # outputs2.sort()
    v = ee.cmidd(x_vec, y_vec, z_combine, base=2)
    ave = np.mean(outputs)
    ci0 = outputs[int((1. - ci) / 2 * ns)]
    ci1 = outputs[int((1. + ci) / 2 * ns)]
    if_large_zero = v > ci1  # significant only if above the null's upper CI bound
    n = 200
    useful_result = if_large_zero * v
    std_modified = np.sqrt((n - 1) / n) * np.std(outputs)
    # multi = abs(v - ave) / np.std(outputs)
    multi = 0
    # (statistic, pvalue) = stats.ttest_ind_from_stats(mean1=ave, std1=std_modified,
    #     nobs1=200, mean2=v, std2=0, nobs2=2, equal_var=False)
    # res = stats.ttest_1samp(np.array(outputs), [ave, 0])
    statistic = 1
    print("index " + str(j) + " function: casual_entropy, the length of data", len(x),
          "the slice number is", slice_num, "useful value is", useful_result,
          "multi sigma is", multi)
    # print("statistic, pvalue", statistic, pvalue)
    return useful_result, v, ave, (ci0, ci1), if_large_zero, abs(statistic) * if_large_zero
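

# A minimal sketch of the shuffle test casual_entropy performs, on synthetic
# data (all names here are hypothetical; np and ee come from this module's
# imports). Shuffling y breaks any dependence with x, so repeated estimates on
# shuffled copies trace out the null distribution of the CMI estimator; the
# observed value only counts if it clears the null's upper CI bound.
def _demo_casual_entropy_null():
    rng = np.random.default_rng(0)
    n = 400
    z = rng.integers(0, 4, n)
    x = (z + rng.integers(0, 2, n)) % 4  # x depends on z
    y = rng.integers(0, 2, n)            # y independent of x given z
    x_vec = np.array([[s] for s in x])
    y_vec = np.array([[s] for s in y])
    z_vec = np.array([[s] for s in z])
    v = ee.cmidd(x_vec, y_vec, z_vec, base=2)
    ns = 200
    null = []
    y_clone = np.copy(y_vec)
    for _ in range(ns):
        np.random.shuffle(y_clone)
        null.append(ee.cmidd(x_vec, y_clone, z_vec, base=2))
    null.sort()
    ci1 = null[int((1. + 0.95) / 2 * ns)]
    print("observed", v, "null upper bound", ci1, "significant:", v > ci1)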
    def cmi(self, X, Y, Z):
        np.random.seed(0)
        return ee.cmidd(X.copy(order='C'), Y.copy(order='C'), z=Z.copy(order='C'))
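
    # Call sketch for cmi (hypothetical data; the enclosing class is defined
    # above). NPEET's cmidd treats each row as one sample, so X, Y, Z must
    # share their first dimension; the C-ordered copies presumably guard
    # against non-contiguous views being passed in, and the fixed seed makes
    # repeated calls reproducible.
    #
    #   X = np.array([[0], [1], [0], [1]])
    #   Y = np.array([[0], [1], [1], [0]])
    #   Z = np.array([[0], [0], [1], [1]])
    #   self.cmi(X, Y, Z)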
    err.append((tempmean - tempent[samplo], tempent[samphi] - tempmean))

print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)

# DISCRETE ESTIMATORS
print("\n\nTest of the discrete entropy estimators\n")
print("For z = y xor x, with x, y uniform random binary, we should get "
      "H(x) = H(y) = H(z) = 1, H(x:y) etc = 0, H(x:y|z) = 1")
# Sanity check: x and y are independent fair bits and z = x xor y, so each
# variable carries 1 bit, x and y share none, and conditioning on z makes
# either of x, y determine the other.
x = [0, 0, 0, 0, 1, 1, 1, 1]
y = [0, 1, 0, 1, 0, 1, 0, 1]
z = [0, 1, 0, 1, 1, 0, 1, 0]
print("H(x), H(y), H(z)", ee.entropyd(x), ee.entropyd(y), ee.entropyd(z))
print("H(x:y), etc", ee.midd(x, y), ee.midd(z, y), ee.midd(x, z))
print("H(x:y|z), etc", ee.cmidd(x, y, z), ee.cmidd(z, y, x), ee.cmidd(x, z, y))

# KL divergence estimator
print("\n\nKL divergence estimator (not symmetric, not required to have the "
      "same number of samples in each sample set)")
print("should be 0 for the same distribution")
sample1 = [[2 * random.random()] for i in range(200)]
sample2 = [[2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))
print("should be infinite for totally disjoint distributions (but this "
      "estimator has an upper bound like log(dist) between disjoint prob. masses)")
sample2 = [[3 + 2 * random.random()] for i in range(300)]
print('result:', ee.kldiv(sample1, sample2))


def test_discrete(size=1000, y_func=lambda x: x**2):
    print("\nTest discrete.")
def first_plot():
    el = EdgeList()
    file_name = './BA_network_all.xlsx'
    el.load_records(file_name)
    # el.smooth_and_normalize_records(sl_normalize_indices=['degree'], smooth_length=100)
    degree, if_rand = merge(el.records["degree"], el.records_random["degree"])
    distances, if_rand = merge(el.records["distance"], el.records_random["distance"])
    print(if_rand.dtype == 'int64')
    print(degree.dtype == 'float64')
    # print(len(distances), len(if_rand), len(degree))

    # Walk over the edge list in ten equal windows and estimate several
    # (conditional) MI variants on each window.
    step_num = len(if_rand)
    step_size = step_num // 10
    edge_nums = np.arange(step_size, len(if_rand), step_size)
    ent0, ent1, ent2, ent3, ent4, ent5, ent6, ent7 = ([] for _ in range(8))
    ents = {}
    for i in edge_nums:
        if_rand_cut = if_rand[i - step_size:i]
        degree_cut = degree[i - step_size:i]
        distances_cut = distances[i - step_size:i]
        # print(len(if_rand_cut), len(degree_cut))
        k = int(np.sqrt(len(degree_cut)))
        slice_num = int(np.power(len(degree_cut), 1.0 / 3))  # replaces a fixed slice_num = 20
        # print("length of data, number of bins", len(degree_cut), slice_num)
        ent1.append(ep.midcut(if_rand_cut, degree_cut, slice_num=slice_num))
        ent2.append(ep.midc(if_rand_cut, degree_cut, k=20))
        # ent0.append(ep.cmidcutd(if_rand_cut, degree_cut, distances_cut))
        k = 20
        a, b = ep.cmiddc(if_rand_cut, distances_cut, degree_cut, k=k)
        ent6.append(a)
        ent7.append(b)
        ent3.append(ep.cmiddcut(if_rand_cut, distances_cut, degree_cut))
        # Shuffle baselines: destroy the ordering of one variable, re-estimate.
        if_rand_cut_copy = if_rand_cut.copy()
        random.shuffle(if_rand_cut_copy)
        ent4.append(ep.cmiddcut(if_rand_cut_copy, distances_cut, degree_cut))
        random.shuffle(distances_cut)
        ent5.append(ep.cmiddcut(if_rand_cut, distances_cut, degree_cut))
        # ent5.append(ep.cmidcutd(if_rand_cut, degree_cut, distances_cut))
        # print(a.all(), b.all(), c.all())
        # ent5.append(ep.cmicut(if_rand_cut, degree_cut, distances_cut))
        # ent6.append(ep.cmi(if_rand_cut, degree_cut, distances_cut))
    print('len')
    print(len(ent6))
    print(len(ent0))
    # print(ent6)
    # x_vec = np.array([[s] for s in if_rand])
    # y_vec = np.array([[s] for s in degree])
    # print("midc", ee.midc(x_vec, y_vec, base=2, k=20))
    # print("midc", ep.midc(if_rand, degree, k=40, base=2))
    # print(ee.shuffle_test(ee.midc, x_vec, y_vec, base=2, k=10))
    # print(ep.midc(if_rand, degree, k=20, base=2))
    slice_num = int(np.power(len(degree), 1.0 / 2))
    print(ep.midd(if_rand, distances))
    x_vec = np.transpose(np.array([if_rand]))
    y_vec = np.transpose(np.array([distances]))
    z_vec = np.transpose(np.array([ep.slice(degree, slice_num)]))
    print(ee.shuffle_test(ee.midd, x_vec, y_vec, base=2))
    print(ee.shuffle_test(ee.midd, x_vec, z_vec, base=2))
    print(ee.cmidd(x_vec, z_vec, y_vec, base=2))
    # print(ee.shuffle_test(ee.cmidd, x_vec, y_vec, z_vec, base=2))
    # plt.plot(ent0, label=r'0')
    # plt.plot(ent6, label=r'6')
    plt.plot(ent7, label=r'7')
    plt.plot(ent1, 'vb-', label=r'1')
    plt.plot(ent2, 'or--', label=r'2')
    plt.plot(ent3, label=r'3')
    plt.plot(ent4, label=r'4')
    plt.plot(ent5, label=r'5')
    # plt.plot(edge_nums, ent5, 'b', label='5')
    # plt.plot(edge_nums, ent6, 'b--', label='6')
    plt.legend()
    plt.show()
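

# Hypothetical entry point: calling first_plot directly assumes
# BA_network_all.xlsx sits next to this script and that EdgeList, merge,
# ep, ee, np, random, and plt are imported at the top of the file.
if __name__ == '__main__':
    first_plot()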