def estimate_Information(Xs, Ys, Ts):
    """Estimation of the MI from missing data based on k-means clustering"""
    estimate_IXT = ee.mi(Xs, Ts)
    estimate_IYT = ee.mi(Ys, Ts)
    # estimate_IXT1 = ee.mi(Xs, Ts)
    # estimate_IYT1 = ee.mi(Ys, Ts)
    return estimate_IXT, estimate_IYT
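# A minimal usage sketch of the function above (not from the original source):
# it assumes NPEET is importable as `entropy_estimators` and that Xs, Ys, Ts
# are samples-by-dimensions arrays, which is the layout ee.mi expects. The
# synthetic data here is purely illustrative.
import numpy as np
import entropy_estimators as ee

Xs = np.random.randn(500, 3)                      # inputs
Ts = Xs[:, :2] + 0.1 * np.random.randn(500, 2)    # noisy "representation" of Xs
Ys = Ts[:, :1] + 0.1 * np.random.randn(500, 1)    # outputs driven by Ts

ixt, iyt = estimate_Information(Xs, Ys, Ts)       # both should be well above 0
print(ixt, iyt)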
def temp(self):
    d1 = self.data.getSeries('A')
    d2 = self.data.getSeries('B')
    p1 = self.mi_prepare(d1)
    p2 = self.mi_prepare(d2)
    mi_ab = ee.mi(p1, p2, k=3)
    mi_aa = ee.mi(p1, p1, k=3)
    Dprint('mi ab = ', mi_ab)
    Dprint('mi aa = ', mi_aa)
    return
def isIndependentMI(self, var1, var2):
    #print('var1 = ', var2)
    d1 = self.data.getSeries(var1)
    d2 = self.data.getSeries(var2)
    cum = 0
    count = 0
    for i in range(0, len(d1), 100):
        td1 = d1[i:i + 100]
        td2 = d2[i:i + 100]
        tmi = self.isIndependentMI2(td1, td2)
        Dprint('tmi = ', tmi)
        cum += tmi
        count += 1
    mi = cum / float(count)
    Dprint('mi = ', mi)
    Dprint('org_mi = ', ee.mi(self.mi_prepare(d1), self.mi_prepare(d2)))
    # Normalize the MI using: corr = I(X;Y) / (H(X) * H(Y))**.5
    h1 = self.getEntropy(var1)
    h2 = self.getEntropy(var2)
    corr = mi / ((h1 * h2)**.5)
    if corr < miIndThreshold:
        Dprint(var1, 'is independent of', var2, corr)
        return True
    Dprint(var1, 'not independent of', var2, corr)
    return False
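# Sketch of the normalization used above, corr = I(X;Y) / sqrt(H(X) * H(Y)),
# in standalone form (assumes NPEET; the data is synthetic). Note that
# differential entropies can be negative, so this normalization is heuristic
# and the square root can fail for very peaked distributions.
import numpy as np
import entropy_estimators as ee

x = np.random.randn(1000, 1)
y = x + 0.5 * np.random.randn(1000, 1)

mi_xy = ee.mi(x, y)
h_x, h_y = ee.entropy(x), ee.entropy(y)
corr = mi_xy / ((h_x * h_y) ** 0.5)
print('normalized MI:', corr)   # near 0 when x and y are independent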
def test_mutual_information_mod(dat, k=3, show=False):
    d = int(dat.d / 2)
    C = dat.C
    X = dat.X[:, 0:d]
    Y = dat.X[:, d:]
    # Estimate the MI
    MI_est = mi.mutual_information((X, Y), k)
    MI_est2 = ee.mi(X, Y)
    # Theoretical mutual information
    MI_th = 0
    for i in range(0, 2 * d):
        MI_th += mi.entropy_gaussian(C[i, i]) / np.log(2)
    MI_th -= mi.entropy_gaussian(C) / np.log(2)
    # Compute the differences
    dif1 = abs(MI_est - MI_th)
    dif2 = abs(MI_est2 - MI_th)
    if show:
        print("Gaussian MI:")
        print("Theoretical: ", MI_th)
        print("Estimator 1: ", MI_est)
        print("Estimator 2: ", MI_est2)
        print("Error 1: ", dif1)
        print("Error 2: ", dif2)
    return dif1, dif2
def test_mutual_information(dat, k=3):
    C = dat.C
    X = dat.X[:, 0:2]
    Y = dat.X[:, 2:4]
    MI_est = mi.mutual_information((X, Y), k=k)
    MI_est2 = ee.mi(X, Y, k=k)
    MI_th = (mi.entropy_gaussian(C[0, 0]) / np.log(2)
             + mi.entropy_gaussian(C[1, 1]) / np.log(2)
             + mi.entropy_gaussian(C[2, 2]) / np.log(2)
             + mi.entropy_gaussian(C[3, 3]) / np.log(2)
             - mi.entropy_gaussian(C) / np.log(2))
    # Print the results
    print("Gaussian MI:")
    print("Theoretical: ", MI_th)
    print("Estimator 1: ", MI_est)
    print("Estimator 2: ", MI_est2)
    print("Error 1: ", abs(MI_est - MI_th))
    print("Error 2: ", abs(MI_est2 - MI_th))
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est + .3)
    np.testing.assert_array_less(MI_est2, MI_th)
    np.testing.assert_array_less(MI_th, MI_est2 + .3)
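# For reference, the closed form behind MI_th: for a bivariate Gaussian with
# correlation rho, I(X;Y) = -0.5 * log(1 - rho**2) nats (divide by log 2 for
# bits). A quick self-contained check against the kNN estimator (sketch;
# assumes NPEET, whose mi defaults to base=2, i.e. bits):
import numpy as np
import entropy_estimators as ee

rho = 0.8
pts = np.random.multivariate_normal([0, 0], [[1, rho], [rho, 1]], 5000)

mi_true_bits = -0.5 * np.log(1 - rho ** 2) / np.log(2)
mi_est_bits = ee.mi(pts[:, :1], pts[:, 1:], k=3)
print(mi_true_bits, mi_est_bits)   # should roughly agree at this sample size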
def mutual_information(dataframe, measure1, measure2, report):
    name = '%s - %s' % (measure1, measure2)
    vector_list_x = ee.vectorize(dataframe[measure1])
    vector_list_y = ee.vectorize(dataframe[measure2])
    mi_value = ee.mi(vector_list_x, vector_list_y)  # avoid shadowing the function name
    report_line = 'Mutual Information %s: %s\n' % (name, mi_value)
    report.write(report_line)
    report.flush()
def isIndependentMI_Org(self, var1, var2):
    #print('var1 = ', var2)
    d1 = self.data.getSeries(var1)
    d2 = self.data.getSeries(var2)
    p1 = self.mi_prepare(d1)
    p2 = self.mi_prepare(d2)
    #print('d1 = ', d2)
    mi = ee.mi(p1, p2)
    # Normalize the MI using: corr = I(X;Y) / (H(X) * H(Y))**.5
    h1 = self.getEntropy(var1)
    h2 = self.getEntropy(var2)
    corr = mi / ((h1 * h2)**.5)
    if corr < miIndThreshold:
        Dprint(var1, 'is independent of', var2, corr)
        return True
    print(var1, 'not independent of', var2, corr)
    return False
def test_normality(N, ds, ns, k):
    for d in ds:
        for n in ns:
            print("ENTROPY")
            # Generate the data
            X = np.array([data.Data('random', n, d).X for i in range(0, N)])
            # Compute the entropy
            mi_ent = np.array([mi.entropy(x, k=k) for x in X])
            ee_ent = np.array([ee.entropy(x, k=k) for x in X])
            # Compute the differences between the entropies
            D = np.array([X1 - X2 for X1, X2 in zip(mi_ent, ee_ent)])
            # Run the normality test
            k2, p = stats.normaltest(D)
            print("d: ", d, ", n: ", n)
            if p <= 0.05:
                # Null hypothesis: D comes from a normal distribution
                print("We can reject the null hypothesis")
            else:
                print("We cannot reject the null hypothesis")
            print("MUTUAL INFORMATION")
            # Generate the data
            Y = [data.Data('random', n, d).X for i in range(0, N)]
            # Compute the mutual information
            mi_mi = np.array(
                [mi.mutual_information((x, y), k=k) for x, y in zip(X, Y)])
            ee_mi = np.array([ee.mi(x, y, k=k) for x, y in zip(X, Y)])
            # Compute the differences between the implementations
            D = np.array([X1 - X2 for X1, X2 in zip(mi_mi, ee_mi)])
            # Run the normality test
            k2, p = stats.normaltest(D)
            print("d: ", d, ", n: ", n)
            if p <= 0.05:
                # Null hypothesis: D comes from a normal distribution
                print("We can reject the null hypothesis")
            else:
                print("We cannot reject the null hypothesis")
def ttest(N, ds, ns, k, save):
    # MultiIndex DataFrame to store the measurements
    # Build the iterable row and column names
    iterables = [ds, ['p', 't']]
    index = pd.MultiIndex.from_product(iterables, names=['ds', 'test-res'])
    iterables2 = [['ent', 'mi'], ns]
    cols = pd.MultiIndex.from_product(iterables2, names=['function', 'ns'])
    # Create the DataFrame
    df = pd.DataFrame(index=index, columns=cols)
    for d in ds:
        for n in ns:
            # ENTROPY
            # Generate the data
            X = [data.Data('random', n, d).X for i in range(0, N)]
            # Compute the entropy
            mi_ent = np.array([mi.entropy(x, k=k) for x in X])
            ee_ent = np.array([ee.entropy(x, k=k) for x in X])
            # Run the paired t-test
            t, p = stats.ttest_rel(mi_ent, ee_ent)
            # Store the results
            df.loc[(d, 'p'), ('ent', n)] = p
            df.loc[(d, 't'), ('ent', n)] = t
            # MUTUAL INFORMATION
            Y = [data.Data('random', n, d).X for i in range(0, N)]
            mi_mi = np.array(
                [mi.mutual_information((x, y), k=k) for x, y in zip(X, Y)])
            ee_mi = np.array([ee.mi(x, y, k=k) for x, y in zip(X, Y)])
            # Run the paired t-test
            t, p = stats.ttest_rel(mi_mi, ee_mi)
            # Store the results
            df.loc[(d, 'p'), ('mi', n)] = p
            df.loc[(d, 't'), ('mi', n)] = t
    # Write the results so far to a file per function
    if (save):
        df.to_pickle("./stats/ttest.pkl")
def isWinning(feature_col, coalition, rows, threshold=0.50):
    '''
    Checks whether the union of the feature and the coalition leads to a win.
    A feature is winning if it is interdependent with at least half of the
    members of the coalition. Interdependence is measured using conditional
    mutual information.
    '''
    total_dependence = 0
    x = feature_col.reshape(-1, 1).tolist()
    if len(coalition) == 1:
        y = rows[:, [coalition[0]]].tolist()
        return ee.mi(x, y) >= threshold
    for i in range(0, len(coalition)):
        y = rows[:, [coalition[i]]].tolist()
        z = rows[:, coalition[:i] + coalition[i + 1:]].tolist()
        if ee.cmi(x, y, z) >= threshold:
            total_dependence = total_dependence + 1
    return float(total_dependence) / float(len(coalition)) >= 0.5
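# Hypothetical usage of isWinning (sketch, not from the original source):
# rows is samples-by-features, coalition is a list of column indices, and
# the threshold is in bits (NPEET's default base).
import numpy as np

rows = np.random.randn(400, 4)
rows[:, 1] = rows[:, 0] + 0.1 * np.random.randn(400)   # column 1 tracks column 0

print(isWinning(rows[:, 0], [1], rows))   # strong dependence -> likely True
print(isWinning(rows[:, 2], [3], rows))   # independent columns -> likely False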
def mutualInformation(ImageStack, TimeRef, FlagROI, ROI):
    MutualInfo = []
    for z in range(TimeRef[0] + 1, TimeRef[1]):
        if FlagROI:
            image1 = ImageStack[z - 1, ROI[0]:ROI[1], ROI[2]:ROI[3]]
            image2 = ImageStack[z, ROI[0]:ROI[1], ROI[2]:ROI[3]]
        else:
            image1 = ImageStack[z - 1, :, :]
            image2 = ImageStack[z, :, :]
        c = ee.vectorize(image1.flatten())
        d = ee.vectorize(image2.flatten())
        mi = ee.mi(c, d)
        MutualInfo.append(mi)
    return MutualInfo
print('Mutual Information')
trueent = 0.5 * (1 + log(2. * pi * cov[0][0]))   # x sub
trueent += 0.5 * (1 + log(2. * pi * cov[1][1]))  # y sub
trueent += -0.5 * (2 + log(4. * pi * pi * det(
    [[cov[0][0], cov[0][1]], [cov[1][0], cov[1][1]]])))  # xy joint
print('true MI(x:y)', trueent / log(2))
ent = []
err = []
for NN in Ntry:
    tempent = []
    for j in range(nsamples):
        points = nr.multivariate_normal(mean, cov, NN)
        x = [point[:1] for point in points]
        y = [point[1:2] for point in points]
        tempent.append(ee.mi(x, y))
    tempent.sort()
    tempmean = np.mean(tempent)
    ent.append(tempmean)
    err.append((tempmean - tempent[samplo], tempent[samphi] - tempmean))
print('samples used', Ntry)
print('estimated MI', ent)
print('95% conf int.\n', err)
print('\nIf you permute the indices of x, then MI(X:Y) = 0')
# You can use the shuffle_test method to just get the mean and standard deviation
ent = []
err = []
for NN in Ntry:
    tempent = []
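# Sketch of the shuffle test mentioned above, assuming NPEET's ee.shuffle_test
# helper (signature taken from the NPEET source; verify against your installed
# version). It repeatedly permutes one argument to build a null distribution
# for the estimator, returning its mean and a confidence interval:
import numpy as np
import entropy_estimators as ee

xs = np.random.randn(1000, 1)
ys = xs + np.random.randn(1000, 1)

mean_null, (lo, hi) = ee.shuffle_test(ee.mi, xs, ys, ns=200, ci=0.95)
print('MI under permutation ~', mean_null, '95% CI', (lo, hi))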
def estimate_Information(Xs, Ys, Ts):
    """Estimation of the MI from missing data"""
    estimate_IXT = ee.midc(Xs, Ts)  # midc: discrete Xs, continuous Ts
    estimate_IYT = ee.mi(Ys, Ts)
    return estimate_IXT, estimate_IYT
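# Note on the variants used above (sketch, assuming current NPEET naming):
# ee.mi is the all-continuous kNN estimator, ee.midc treats its first argument
# as discrete and its second as continuous, and there are also midd (both
# discrete) and micd (continuous first, discrete second). Illustration only:
import numpy as np
import entropy_estimators as ee

labels = np.random.randint(0, 3, (1000, 1))   # discrete samples
feats = np.random.randn(1000, 2)              # continuous samples

print(ee.midd(labels, labels))   # I(L;L) = H(L), here roughly log2(3) bits
print(ee.micd(feats, labels))    # mixed continuous/discrete MI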
spearman = sp.spearmanr(
    color_mat[:, 7, t][np.where(~np.isnan(color_mat[:, 5, t]))],
    color_mat[:, 9, t][np.where(~np.isnan(color_mat[:, 5, t]))])[0]
if n < 2:
    if n == 0:
        if t == t_len - 1:
            total_x_wt = np.append(total_x_wt, x.ravel())
            total_y_wt = np.append(total_y_wt, y.ravel())
            total_z_wt = np.append(total_z_wt, z.ravel())
    if n == 1:
        if t == t_len - 1:
            total_x_c = np.append(total_x_c, x.ravel())
            total_y_c = np.append(total_y_c, y.ravel())
            total_z_c = np.append(total_z_c, z.ravel())
    try:
        info = ee.mi(ee.vectorize(y), ee.vectorize(z), k=3)
    except Exception:  # fall back to 0 when the estimator fails
        info = 0
    n_cells = len(np.where(~np.isnan(color_mat[:, 5, t]))[0])
    df2 = pd.DataFrame({
        'X': [np.std(x) / np.mean(x)],
        'Y': [np.std(y) / np.mean(y)],
        'Z': [np.std(z) / np.mean(z)],
        'Information': [info],
        'Correlation': [spearman],
        'nCells': [n_cells],
        'label': [tags[n]]
    })
    frames = [df, df2]
    df = pd.concat(frames)
def main():
    sns.set_style('white')
    folder1 = 'Fig3_SourceData_1'
    names1 = listdir(folder1)
    if '.DS_Store' in names1:
        names1.remove('.DS_Store')
    names1 = sorted(names1, key=lambda x: int(x.split('.')[0]))
    names1 = [folder1 + '/' + name for name in names1]
    folder2 = 'Fig3_SourceData_2'
    names2 = listdir(folder2)
    if '.DS_Store' in names2:
        names2.remove('.DS_Store')
    names2 = sorted(names2, key=lambda x: int(x.split('.')[0]))
    names2 = [folder2 + '/' + name for name in names2]
    names = names1 + names2
    df_mar = pd.DataFrame({
        'X': [],
        'Y': [],
        'Z': [],
        'Information': [],
        'Correlation': [],
        'nCells': [],
        'label': []
    })
    tags = ['TX', 'TL']
    n = 0
    num_samps_each = 5
    total_x_wt = np.array([])
    total_x_c = np.array([])
    total_y_wt = np.array([])
    total_z_wt = np.array([])
    total_y_c = np.array([])
    total_z_c = np.array([])
    for j, name in enumerate(names):
        print(name)
        mat_contents = sio.loadmat(name)
        color_mat = mat_contents['data3D']
        t_len = mat_contents['data3D'].shape[2]
        for t in range(t_len):
            x = color_mat[:, 5, t][np.where(~np.isnan(color_mat[:, 5, t]))]
            y = color_mat[:, 7, t][np.where(~np.isnan(color_mat[:, 5, t]))]
            z = color_mat[:, 9, t][np.where(~np.isnan(color_mat[:, 5, t]))]
            spearman = sp.spearmanr(
                color_mat[:, 7, t][np.where(~np.isnan(color_mat[:, 5, t]))],
                color_mat[:, 9, t][np.where(~np.isnan(color_mat[:, 5, t]))])[0]
            if n < 2:
                if n == 0:
                    if t == t_len - 1:
                        total_x_wt = np.append(total_x_wt, x.ravel())
                        total_y_wt = np.append(total_y_wt, y.ravel())
                        total_z_wt = np.append(total_z_wt, z.ravel())
                if n == 1:
                    if t == t_len - 1:
                        total_x_c = np.append(total_x_c, x.ravel())
                        total_y_c = np.append(total_y_c, y.ravel())
                        total_z_c = np.append(total_z_c, z.ravel())
                try:
                    info = ee.mi(ee.vectorize(y), ee.vectorize(z), k=3)
                except Exception:
                    info = 0
                n_cells = len(np.where(~np.isnan(color_mat[:, 5, t]))[0])
                df2 = pd.DataFrame({
                    'X': [np.std(x) / np.mean(x)],
                    'Y': [np.std(y) / np.mean(y)],
                    'Z': [np.std(z) / np.mean(z)],
                    'Information': [info],
                    'Correlation': [spearman],
                    'nCells': [n_cells],
                    'label': [tags[n]]
                })
                frames = [df_mar, df2]
                df_mar = pd.concat(frames)
        if (j + 1) % num_samps_each == 0:
            n += 1
    f3, ax3 = plt.subplots(1, 2, figsize=(10, 3))
    ax3[0].set_xlim([-10, 300])
    sns.set_style('white')
    colors = ['salmon', 'darkblue']
    tags = ['TX', 'TL']
    tvect = np.linspace(0, 400, 1000)
    recs = []
    ax30 = ax3[0].twinx()
    for j, color in enumerate(colors):
        x, y = df_mar.loc[df_mar.label == tags[j]].nCells, df_mar.loc[
            df_mar.label == tags[j]].X
        x, y2 = df_mar.loc[df_mar.label == tags[j]].nCells, df_mar.loc[
            df_mar.label == tags[j]].Information
        ys = df_mar.loc[df_mar.label == tags[j]].Y
        zs = df_mar.loc[df_mar.label == tags[j]].Z
        nbins = 15
        bins = np.linspace(0, 275, nbins)
        idx = np.digitize(x, bins)
        means = [0]
        errors = [0]
        means2 = []
        errors2 = []
        for i in range(nbins):
            if j == 0:
                ax3[0].errorbar(bins[i] + (200 / (2 * nbins)),
                                np.mean(y[idx == i + 1]),
                                fmt='o', color=color)
                ax3[1].errorbar(bins[i] + (200 / (2 * nbins)),
                                np.mean(y2[idx == i + 1]),
                                fmt='o', color=color)
            else:
                ax30.errorbar(bins[i] + (200 / (2 * nbins)),
                              np.mean(y[idx == i + 1]),
                              fmt='o', color=color)
                ax3[1].errorbar(bins[i] + (200 / (2 * nbins)),
                                np.mean(y2[idx == i + 1]),
                                fmt='o', color=color)
            # ax.errorbar(bins[i]+(200/(2*nbins)), np.mean(y[idx==i+1]),
            #             yerr=np.std(y[idx==i+1]), fmt='o', color=color)
            means.append(np.mean(y[idx == i + 1]))
            errors.append(sp.sem(y[idx == i + 1]))
            means2.append(np.mean(y2[idx == i + 1]))
            errors2.append(sp.sem(y2[idx == i + 1]))
        means = np.asarray(means)
        errors = np.asarray(errors)
        xvect = bins + (200 / (2 * nbins))
        xvect = np.insert(xvect, 0, 0)
        if j == 0:
            ax3[0].fill_between(xvect, means - errors, means + errors,
                                color=color, alpha=0.5)
        else:
            ax30.fill_between(xvect, means - errors, means + errors,
                              color=color, alpha=0.5)
        means2 = np.asarray(means2)
        errors2 = np.asarray(errors2)
        ax3[1].fill_between(bins + (200 / (2 * nbins)),
                            means2 - errors2, means2 + errors2,
                            color=color, alpha=0.5)
        recs.append(mpatches.Rectangle((0, 0), 1, 1, fc=colors[j]))
    ax3[0].legend(recs[::-1], ['MarA Fusion', 'WT MarA'], title='Strain', loc=4)
    ax3[1].legend(recs[::-1], ['MarA Fusion', 'WT MarA'], title='Strain', loc=4)
    ax3[0].set_ylabel('Coefficient of Variation')
    ax3[0].set_title('Activator Variance over Time')
    ax3[0].set_xlabel('Number of Cells in Microcolony')
    ax3[1].set_xlabel('Number of Cells in Microcolony')
    ax3[1].set_title('Downstream coordination over time')
    ax3[1].set_ylabel('Information (bits)')
    ax3[0].set_ylim([0, 0.25])
    plt.tight_layout()
    f3.savefig('figures/modified_marA_TS.pdf', bbox_inches='tight')
cortex = cortex.T
subcortex = np.genfromtxt(
    "/Users/sudregp/Documents/surfaces/baseline_thalamusR_SA_NV_QCCIVETlt35_QCSUBePASS.csv",
    delimiter=",")
# removing first column and first row, because they're headers
subcortex = scipy.delete(subcortex, 0, 1)
subcortex = scipy.delete(subcortex, 0, 0)
# format it to be subjects x variables
subcortex = subcortex.T
# selecting only a few vertices in the thalamus
my_sub_vertices = range(subcortex.shape[1])
num_subjects = cortex.shape[0]
X = cortex
Y = subcortex[:, my_sub_vertices]
MI = np.empty([X.shape[1], Y.shape[1]])
for x in range(X.shape[1]):
    print(str(x + 1) + "/" + str(X.shape[1]))
    Xv = [[i] for i in X[:, x]]
    for y in range(Y.shape[1]):
        Yv = [[i] for i in Y[:, y]]
        MI[x, y] = ee.mi(Xv, Yv)
np.savez(env.results + "structurals_mi_thalamus_striatum_NV_QCCIVETlt35_QCSUBePASS",
         MI=MI)
# lrate *= 10
y_size, x_size = 5, 25
h_size = (30, 10)
nc0 = NeuronColumn(h_size[0], act)
nc1 = NeuronColumn(h_size[1], act)
nc2 = NeuronColumn(y_size, Linear())
# x, y, loss = linear_setup(x_size, y_size, 1000)
x, y, loss = non_linear_setup(x_size, y_size, 1000)
# x, y, loss = binary_setup(x_size, y_size, 1000)
# mi = lambda a, b: np.mean([ee.mi(a, b, k=3) for _ in range(1)])
mi = lambda a, b: ee.mi(a, b, k=3)
# mi = lambda a, b: ee2.mutual_information((a, b), k=3)
info, grad_stat = [], []
for e in range(epochs):
    h0, afactor0 = nc0(x)
    h1, afactor1 = nc1(h0)
    y_hat, afactor2 = nc2(h1)
    error, error_deriv = loss(y_hat, y)
    if lrule == LearningRule.BP:
        dnc2 = error_deriv * afactor2
        dnc1 = np.dot(dnc2, nc2.p.W.T) * afactor1
        dnc0 = np.dot(dnc1, nc1.p.W.T) * afactor0
def run(self):
    self.result = ee.mi(self.x, self.y, k=10)
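# Presumably this run() overrides threading.Thread.run so the MI estimate can
# be computed off the main thread; a minimal self-contained sketch of that
# pattern (the class and data here are illustrative, not from the source):
import threading
import numpy as np
import entropy_estimators as ee

class MIThread(threading.Thread):
    def __init__(self, x, y):
        super(MIThread, self).__init__()
        self.x, self.y = x, y
        self.result = None

    def run(self):
        self.result = ee.mi(self.x, self.y, k=10)

t = MIThread(np.random.randn(500, 1), np.random.randn(500, 1))
t.start()
t.join()
print(t.result)   # close to 0 for independent inputs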
def isIndependentMI2(self, d1, d2):
    p1 = self.mi_prepare(d1)
    p2 = self.mi_prepare(d2)
    #print('d1 = ', d2)
    mi = ee.mi(p1, p2)
    return mi
data = np.concatenate((sig_set, back_set), axis=0)
# data = [sig  targets weights
#         back targets weights]
# Weight preprocessing
#data[:, 2] /= np.max(data[:, 2])
#data[:, 3] /= np.max(data[:, 3])
valid_id = np.logical_and(data[:, 2] > 10e-35, data[:, 3] > 10e-35)
data = data[valid_id]
data[:, 2] = -np.log10(data[:, 2])
data[:, 3] = -np.log10(data[:, 3])
print(data[:, 6].shape)
print("Mi with masses : ",
      mi(np.c_[data[:, :2], data[:, 4:6]], data[:, 6].reshape(-1, 1)))
print("Mi with masses and weights : ",
      mi(data[:, :6], data[:, 6].reshape(-1, 1)))
sys.exit()
min_max_scaler = preprocessing.MinMaxScaler(
    feature_range=(np.amin(data[:, 0]), np.amax(data[:, 0])))
#data = np.c_[min_max_scaler.fit_transform(data[:, :6]), data[:, 6:]]
data = np.c_[data[:, :2],
             min_max_scaler.fit_transform(data[:, 2:4]),
             data[:, 4:]]
#data = np.c_[preprocessing.scale(data[:, :6]), data[:, 6:]]
for i in range(0, 50):
    print(data[i, 2:4])
print('Total learning size = ', data.shape[0])
def scoreDependence(X, Y):
    dep = ee.mi(ee.vectorize(X), ee.vectorize(Y))
    return dep
if display_fig:
    fig2 = plt.figure()
    ax2 = fig2.subplots()
mis = []
t = np.linspace(0, n_frames / fs, n_frames)
for face in range(n_faces):
    print('\nProcessing Face number ' + str(face + 1) + ' of ' + str(n_faces) + '...')
    for i in range(n_frames - win_len):
        currV = V[face, i:i + win_len, :]
        currA = A[face, i:i + win_len, :]
        mi[i + win_len] = np.abs(ee.mi(currV, currA))
    mi[:win_len] = np.ones(win_len) * mi[win_len]
    mi_smoothed = np.expand_dims(smooth(medfilt(mi, 49), 9), axis=1)
    mis.append(mi_smoothed)
    if display_fig:
        ax2.plot(t, mi_smoothed, label='Face' + str(face))
if display_fig:
    ax2.legend()
    ax2.grid()
    ax2.set_title('Mutual Information between Audio and Video')
mis = np.hstack(mis)
whospeaks = np.zeros(mis.shape[0])
def mutualInformation(x, y, k):
    return ee.mi(np.array([x]).T, np.array([y]).T, k=k)
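# The transposes above matter: ee.mi wants samples-by-dimensions arrays, so
# two 1-D sequences must become column vectors first. Equivalent sketch:
import numpy as np
import entropy_estimators as ee

x = np.random.randn(1000)
y = x + np.random.randn(1000)

print(mutualInformation(x, y, k=3))
print(ee.mi(x.reshape(-1, 1), y.reshape(-1, 1), k=3))   # same result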
# In this case, I use the average entropy
# over the training examples
iXT1.append(t1_ent / h_cnt)
print(iXT1)
iXT2.append(t2_ent / h_cnt)
print(iXT2)
iXT3.append(t3_ent / h_cnt)
print(iXT3)
iXT4.append(t4_ent / h_cnt)
print(iXT4)
# Estimate the continuous mutual information using the
# k-nearest neighbors estimator
# https://github.com/gregversteeg/NPEET
ys = ee.vectorize(ys)
iTY1.append(ee.mi(t1s, ys))
print(iTY1)
iTY2.append(ee.mi(t2s, ys))
print(iTY2)
iTY3.append(ee.mi(t3s, ys))
print(iTY3)
iTY4.append(ee.mi(t4s, ys))
print(iTY4)
xs = []
ys = []
t1s = []
t2s = []
t3s = []
t4s = []
import entropy_estimators as ee
import numpy as np
from math import floor
from util import shm, shl
import entropy as ke


def mi(x, y):
    # Discrete MI via the identity I(X;Y) = H(X) + H(Y) - H(X,Y)
    return (-ee.entropyd(np.concatenate([x, y], axis=1))
            + ee.entropyd(x) + ee.entropyd(y))


def discretize(x, min_x=0.0, max_x=1.0, n_bins=10):
    x_discrete = np.zeros((x.shape[0], x.shape[1] * n_bins))
    for v_id, v in enumerate(x):
        for di, subv in enumerate(v):
            bin_id = int(floor(n_bins * (max(min(subv, max_x - 1e-08), min_x) - min_x)
                               / (max_x - min_x)))
            x_discrete[v_id, di * n_bins + bin_id] += 1
    return x_discrete


x = np.random.randn(10000, 10)
y = np.random.randn(10000, 10)

xd = discretize(x, min_x=-1.0, max_x=1.0, n_bins=2)
yd = discretize(y, min_x=-1.0, max_x=1.0, n_bins=2)

print(mi(yd, xd))
print(ee.mi(x, y, k=2))
def calc_MI_npeet(x, y):
    return ee.mi(x.reshape((x.shape[0], 1)),
                 y.reshape((y.shape[0], 1)),
                 base=2)
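# The base argument sets the logarithm base of the estimate: base=2 (NPEET's
# default) reports bits, while base=np.e would report nats. Quick sketch:
import numpy as np

x = np.random.randn(2000)
y = x + np.random.randn(2000)

bits = calc_MI_npeet(x, y)
nats = bits * np.log(2)   # manual bits -> nats conversion
print(bits, nats)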