def test_margins():
    """Check margins() against hand-computed sums for 1-D, 2-D and 3-D inputs."""
    # 1-D input: a single marginal, equal to the input itself.
    result = margins(np.array([1]))
    assert_equal(len(result), 1)
    assert_array_equal(result[0], np.array([1]))

    # 1x1 input: both marginals are the lone element, kept 2-D.
    first, second = margins(np.array([[1]]))
    assert_array_equal(first, np.array([[1]]))
    assert_array_equal(second, np.array([[1]]))

    # 2x6 input: first marginal is a column of row sums, second a row of
    # column sums.
    table = np.arange(12).reshape(2, 6)
    first, second = margins(table)
    assert_array_equal(first, np.array([[15], [51]]))
    assert_array_equal(second, np.array([[6, 8, 10, 12, 14, 16]]))

    # 2x3x4 input: each marginal keeps one axis and has length 1 on the others.
    cube = np.arange(24).reshape(2, 3, 4)
    first, second, third = margins(cube)
    assert_array_equal(first, np.array([[[66]], [[210]]]))
    assert_array_equal(second, np.array([[[60], [92], [124]]]))
    assert_array_equal(third, np.array([[[60, 66, 72, 78]]]))
def test_margins():
    """Verify margins() output for inputs of increasing dimensionality."""
    # One-dimensional array: exactly one marginal is returned.
    m = margins(np.array([1]))
    assert_equal(len(m), 1)
    assert_array_equal(m[0], np.array([1]))

    # Single-cell 2-D array.
    m0, m1 = margins(np.array([[1]]))
    wanted = (np.array([[1]]), np.array([[1]]))
    for actual, expected in zip((m0, m1), wanted):
        assert_array_equal(actual, expected)

    # 2-D array with 2 rows and 6 columns.
    m0, m1 = margins(np.arange(12).reshape(2, 6))
    wanted = (
        np.array([[15], [51]]),
        np.array([[6, 8, 10, 12, 14, 16]]),
    )
    for actual, expected in zip((m0, m1), wanted):
        assert_array_equal(actual, expected)

    # 3-D array of shape (2, 3, 4).
    m0, m1, m2 = margins(np.arange(24).reshape(2, 3, 4))
    wanted = (
        np.array([[[66]], [[210]]]),
        np.array([[[60], [92], [124]]]),
        np.array([[[60, 66, 72, 78]]]),
    )
    for actual, expected in zip((m0, m1, m2), wanted):
        assert_array_equal(actual, expected)
def compute_mi(cov_xy=0.5, n_bins=100):
    """Estimate the mutual information of a binned 2D Gaussian.

    Arguments:
        cov_xy (float): Off-diagonal element of the 2x2 covariance matrix
            (the diagonal is fixed at 1).
        n_bins (int): Number of histogram bins used to "quantize" the
            continuous 2D Gaussian.

    Returns:
        The mutual information (in nats) of the binned joint distribution.
        The value is also printed.
    """
    # Draw joint samples; `sample` is defined elsewhere and is presumably
    # returning an (N, 2) array -- confirm against its definition.
    samples = sample(cov=[[1, cov_xy], [cov_xy, 1]])

    # Histogram the samples and normalize the counts into probabilities.
    counts, _ = np.histogramdd(samples, bins=n_bins)
    tiny = np.finfo(float).eps
    # Floor at machine epsilon so empty bins do not produce log(0).
    p_joint = np.maximum(counts / counts.sum(), tiny)

    # Product of the marginals, floored the same way.
    p_x, p_y = margins(p_joint)
    p_indep = np.maximum(p_x * p_y, tiny)

    # MI = sum over bins of P(X,Y) * log(P(X,Y) / (P(X) * P(Y)))
    mi = np.sum(p_joint * np.log(p_joint / p_indep))
    print("Computed MI: %0.6f" % mi)
    return mi
def marginal(dist, dim):
    """
    Compute marginal of distribution dist along axis dim.

    Only the requested marginal is computed: every axis other than ``dim``
    is summed out, one axis at a time in increasing axis order.

    Arguments:
        dist (array_like): N-dimensional array holding the joint distribution.
        dim (int): Axis to keep. Negative values count from the last axis.

    Returns:
        ndarray: ``dist`` summed over all axes except ``dim``, with the
        length-1 axes squeezed out.

    Raises:
        IndexError: If ``dim`` is not a valid axis of ``dist``.
    """
    arr = np.asarray(dist)
    axes = list(range(arr.ndim))
    # List indexing resolves negative dims and raises IndexError when dim is
    # out of range.
    keep = axes[dim]
    marg = arr
    for axis in axes:
        if axis != keep:
            # keepdims leaves the remaining axis numbers valid for later passes.
            marg = np.sum(marg, axis=axis, keepdims=True)
    return np.squeeze(marg)
def stdres(observed, expected):
    """Return the standardized residuals of a two-way contingency table.

    Arguments:
        observed (ndarray): 2-D array of observed frequencies.
        expected (ndarray): Array of expected frequencies, broadcastable
            against ``observed``.

    Returns:
        ndarray: ``(observed - expected) / sqrt(v)`` where
        ``v = csum * rsum * (n - rsum) * (n - csum) / n**3``, ``rsum`` and
        ``csum`` being the row and column totals and ``n`` the grand total.
    """
    # With integers, both the product
    #     csum * rsum * (n - rsum) * (n - csum)
    # and n**3 can overflow (n**3 exceeds int64 once n passes ~2.1 million),
    # so convert n, rsum and csum to floating point before any arithmetic.
    n = np.float64(observed.sum())
    rsum, csum = margins(observed)
    rsum = rsum.astype(np.float64)
    csum = csum.astype(np.float64)
    v = csum * rsum * (n - rsum) * (n - csum) / n**3
    return (observed - expected) / np.sqrt(v)
def compute_mi(cov_xy=0.9, n_bins=100):
    """Estimate mutual information of a correlated 2D Gaussian by binning.

    ``cov_xy`` is the off-diagonal entry of the covariance matrix (unit
    variances on the diagonal); ``n_bins`` is the number of histogram bins.
    The estimate is printed and returned.
    """
    covariance = [[1, cov_xy], [cov_xy, 1]]
    # `sample` is defined elsewhere; presumably yields (N, 2) samples.
    points = sample(cov=covariance)

    hist, _ = np.histogramdd(points, bins=n_bins)
    floor = np.finfo(float).eps

    # Normalize counts to probabilities, then lift anything below machine
    # epsilon up to it so the logarithm stays finite.
    prob = hist / hist.sum()
    prob = np.where(prob < floor, floor, prob)

    marg_x, marg_y = margins(prob)
    independent = marg_x * marg_y
    independent = np.where(independent < floor, floor, independent)

    # Sum of P(X,Y) * log(P(X,Y) / (P(X) * P(Y))) over all bins.
    mi = (prob * np.log(prob / independent)).sum()
    print("Computed MI:", mi)
    return mi
def std_res(observed, expected):
    """
    Compute the standardized Pearson residuals of a contingency table.

    :param observed: an 2-by-n numpy array containing the observed frequencies
    :param expected: an 2-by-n numpy array containing the expected frequencies under the null hypothesis
    :return res: the standardized Pearson's residuals indicating which dimensions in the observed data show the
        stronger deviation from the expected frequencies
    """
    # Do all arithmetic in floating point: with integer counts both the
    # product csum * rsum * (n - rsum) * (n - csum) and n**3 can overflow
    # (n**3 exceeds int64 once n passes ~2.1 million).
    n = np.float64(observed.sum())
    rsum, csum = contingency.margins(observed)
    rsum = rsum.astype(np.float64)
    csum = csum.astype(np.float64)
    v = csum * rsum * (n - rsum) * (n - csum) / n**3
    res = (observed - expected) / np.sqrt(v)
    return res
def compute_mi(cov_xy=0.5, n_bins=100):
    """Estimate mutual information between the two axes of a binned 2D Gaussian.

    Arguments:
        cov_xy (float): Off-diagonal element of the covariance matrix; the
            diagonal entries are 1.
        n_bins (int): Number of histogram bins.

    Returns:
        The mutual information of the binned distribution, which is also
        printed with six decimal places.
    """
    # Get joint distribution samples (`sample` is defined elsewhere;
    # presumably it returns an (N, 2) array -- confirm).
    draws = sample(cov=[[1, cov_xy], [cov_xy, 1]])

    # Histogram binning, normalized so the bins sum to one.
    binned, _ = np.histogramdd(draws, bins=n_bins)
    lower = np.finfo(float).eps
    # Clip at machine epsilon so empty bins cannot yield log(0).
    pxy = np.clip(binned / binned.sum(), lower, None)

    # Marginal distributions and their clipped product.
    px, py = margins(pxy)
    px_py = np.clip(px * py, lower, None)

    # MI is the sum of P(X,Y) * log(P(X,Y) / (P(X) * P(Y))).
    log_ratio = np.log(pxy / px_py)
    mi = (pxy * log_ratio).sum()
    print("Computed MI: %0.6f" % mi)
    return mi
def uncertainty(j):
    """
    Use the Shannon entropy of the marginal distributions and the Shannon
    entropy of the joint distribution to measure nonlinear dependence
    (mutual information) between X and Y.

    The joint distribution j between X and Y must be known as a 2-D array
    of probabilities; it is used as given and is not re-normalized.

    Returns Hx + Hy - Hxy, computed with the natural logarithm (nats).
    """

    def shannon_entropy(p):
        # -sum(p * log(p)) over all entries; zero-probability entries are
        # dropped, following the convention 0 * log(0) = 0.
        p = np.ravel(p)
        p = p[p > 0]
        return -np.sum(p * np.log(p))

    joint = np.asarray(j, dtype=float)
    x, y = margins(joint)  # marginal distributions of X and Y

    Hx = shannon_entropy(x)  # Shannon entropy of the x marginal distribution
    Hy = shannon_entropy(y)  # mutatis mutandis for y
    # Entropy of the joint distribution itself. Building it from products of
    # the marginals would always give Hx + Hy and hence a result of zero.
    Hxy = shannon_entropy(joint)
    return Hx + Hy - Hxy
def marginal(dist, dim):
    """Compute marginal of distribution dist along axis dim.

    ``dist`` is an N-dimensional array-like; ``dim`` is the axis to keep
    (negative values count from the last axis). Returns ``dist`` summed over
    every other axis, with the length-1 axes squeezed out. Raises IndexError
    if ``dim`` is not a valid axis of ``dist``.
    """
    # Note, only the requested marginal is computed: each unwanted axis is
    # summed out in turn (in increasing axis order).
    values = np.asarray(dist)
    all_axes = list(range(values.ndim))
    kept = all_axes[dim]  # handles negative dim; IndexError if out of range
    for axis in all_axes:
        if axis == kept:
            continue
        # keepdims so that later axis numbers still refer to the same axes
        values = np.sum(values, axis=axis, keepdims=True)
    return np.squeeze(values)  # drop the summed-out length-1 axes
def stdres(observed, expected):
    """Return the standardized residuals of a two-way contingency table.

    Arguments:
        observed (ndarray): 2-D array of observed frequencies.
        expected (ndarray): Array of expected frequencies, broadcastable
            against ``observed``.

    Returns:
        ndarray: ``(observed - expected) / sqrt(v)`` with
        ``v = csum * rsum * (n - rsum) * (n - csum) / n**3``, where ``rsum``
        and ``csum`` are the row and column totals and ``n`` the grand total.
    """
    # Integer counts can overflow both in the four-factor product below and
    # in n**3 (which exceeds int64 once n passes ~2.1 million), so switch to
    # floating point before doing any arithmetic.
    n = np.float64(observed.sum())
    rsum, csum = margins(observed)
    rsum = rsum.astype(np.float64)
    csum = csum.astype(np.float64)
    v = csum * rsum * (n - rsum) * (n - csum) / n**3
    return (observed - expected) / np.sqrt(v)