def find_latent_gaussian(bin_means, bin_cov, accuracy=1e-10):
    """Compute the parameters of the latent Gaussian random vector U that
    generates a binary Bernoulli vector X with the given means and
    covariances according to

        X = 0  <=>  U < -g
        X = 1  <=>  U > -g

    Adapted from `<www.kyb.mpg.de/bethgegroup/code/efficientsampling>`_

    Parameters
    ----------
    bin_means : array_like, shape (d,)
        Means of the binary variables; each must lie in [0, 1).
    bin_cov : array_like, shape (d, d)
        Covariance matrix of the binary variables.
    accuracy : float, optional
        Bisection tolerance for each off-diagonal latent correlation
        (default 1e-10).

    Returns
    -------
    gauss_mean : ndarray, shape (d, 1)
        Means of the latent Gaussian vector.
    gauss_cov : ndarray, shape (d, d)
        Covariance (correlation) matrix of the latent Gaussian vector.

    Raises
    ------
    ValueError
        If a mean lies outside the valid range, or if no joint Bernoulli
        distribution with the requested covariance matrix exists.
    """
    from statsmodels.sandbox.distributions.multivariate import mvstdnormcdf

    if np.any(bin_means < 0) or np.any(bin_means >= 1):
        raise ValueError("Mean for Gaussians has to be between 0 and 1!")

    d = len(bin_means)
    gauss_mean = np.array([ltqnorm(m) for m in bin_means])
    gauss_cov = np.eye(d)
    # Loop-invariant upper integration limit of the bivariate normal CDF,
    # hoisted so it is not rebuilt on every call inside the double loop.
    upper = np.array([np.inf, np.inf])

    for i in range(d):
        for j in range(i + 1, d):
            c_min = -1
            c_max = 1

            # constant
            pn = bin_means[[i, j]].prod()

            # check whether DG distribution for covariance exists: the
            # requested covariance must lie between Psi(-1)-pn and Psi(1)-pn
            if (bin_cov[i, j] - mvstdnormcdf(-gauss_mean[[i, j]], upper, -1) + pn) < -1e-3 or \
               (bin_cov[i, j] - mvstdnormcdf(-gauss_mean[[i, j]], upper, 1) + pn) > 1e-3:
                raise ValueError(
                    'A joint Bernoulli distribution with the given covariance matrix does not exist!'
                )

            # determine Lambda_ij iteratively by bisection (Psi is monotonous in rho)
            while c_max - c_min > accuracy:
                c_new = (c_max + c_min) / 2.
                if bin_cov[i, j] > mvstdnormcdf(-gauss_mean[[i, j]], upper, c_new) - pn:
                    c_min = c_new
                else:
                    c_max = c_new

            gauss_cov[i, j] = gauss_cov[j, i] = c_max

    return gauss_mean.reshape(d, 1), gauss_cov
def find_latent_gaussian(bin_means, bin_cov, accuracy=1e-10):
    """Find the latent Gaussian vector U behind a binary Bernoulli vector X.

    X is generated from U by thresholding: X = 0 when U < -g and X = 1 when
    U > -g, where g is the Gaussian mean determined here so that X has the
    requested means and covariances.

    Adapted from `<www.kyb.mpg.de/bethgegroup/code/efficientsampling>`_

    Parameters
    ----------
    bin_means : array_like, shape (d,)
        Means of the binary variables, each in [0, 1).
    bin_cov : array_like, shape (d, d)
        Covariance matrix of the binary variables.
    accuracy : float, optional
        Bisection tolerance per latent correlation (default 1e-10).

    Returns
    -------
    tuple of ndarray
        The (d, 1) latent mean vector and the (d, d) latent covariance.
    """
    from statsmodels.sandbox.distributions.multivariate import mvstdnormcdf

    if np.any(bin_means < 0) or np.any(bin_means >= 1):
        raise Exception("Mean for Gaussians has to be between 0 and 1!")

    dim = len(bin_means)
    gauss_mean = np.array([ltqnorm(p) for p in bin_means])
    gauss_cov = np.eye(dim)

    for row in range(dim):
        for col in range(row + 1, dim):
            # constant
            pair_prod = bin_means[[row, col]].prod()
            target = bin_cov[row, col]

            def psi(rho):
                # Upper-orthant probability of the standard bivariate normal
                # with correlation rho, lower limits at -gauss_mean.
                return mvstdnormcdf(-gauss_mean[[row, col]],
                                    np.array([np.inf, np.inf]), rho)

            # check whether DG distribution for covariance exists
            if target - psi(-1) + pair_prod < -1e-3 or \
               target - psi(1) + pair_prod > 1e-3:
                raise Exception('A joint Bernoulli distribution with the given covariance matrix does not exist!')

            # determine Lambda_ij iteratively by bisection (Psi is monotonous in rho)
            lo, hi = -1, 1
            while hi - lo > accuracy:
                midpoint = (hi + lo) / 2.
                if target > psi(midpoint) - pair_prod:
                    lo = midpoint
                else:
                    hi = midpoint

            gauss_cov[row, col] = gauss_cov[col, row] = hi

    return gauss_mean.reshape(len(bin_means), 1), gauss_cov
def test_mvn_mvt_1(self):
    """Equal-correlation case: MVT and MVN CDF values versus R references."""
    lower, upper = self.a, self.b
    # Reference values computed in R with mvtnorm using
    # algorithm = GenzBretz(maxpts = 100000, abseps = 0.000001, releps = 0)
    # or higher precision.
    probmvt_R = 0.60414   # reported error approx. 7.5e-06
    probmvn_R = 0.673970  # reported error approx. 6.4e-07

    assert_almost_equal(probmvt_R,
                        mvstdtprob(lower, upper, self.corr_equal, self.df), 4)
    assert_almost_equal(probmvn_R,
                        mvstdnormcdf(lower, upper, self.corr_equal,
                                     abseps=1e-5), 4)

    # Tighter tolerance and more points should match one extra decimal.
    mvn_high = mvstdnormcdf(lower, upper, self.corr_equal,
                            abseps=1e-8, maxpts=10000000)
    assert_almost_equal(probmvn_R, mvn_high, 5)
def test_mvn_mvt_2(self):
    """Second correlation matrix: MVT and MVN CDF values versus R references."""
    lower, upper = self.a, self.b
    probmvn_R = 0.6472497  # reported error approx. 7.7e-08
    probmvt_R = 0.5881863  # highest reported error up to approx. 1.99e-06

    assert_almost_equal(probmvt_R,
                        mvstdtprob(lower, upper, self.corr2, self.df), 4)
    assert_almost_equal(probmvn_R,
                        mvstdnormcdf(lower, upper, self.corr2,
                                     abseps=1e-5), 4)
def test_mvn_mvt_5(self):
    """Unequal integration bounds: MVT and MVN CDF values versus R references."""
    df = self.df
    corr2 = self.corr2
    # unequal integration bounds
    a3 = np.array([0.5, -0.5, 0.5])
    probmvn_R = 0.06910487  # using higher precision in R, error approx. 3.5e-08
    probmvt_R = 0.05797867  # using higher precision in R, error approx. 5.8e-08

    assert_almost_equal(mvstdtprob(a3, a3 + 1, corr2, df), probmvt_R, 4)
    assert_almost_equal(probmvn_R,
                        mvstdnormcdf(a3, a3 + 1, corr2, maxpts=100000,
                                     abseps=1e-5), 4)
def test_mvn_mvt_3(self):
    """Lower limits at -inf: MVT and MVN CDF values versus R references."""
    a, b = self.a, self.b
    df = self.df
    corr2 = self.corr2
    # integrate from -inf in every dimension
    a2 = a.copy()
    a2[:] = -np.inf
    probmvn_R = 0.9961141  # using higher precision in R, error approx. 6.866163e-07
    probmvt_R = 0.9522146  # using higher precision in R, error approx. 1.6e-07

    assert_almost_equal(probmvt_R, mvstdtprob(a2, b, corr2, df), 4)
    assert_almost_equal(probmvn_R,
                        mvstdnormcdf(a2, b, corr2, maxpts=100000,
                                     abseps=1e-5), 4)
def test_mvn_mvt_4(self):
    """Orthant probability over [0, inf)^3: both CDFs versus R references."""
    df = self.df
    corr2 = self.corr2
    # integrate from 0 to +inf in every dimension; build the upper limits
    # directly instead of negating a -inf array as the original did
    upper = self.a.copy()
    upper[:] = np.inf
    probmvn_R = 0.1666667  # error approx. 6.1e-08
    probmvt_R = 0.1666667  # error approx. 8.2e-08

    assert_almost_equal(probmvt_R,
                        mvstdtprob(np.zeros(3), upper, corr2, df), 4)
    assert_almost_equal(probmvn_R,
                        mvstdnormcdf(np.zeros(3), upper, corr2,
                                     maxpts=100000, abseps=1e-5), 4)
def test_mvn_mvt_3(self):
    """Lower limits at -inf, with explicit quadrature/tolerance settings."""
    # NOTE(review): a method with this same name appears earlier in the file;
    # if both live in the same class, this definition shadows the earlier
    # one -- confirm which version is intended to run.
    lower, upper = self.a, self.b
    corr2 = self.corr2
    below = lower.copy()
    below[:] = -np.inf

    # using higher precision in R, error approx. 6.866163e-07
    probmvn_R = 0.9961141
    # using higher precision in R, error approx. 1.6e-07
    probmvt_R = 0.9522146

    probmvt = mvstdtprob(below, upper, corr2, self.df,
                         quadkwds={'epsabs': 1e-08})
    assert_allclose(probmvt_R, probmvt, atol=5e-4)

    probmvn = mvstdnormcdf(below, upper, corr2, maxpts=100000, abseps=1e-5)
    assert_allclose(probmvn_R, probmvn, atol=1e-4)