def predict_probability_area(model, upper_bound, lower_bound):
    """
    Predict the probability that the true location is within a specified
    bounding box given a GMM model.

    Each mixture component contributes its multivariate normal probability
    mass inside the box, scaled by the component weight.

    Args:
        model (mixture.GMM): GMM model to use; its ``weights_``, ``means_``
            and ``covars_`` attributes are read per component
        upper_bound (list): [upper lat, right lon] of bounding box
        lower_bound (list): [lower_lat, left_lon] of bounding box
    Returns:
        total_prob (float): Probability from 0 to 1 of true location being
            in bounding box
    """
    total_prob = 0
    for component, weight in enumerate(model.weights_):
        box_mass = ext.mvnormcdf(
            upper_bound,
            model.means_[component],
            model.covars_[component],
            lower_bound,
            maxpts=2000,
        )
        # Very rarely a gaussian yields NaN (likely an exceedingly low
        # probability); such components are left out of the sum.
        if not math.isnan(box_mass):
            total_prob += box_mass * weight
    return total_prob
def N2_f(d1, d2, rho):
    """Cumulative bivariate standard normal distribution.

    d1: upper integration limit for the first variable
    d2: upper integration limit for the second variable
    rho: off-diagonal entry of the covariance matrix (the correlation,
        since both variances are 1)
    """
    std_mean = 0.0  # mean of a standard normal
    std_var = 1.0   # variance of a standard normal
    upper_limits = [d1, d2]  # upper bound
    # Unit variances on the diagonal, rho off the diagonal.
    cov_matrix = ([std_var, rho], [rho, std_var])
    return extras.mvnormcdf(upper_limits, std_mean, cov_matrix)
def N2_f(d1, d2, rho):
    """Evaluate the bivariate standard normal CDF at (d1, d2).

    d1: upper bound for the first variable
    d2: upper bound for the second variable
    rho: off-diagonal covariance entry (variances are fixed at 1)
    """
    from statsmodels.sandbox.distributions import extras

    mean = 0.0       # standard normal mean, applied to both variables
    variance = 1.0   # standard normal variance
    covariance = ([variance, rho], [rho, variance])
    bounds = [d1, d2]
    return extras.mvnormcdf(bounds, mean, covariance)
def predict_probability_area(model, upper_bound, lower_bound):
    """Sum the weighted probability mass of every mixture component
    that falls inside the box [lower_bound, upper_bound].

    Reads ``model.weights_``, ``model.means_`` and ``model.covars_`` by
    component position and returns the accumulated total.
    """
    total_prob = 0
    n_components = len(model.weights_)
    for k in range(n_components):
        mass = ext.mvnormcdf(upper_bound, model.means_[k], model.covars_[k],
                             lower_bound, maxpts=2000)
        # A very rare occurrence makes some gaussians return NaN (likely an
        # exceedingly low probability); skip those components.
        if math.isnan(mass):
            continue
        total_prob += mass * model.weights_[k]
    return total_prob
def _compute_mvnorm_image_dim_2(upper_bound, out_res, mu, sigma):
    """Rasterise a 2-D normal onto an ``out_res`` x ``out_res`` grid.

    The square [0, upper_bound] x [0, upper_bound] is cut into equal cells;
    each output pixel holds ``mvnormcdf`` evaluated between the cell's lower
    and upper corners for the distribution given by ``mu`` and ``sigma``.
    The first array axis follows y, the second follows x.
    """
    x_edges = np.linspace(0, upper_bound, out_res + 1)
    y_edges = np.linspace(0, upper_bound, out_res + 1)
    image = np.zeros([out_res] * 2)
    for col in range(out_res):
        x_lo, x_hi = x_edges[col], x_edges[col + 1]
        for row in range(out_res):
            y_lo, y_hi = y_edges[row], y_edges[row + 1]
            image[row, col] = mvnormcdf([x_hi, y_hi], mu, sigma,
                                        lower=[x_lo, y_lo])
    return image
def _cross_moments_inner(ksi, eta, means, stds, rho, handle):
    """Evaluate one exponentially weighted cross-moment term.

    ``means``, ``stds`` and ``rho`` are converted to a mean vector and a
    covariance matrix by ``simulation.param_converter``. The mean is shifted
    by ``cov . handle`` and the result combines two ``g_func`` terms with a
    bivariate normal CDF, scaled by ``exp(mut)``.

    Args:
        ksi: threshold for the first component, divided by ``X_0[0]``.
        eta: threshold for the second component, divided by ``X_0[1]``.
        means, stds, rho: distribution parameters passed to
            ``simulation.param_converter``.
        handle: array of shape (2, 1) used to shift the mean.

    Returns:
        The value ``(-1 + g + g + cdf) * exp(mut)``.
        NOTE(review): ``mean`` appears to be a (2, 1) column (it must
        broadcast with ``cov.dot(handle)`` and be squeezable on axis 1), so
        the result presumably has shape (1, 1) -- confirm against callers.

    Raises:
        AssertionError: if ``handle`` or the shifted mean has an unexpected
            shape.
    """
    mean, cov = simulation.param_converter(means, stds, rho)
    assert handle.shape == (2, 1)
    adj_mean = np.squeeze(mean + cov.dot(handle), axis=1)
    assert adj_mean.shape == (2, ), "{}".format(adj_mean.shape)

    # Invert the covariance once; both quadratic forms below reuse it.
    cov_inv = inv(cov)
    shifted_form = ((adj_mean.T).dot(cov_inv)).dot(adj_mean)
    base_form = ((mean.T).dot(cov_inv)).dot(mean)
    mut = 0.5 * shifted_form - 0.5 * base_form

    # Upper integration limits on the log scale.
    upper = np.zeros((2, ))
    upper[0] = np.log(ksi / X_0[0])
    upper[1] = np.log(eta / X_0[1])

    ret = - 1 + g_func(ksi/X_0[0], mean=adj_mean[0], sigma2=stds[0]**2) \
        + g_func(eta/X_0[1], mean=adj_mean[1], sigma2=stds[1]**2) \
        + mvnormcdf(upper, adj_mean, cov)
    ret *= np.exp(mut)
    return ret
def N2_f(d1, d2, rho):
    """Cumulative bivariate standard normal distribution.

    d1: the first value (upper bound of the first variable)
    d2: the second value (upper bound of the second variable)
    rho: correlation

    Example1: print(N2_f(0,0,1.)) => 0.5
    Example2: print(N2_f(0,0,0)) => 0.25
    """
    import statsmodels.sandbox.distributions.extras as extras

    unit_var = 1.0   # variance of the standard normal distribution
    zero_mean = 0.0  # mean of the standard normal distribution
    # Covariance of two unit-variance variables with correlation rho.
    cov_rows = ([unit_var, rho], [rho, unit_var])
    return extras.mvnormcdf([d1, d2], zero_mean, cov_rows)
def get_normal_probabilities(num_bins, mean, cov):
    """Return a grid of binned normal probabilities.

    Each of the four dimensions is split into ``num_bins`` bins whose inner
    edges are ``num_bins - 1`` equally spaced points spanning +/- 1.96
    standard deviations, with the two outermost bins open to -inf / +inf.
    Every cell receives the probability mass that ``mvnormcdf`` assigns to
    it, and the grid is rescaled to sum to one.

    Args:
        num_bins: number of bins per dimension.
        mean: mean vector handed to ``mvnormcdf``.
        cov: covariance matrix, indexed as ``cov[i, i]``.

    Returns:
        (q, grid): ``q`` is an array of shape ``(num_bins,) * 4`` summing to
        one; ``grid`` holds the bin edges of the LAST dimension only.

    Raises:
        AssertionError: if a cell probability is negative or the raw
            probabilities do not sum to roughly one (0.98 .. 1.02).
    """
    # Currently this only works for four-dimensional normal distribution.
    num_dims = 4

    # NOTE(review): the edges are centred on zero, not on ``mean`` --
    # presumably callers pass a zero mean; confirm.
    grids = []
    for dim in range(num_dims):
        scale = np.sqrt(cov[dim, dim])
        inner_edges = np.linspace(-1.96 * scale, 1.96 * scale, num_bins - 1,
                                  endpoint=True)
        grids.append(np.concatenate(([-np.inf], inner_edges, [np.inf]),
                                    axis=0))

    # Read the per-dimension edges directly instead of materialising four
    # (num_bins + 1)**4 meshgrid arrays. np.ndindex visits the cells in the
    # same order as four nested loops, last index fastest.
    q = np.full([num_bins] * num_dims, np.nan)
    for cell in np.ndindex(*q.shape):
        lower = [grids[dim][pos] for dim, pos in enumerate(cell)]
        upper = [grids[dim][pos + 1] for dim, pos in enumerate(cell)]
        q[cell] = mvnormcdf(upper, mean, cov, lower)

    # Getting started with some basic consistency checks.
    np.testing.assert_equal(np.all(q >= 0), True)
    np.testing.assert_equal(0.98 < np.sum(q) < 1.02, True)

    # Scaling output to ensure that probabilities sum to one.
    q = q / np.sum(q)

    return q, grids[-1]
def marg_cdf(self, u):
    """Evaluate the marginal normal CDF of the target columns.

    Args:
        u: DataFrame, n_obs x targets. Its column names select the targets
            and are joined into the name of the result, so they must be
            strings.

    Returns:
        pandas Series with one CDF value per row of ``u``. With a single
        target it is indexed by the values of that column; otherwise by
        ``range(n_obs)``.
    """
    targets = list(u.columns)
    x = self.make_input(u)
    if len(targets) <= 1:
        # standard univariate normal
        res = ss.norm.cdf(x)
        res = pd.Series(res[:, 0], index=u.iloc[:, 0])
    else:
        # mvnormcdf does not accept multiple input points, so evaluate the
        # rows one at a time. The zero mean and the target sub-matrix of
        # self.cr do not depend on the row, so they are built once.
        zero_mean = [0] * len(targets)
        target_cov = np.array(self.cr.loc[targets, targets])
        n_rows = x.shape[0]
        res = np.zeros(n_rows)
        for i in range(n_rows):
            point = np.array(x.iloc[i, :])
            res[i] = mvnormcdf(point, zero_mean, target_cov)
        res = pd.Series(res, index=range(u.shape[0]))
    # NOTE(review): the label says "Cond" although this is the marginal
    # CDF; left unchanged in case callers rely on the name.
    res.name = 'Cond CDF of ' + ', '.join(targets)
    return res
def cond_cdf(self, u, u_cond):
    """Evaluate the normal CDF of the targets given the conditionals.

    Calls ``self.fit_cond`` with the columns of ``u`` as targets and the
    columns of ``u_cond`` as conditionals, then reads ``self.targets`` and
    ``self.cond_cov``.

    Args:
        u: DataFrame, n_obs x targets.
        u_cond: DataFrame, 1 x conditionals.

    Returns:
        pandas Series with one CDF value per row of ``u``. With a single
        target it is indexed by the first column of ``u``; otherwise by
        ``range(n_obs)``.
    """
    uu = u.values[:, 0]
    self.fit_cond(targets=u.columns, conditionals=u_cond.columns)
    x, x_cond, mn = self.make_input(u, u_cond)
    if len(self.targets) <= 1:
        # univariate normal; the scale is the conditional standard deviation
        res = ss.norm.cdf(x, mn.iloc[0], self.cond_cov.iloc[0, 0] ** 0.5)
        res = pd.Series(res[:, 0], index=uu)
    else:
        # mvnormcdf does not accept multiple input points, so evaluate the
        # rows one at a time. The conditional mean and covariance are the
        # same for every row, so they are converted once.
        cond_mean = np.array(mn)[0]
        cond_cov = np.array(self.cond_cov)
        n_rows = x.shape[0]
        res = np.zeros(n_rows)
        for i in range(n_rows):
            point = np.array(x.iloc[i, :])
            res[i] = mvnormcdf(point, cond_mean, cond_cov)
        res = pd.Series(res, index=range(u.shape[0]))
    res.name = 'Cond CDF of ' + ', '.join(self.targets.astype('str'))
    return res
def threshold_prob(YY, index, GG, beta, mu, Vp, h2, FF, TT, maxpts_mult=20000,
                   log_out=True, abseps=None, releps=None, genz=False):
    """Calculate the probability of binary phenotypes in a pedigree,
    conditional on index individuals.

    The joint probability of all phenotypes (P1) and the probability of the
    index individuals' phenotypes alone (P2) are both computed with
    multivariate normal integrals; the result is their ratio.

    Keyword arguments:
    YY -- Binary phenotype array, numpy array with 0 for below thresh, 1 for
          above
    index -- List of index patient indexes (positions into YY); must be
             hashable values
    GG -- Genotypes, numpy array of 0,1,2 giving the number of alleles
    beta -- Effect size of the Mendelian locus
    mu -- Mean population trait value
    Vp -- Population trait variance
    h2 -- Trait heritability
    FF -- Kinship matrix
    TT -- Trait threshold for exhibiting the phenotype
    maxpts_mult -- Multiplied by the number of individuals to give the
                   ``maxpts`` option of the joint integral
    log_out -- If true return log(P1) - log(P2), otherwise P1 / P2
    abseps -- Accepted for backward compatibility; does not influence the
              result (see genz)
    releps -- Optional ``releps`` option forwarded to the joint integral
    genz -- Accepted for backward compatibility. The value it used to
            select was always overwritten by the ``mvnormcdf`` result, so
            it never affected the return value.
    """
    n_indiv = np.size(YY)
    # Set membership keeps the repeated "is this an index individual?"
    # checks O(1) instead of scanning the list each time.
    index_set = set(index)

    YY_index = np.array([1 * (ii in index_set) for ii, _ in enumerate(YY)])
    index_FF = subset_matrix(YY_index, FF, 1)
    GG_index = [GG_i for ii, GG_i in enumerate(GG) if ii in index_set]

    # inf * 0 is NaN and NaN != NaN, so every NaN entry is replaced by TT:
    # unaffected (0) individuals integrate over (-inf, TT],
    # affected (1) individuals over [TT, +inf).
    lower_lims = [
        xx if xx == xx else TT for xx in -np.inf * (1 - np.array(YY))
    ]
    upper_lims = [xx if xx == xx else TT for xx in np.inf * np.array(YY)]

    means = np.ones(n_indiv) * mu + np.array(GG) * beta
    cov = Vp * h2 * FF + Vp * (1 - h2) * np.identity(n_indiv)

    # Joint probability of all phenotypes.
    p1_options = {"maxpts": n_indiv * maxpts_mult}
    if releps is not None:
        p1_options["releps"] = releps
    P1 = mvnormcdf(lower=lower_lims, upper=upper_lims, mu=means, cov=cov,
                   **p1_options)

    # Probability of the index individuals' phenotypes alone.
    lower_lims_index = [xx for ii, xx in enumerate(lower_lims)
                        if ii in index_set]
    upper_lims_index = [xx for ii, xx in enumerate(upper_lims)
                        if ii in index_set]
    n_index = len(lower_lims_index)
    means_index = np.ones(n_index) * mu + np.array(GG_index) * beta
    cov_index = (Vp * h2 * index_FF +
                 Vp * (1 - h2) * np.identity(n_index))

    if n_index > 1:
        P2 = mvnormcdf(lower=lower_lims_index, upper=upper_lims_index,
                       mu=np.array(means_index), cov=cov_index)
    elif lower_lims_index[0] == -np.inf:
        # Single unaffected index individual: lower tail.
        P2 = norm.cdf(upper_lims_index[0], loc=means_index[0],
                      scale=np.sqrt(cov_index[0, 0]))
    else:
        # Single affected index individual: upper tail. The survival
        # function avoids the cancellation of 1 - cdf far in the tail.
        P2 = norm.sf(lower_lims_index[0], loc=means_index[0],
                     scale=np.sqrt(cov_index[0, 0]))

    if log_out:
        return np.log(P1) - np.log(P2)
    return P1 / P2
def Mfunc(V, H, F, tau, R=0.05, sigmaH=0.3, sigmaV=0.3, rho_VH=0.5):
    """Combine three bivariate-normal terms into ``t1 + t2 - t3``.

    * ``t1 = H * Phi2(alpha1, alpha2)``
    * ``t2 = V * Phi2(beta1, beta2)``
    * ``t3 = F * exp(-R * tau) * Phi2(gamma1, gamma2)``

    ``Phi2`` is ``mvnormcdf`` with zero mean, unit variances and a
    term-specific correlation. A term whose limits contain ``-inf`` is set
    to 0.0 without calling ``mvnormcdf``.

    Args:
        V, H, F: the three levels entering the log-ratios. A non-positive
            value is treated as a limit: the log-ratio becomes -inf when
            only the numerator is non-positive, +inf when only the
            denominator is, and is dropped when both are.
        tau: horizon multiplying the drift and variance terms.
        R: rate used in the drift and in the ``exp(-R * tau)`` factor.
        sigmaH, sigmaV: volatilities of H and V.
        rho_VH: correlation between V and H.

    Returns:
        ``t1 + t2 - t3``. If any term is NaN a ``RuntimeWarning`` is issued
        and the NaN result is still returned.
    """
    import warnings

    def covmat(rho):
        return np.array([[1, rho], [rho, 1]])

    def log_ratio(num, den):
        # log(num / den), extended to non-positive inputs as limits.
        if den > 0:
            return np.log(num / den) if num > 0 else -np.inf
        return np.inf if num > 0 else 0.0

    u0 = np.array([0, 0])

    def guarded_term(scale, limits, rho):
        # A -inf upper limit means zero probability mass; skip the integral.
        limits = np.array(limits).flatten()
        if any(limits == -np.inf):
            return 0.0
        return scale * mvnormcdf(limits, u0, covmat(rho))

    sqrt_tau = np.sqrt(tau)
    sigma = np.sqrt(sigmaV**2 + sigmaH**2 - 2 * rho_VH * sigmaV * sigmaH)

    gamma1 = (log_ratio(H, F) + (R - .5 * sigmaH**2) * tau) / \
        (sigmaH * sqrt_tau)
    gamma2 = (log_ratio(V, F) + (R - .5 * sigmaV**2) * tau) / \
        (sigmaV * sqrt_tau)

    alpha1 = gamma1 + sigmaH * sqrt_tau
    alpha2 = (log_ratio(V, H) - 0.5 * sigma**2 * tau) / (sigma * sqrt_tau)

    beta1 = gamma2 + sigmaV * sqrt_tau
    # For V <= 0 < H this uses +inf, mirroring alpha2 for H <= 0 < V. The
    # previous code repeated the both-non-positive branch here, which looks
    # like a copy-paste slip.
    beta2 = (log_ratio(H, V) - 0.5 * sigma**2 * tau) / (sigma * sqrt_tau)

    t1 = guarded_term(H, [alpha1, alpha2],
                      (rho_VH * sigmaV - sigmaH) / sigma)
    t2 = guarded_term(V, [beta1, beta2],
                      (rho_VH * sigmaH - sigmaV) / sigma)
    t3 = guarded_term(F * np.exp(-R * tau), [gamma1, gamma2], rho_VH)

    if np.isnan(t1) or np.isnan(t2) or np.isnan(t3):
        # Report the NaN without dropping into an interactive debugger.
        warnings.warn(
            "Mfunc produced NaN (t1={}, t2={}, t3={}) for V={}, H={}, F={}, "
            "tau={}".format(t1, t2, t3, V, H, F, tau),
            RuntimeWarning,
        )

    return t1 + t2 - t3