def _build_prior(self, name, X, reparameterize=True, **kwargs):
    mu = self.mean_func(X)
    cov = stabilize(self.cov_func(X))
    shape = infer_shape(X, kwargs.pop("shape", None))
    if reparameterize:
        # Non-centered parameterization: rescale a rotated Gaussian by
        # sqrt(nu) / chi2 to obtain a Student-t process draw.
        chi2 = pm.ChiSquared(name + "_chi2_", self.nu)
        v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=shape,
                      **kwargs)
        f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2)
                             * (mu + cholesky(cov).dot(v)))
    else:
        f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape,
                          **kwargs)
    return f
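# The method above reads like the prior-construction step of PyMC3's
# Student-t process, pm.gp.TP (stabilize/infer_shape come from pm.gp.util
# and cholesky from theano in that codebase). A minimal usage sketch under
# that assumption; the kernel choice and nu=5 are illustrative:
import numpy as np
import pymc3 as pm

X = np.linspace(0, 10, 50)[:, None]
with pm.Model() as model:
    ls = pm.Gamma("ls", alpha=2, beta=1)
    cov = pm.gp.cov.ExpQuad(1, ls=ls)
    tp = pm.gp.TP(cov_func=cov, nu=5)  # nu: degrees of freedom
    f = tp.prior("f", X=X)             # dispatches to _build_prior
    trace = pm.sample(500, tune=500, chains=1)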
def _sample_pymc3(cls, dist, size, seed):
    """Sample from PyMC3."""
    import pymc3
    pymc3_rv_map = {
        'BetaDistribution': lambda dist: pymc3.Beta(
            'X', alpha=float(dist.alpha), beta=float(dist.beta)),
        'CauchyDistribution': lambda dist: pymc3.Cauchy(
            'X', alpha=float(dist.x0), beta=float(dist.gamma)),
        'ChiSquaredDistribution': lambda dist: pymc3.ChiSquared(
            'X', nu=float(dist.k)),
        'ExponentialDistribution': lambda dist: pymc3.Exponential(
            'X', lam=float(dist.rate)),
        'GammaDistribution': lambda dist: pymc3.Gamma(
            'X', alpha=float(dist.k), beta=1 / float(dist.theta)),
        'LogNormalDistribution': lambda dist: pymc3.Lognormal(
            'X', mu=float(dist.mean), sigma=float(dist.std)),
        'NormalDistribution': lambda dist: pymc3.Normal(
            'X', float(dist.mean), float(dist.std)),
        'GaussianInverseDistribution': lambda dist: pymc3.Wald(
            'X', mu=float(dist.mean), lam=float(dist.shape)),
        'ParetoDistribution': lambda dist: pymc3.Pareto(
            'X', alpha=float(dist.alpha), m=float(dist.xm)),
        'UniformDistribution': lambda dist: pymc3.Uniform(
            'X', lower=float(dist.left), upper=float(dist.right)),
    }

    dist_list = pymc3_rv_map.keys()
    if dist.__class__.__name__ not in dist_list:
        return None

    with pymc3.Model():
        pymc3_rv_map[dist.__class__.__name__](dist)
        return pymc3.sample(size, chains=1, progressbar=False,
                            random_seed=seed)[:]['X']
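# For reference, one entry of pymc3_rv_map unrolls into ordinary PyMC3 code.
# A standalone sketch of what the ChiSquared branch does (the draw count,
# nu value, and seed here are illustrative, not taken from the source):
import pymc3

with pymc3.Model():
    pymc3.ChiSquared('X', nu=3.0)
    trace = pymc3.sample(1000, chains=1, progressbar=False, random_seed=42)
samples = trace['X']  # equivalent to the [:]['X'] indexing used above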
def model_setup(visible, normalized_weights, estimateexplanterms={},
                estimatesdexplanterms={}, inferenceparams={}):
    nsensors = visible['y'].shape[0]
    n = visible['y'].shape[1]
    model = pm.Model()
    with model:
        doft = inferenceparams.get('doft', 4)
        priorfactor = inferenceparams.get('priorfactor', 1.0)

        # m and l fixed for reference product 0
        m0 = tt.zeros(1)
        l0 = tt.ones(1)

        # m and l priors for the remaining products
        mest = pm.StudentT('mest', doft, mu=0, sd=0.3 * priorfactor,
                           shape=nsensors - 1)
        lest = pm.StudentT('lest', doft, mu=1, sd=0.3 * priorfactor,
                           shape=nsensors - 1)

        # define mu and M for the remaining products, depending on whether
        # mu is estimated or set to zero
        if ('mu' in estimatesdexplanterms and estimatesdexplanterms['mu']
                and normalized_weights['mu']['weight'].shape[0] > 0):
            sdmu = pm.Exponential('sdmu', 1 / (0.3 * priorfactor))
        else:
            sdmu = pm.Deterministic('sdmu', tt.ones(1) * 0.3 * priorfactor)
        if ('mu' in estimateexplanterms and estimateexplanterms['mu']
                and normalized_weights['mu']['weight'].shape[0] > 0):
            muestnondim = pm.StudentT(
                'muestnondim', doft, mu=0, sd=1,
                shape=(normalized_weights['mu']['nfac'], nsensors - 1))
            muest = pm.Deterministic('muest', muestnondim * sdmu)
            # inside the sum: first dimension: explanatory factor,
            # second dimension: product, third dimension: time
            Mest = mest[:, np.newaxis] + tt.sum(
                muest[:, :, np.newaxis]
                * normalized_weights['mu']['weight'][:, np.newaxis, :],
                axis=0)
        else:
            muest = pm.Deterministic(
                'muest',
                tt.zeros((normalized_weights['mu']['nfac'], nsensors - 1)))
            Mest = mest[:, np.newaxis]

        # same for mu0 and M0
        if ('mu0' in estimateexplanterms and estimateexplanterms['mu0']
                and normalized_weights['mu']['weight'].shape[0] > 0):
            mu0estnondim = pm.StudentT(
                'mu0estnondim', doft, mu=0, sd=1,
                shape=normalized_weights['mu']['nfac'])
            mu0est = pm.Deterministic('mu0est', mu0estnondim * sdmu)
            M0est = m0 + tt.sum(
                mu0est[:, np.newaxis] * normalized_weights['mu']['weight'],
                axis=0)
        else:
            mu0est = pm.Deterministic(
                'mu0est', tt.zeros(normalized_weights['mu']['nfac']))
            M0est = m0

        # define lambda and L for the remaining products, depending on
        # whether lambda is estimated or set to zero
        if ('lambda' in estimatesdexplanterms
                and estimatesdexplanterms['lambda']
                and normalized_weights['lambda']['weight'].shape[0] > 0):
            sdlambda = pm.Exponential('sdlambda', 1 / (0.3 * priorfactor))
        else:
            sdlambda = pm.Deterministic('sdlambda',
                                        tt.ones(1) * 0.3 * priorfactor)
        if ('lambda' in estimateexplanterms and estimateexplanterms['lambda']
                and normalized_weights['lambda']['weight'].shape[0] > 0):
            lambdaestnondim = pm.StudentT(
                'lambdaestnondim', doft, mu=0, sd=1,
                shape=(normalized_weights['lambda']['nfac'], nsensors - 1))
            lambdaest = pm.Deterministic('lambdaest',
                                         lambdaestnondim * sdlambda)
            Lest = lest[:, np.newaxis] + tt.sum(
                lambdaest[:, :, np.newaxis]
                * normalized_weights['lambda']['weight'][:, np.newaxis, :],
                axis=0)
        else:
            lambdaest = pm.Deterministic(
                'lambdaest',
                tt.zeros((normalized_weights['lambda']['nfac'],
                          nsensors - 1)))
            Lest = lest[:, np.newaxis]

        # same for lambda0 and L0
        if ('lambda0' in estimateexplanterms
                and estimateexplanterms['lambda0']
                and normalized_weights['lambda']['weight'].shape[0] > 0):
            lambda0estnondim = pm.StudentT(
                'lambda0estnondim', doft, mu=0, sd=1,
                shape=normalized_weights['lambda']['nfac'])
            lambda0est = pm.Deterministic('lambda0est',
                                          lambda0estnondim * sdlambda)
            L0est = l0 + tt.sum(
                lambda0est[:, np.newaxis]
                * normalized_weights['lambda']['weight'],
                axis=0)
        else:
            lambda0est = pm.Deterministic(
                'lambda0est', tt.zeros(normalized_weights['lambda']['nfac']))
            L0est = l0

        # product variance
        if ('sigmap0' in estimateexplanterms
                and not estimateexplanterms['sigmap0']):
            # set sigmap0 to a fixed value
            sigmap0value = inferenceparams.get('sigmap0', 0.01)
            sigmap0squared = pm.Deterministic(
                'sigmap0squared', tt.ones(1) * sigmap0value**2)
            sigmapsquaredest = pm.Exponential(
                'sigmapsquaredest', 1 / (0.1 * priorfactor),
                shape=nsensors - 1)
            sigmapsquared = tt.concatenate(
                [sigmap0squared, sigmapsquaredest], axis=0)
        else:
            if inferenceparams.get('sigmap0prior') is not None:
                sigmap0squared = pm.Exponential(
                    'sigmap0squared',
                    1 / (inferenceparams['sigmap0prior'] * priorfactor),
                    shape=1)
                sigmapsquaredest = pm.Exponential(
                    'sigmapsquaredest', 1 / (0.1 * priorfactor),
                    shape=nsensors - 1)
                sigmapsquared = tt.concatenate(
                    [sigmap0squared, sigmapsquaredest], axis=0)
            else:
                # prior for product noise variance
                # (all explanatory factors set to 1)
                sigmapsquared = pm.Exponential(
                    'sigmapsquared', 1 / (0.1 * priorfactor), shape=nsensors)
        # associated standard deviation, for ease of reference
        sigmap = pm.Deterministic('sigmap', tt.sqrt(sigmapsquared))

        # define kappa and the predicted product noise variance, depending
        # on how/whether kappa is estimated
        if ('kappa' in estimatesdexplanterms
                and estimatesdexplanterms['kappa']
                and normalized_weights['kappa']['weight'].shape[0] > 0):
            sdkappa = pm.Exponential('sdkappa', 1 / (1.0 * priorfactor))
        else:
            sdkappa = pm.Deterministic('sdkappa', tt.ones(1) * priorfactor)
        if ('kappa' in estimateexplanterms and estimateexplanterms['kappa']
                and normalized_weights['kappa']['weight'].shape[0] > 0):
            if ('kappa0' in estimateexplanterms
                    and estimateexplanterms['kappa0']):
                # note that for kappa all sensors (including the reference
                # sensor) are represented in the same variable
                kappaestnondim = pm.StudentT(
                    'kappaestnondim', doft, mu=0, sd=1,
                    shape=(normalized_weights['kappa']['nfac'], nsensors))
                kappaest = pm.Deterministic('kappaest',
                                            kappaestnondim * sdkappa)
                kappa = pm.Deterministic('kappa', 1.0 * kappaest)
            else:
                kappaestnondim = pm.StudentT(
                    'kappaestnondim', doft, mu=0, sd=1,
                    shape=(normalized_weights['kappa']['nfac'],
                           nsensors - 1))
                kappaest = pm.Deterministic('kappaest',
                                            kappaestnondim * sdkappa)
                kappa0 = tt.zeros((normalized_weights['kappa']['nfac'], 1))
                kappa = pm.Deterministic(
                    'kappa', tt.concatenate([kappa0, kappaest], axis=1))
            sigmasquaredtotal = sigmapsquared[:, np.newaxis] * tt.prod(
                tt.pow(
                    normalized_weights['kappa']['weight'][:, np.newaxis, :],
                    kappa[:, :, np.newaxis]),
                axis=0)
        else:
            kappa = pm.Deterministic('kappa', tt.zeros((1, nsensors)))
            sigmasquaredtotal = sigmapsquared[:, np.newaxis] * tt.ones(
                (nsensors, n))

        # porosity, i.e. maximum soil moisture content: Student-t prior
        porosity = pm.StudentT('porosity', doft, mu=0.4,
                               sd=0.1 * priorfactor)

        thetamodel = inferenceparams.get('thetamodel', 'beta')
        dofchi = inferenceparams.get('dofchi', 3)
        softabsvalue = inferenceparams.get('softabsvalue', 0.01)

        # distribution of theta
        if thetamodel == 'beta':
            # beta distribution (A=alpha and B=beta estimated), can vary
            # with explanatory factors
            a = pm.ChiSquared('a', dofchi)
            b = pm.ChiSquared('b', dofchi)
            if ('alphabeta' in estimatesdexplanterms
                    and estimatesdexplanterms['alphabeta']):
                sdalphabeta = pm.Exponential('sdalphabeta',
                                             1 / (0.3 * priorfactor))
            else:
                sdalphabeta = pm.Deterministic(
                    'sdalphabeta', tt.ones(1) * 0.3 * priorfactor)
            if ('alphabeta' in estimateexplanterms
                    and estimateexplanterms['alphabeta']):
                alphanondim = pm.StudentT(
                    'alphanondim', doft, mu=0, sd=1,
                    shape=normalized_weights['alphabeta']['nfac'])
                betanondim = pm.StudentT(
                    'betanondim', doft, mu=0, sd=1,
                    shape=normalized_weights['alphabeta']['nfac'])
                alpha = pm.Deterministic('alpha', alphanondim * sdalphabeta)
                beta = pm.Deterministic('beta', betanondim * sdalphabeta)
                # soft absolute value; A and B should be >> softabsvalue
                A = tt.sqrt(softabsvalue**2 + tt.pow(
                    a + tt.sum(
                        alpha[:, np.newaxis]
                        * normalized_weights['alphabeta']['weight'],
                        axis=0), 2))
                B = tt.sqrt(softabsvalue**2 + tt.pow(
                    b + tt.sum(
                        beta[:, np.newaxis]
                        * normalized_weights['alphabeta']['weight'],
                        axis=0), 2))
            else:
                alpha = pm.Deterministic('alpha', tt.zeros(1))
                beta = pm.Deterministic('beta', tt.zeros(1))
                A = a
                B = b
            thetaub = pm.Beta('thetaub', alpha=A, beta=B, shape=n)
            theta = pm.Deterministic('theta', porosity * thetaub)
        elif thetamodel == 'logistic':
            # spline with logistic link
            a = pm.StudentT('a', doft, mu=0.0, sd=3.0 * priorfactor)
            b = pm.Exponential('b', 1. / (3 * priorfactor))
            if ('alphabeta' in estimatesdexplanterms
                    and estimatesdexplanterms['alphabeta']):
                sdalphabeta = pm.Exponential('sdalphabeta',
                                             1.0 / (1.0 * priorfactor))
            else:
                sdalphabeta = pm.Deterministic('sdalphabeta',
                                               tt.ones(1) * priorfactor)
            if ('alphabeta' in estimateexplanterms
                    and estimateexplanterms['alphabeta']):
                alphanondim = pm.StudentT(
                    'alphanondim', doft, mu=0, sd=1,
                    shape=normalized_weights['alphabeta']['nfac'])
                betanondim = pm.StudentT(
                    'betanondim', doft, mu=0, sd=1,
                    shape=normalized_weights['alphabeta']['nfac'])
                alpha = pm.Deterministic('alpha', alphanondim * sdalphabeta)
                beta = pm.Deterministic('beta', betanondim * sdalphabeta)
                A = a + tt.sum(
                    alpha[:, np.newaxis]
                    * normalized_weights['alphabeta']['weight'],
                    axis=0)
                B = tt.sqrt(softabsvalue**2 + tt.pow(
                    b + tt.sum(
                        beta[:, np.newaxis]
                        * normalized_weights['alphabeta']['weight'],
                        axis=0), 2))
            else:
                alpha = pm.Deterministic('alpha', tt.zeros(1))
                beta = pm.Deterministic('beta', tt.zeros(1))
                A = a
                B = b
            thetaubnondim = pm.Normal('thetaubnondim', mu=0, sd=1, shape=n)
            thetaub = pm.Deterministic('thetaub', A + B * thetaubnondim)
            theta = pm.Deterministic(
                'theta', porosity * tt.pow(1 + tt.exp(-thetaub), -1))

        # assemble the mean of the observed products
        thetaoffset = inferenceparams.get('thetaoffset', 0.15)
        if thetaoffset == 'observedmean':
            thetaoffset = np.mean(visible['y'][0, :])
        y0 = M0est + L0est * (theta[np.newaxis, :] - thetaoffset)
        yrest = Mest + Lest * (theta[np.newaxis, :] - thetaoffset)
        yest = tt.concatenate([y0, yrest], axis=0) + thetaoffset

        # model for the observed products
        studenterrors = inferenceparams.get('studenterrors', False)
        if not studenterrors:
            y = pm.Normal('y', mu=yest, sd=tt.sqrt(sigmasquaredtotal),
                          observed=visible['y'])
        else:
            studenterrors_dof = inferenceparams.get('studenterrors_dof',
                                                    doft)
            # scale the precision so the Student-t variance equals
            # sigmasquaredtotal
            lam = tt.pow(sigmasquaredtotal, -1) * (studenterrors_dof /
                                                   (studenterrors_dof - 2))
            y = pm.StudentT('y', studenterrors_dof, mu=yest, lam=lam,
                            observed=visible['y'])
    return model
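# A minimal invocation sketch for model_setup. The dictionary layout below
# is inferred from how the function indexes its arguments (each entry of
# normalized_weights carries a (nfac, n) 'weight' array plus an 'nfac'
# count); it is not taken from separate documentation, so treat it as an
# assumption:
import numpy as np
import pymc3 as pm

nsensors, n = 3, 200
visible = {'y': np.random.rand(nsensors, n)}         # products x time
nofactors = {'weight': np.empty((0, n)), 'nfac': 0}  # no explanatory factors
normalized_weights = {'mu': nofactors, 'lambda': nofactors,
                      'kappa': nofactors, 'alphabeta': nofactors}

model = model_setup(visible, normalized_weights)
with model:
    trace = pm.sample(1000, tune=1000)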
def build_biallelic_model4(g, n, s):
    # EXPERIMENTAL: Underlying allele freqs overdispersed before errors.
    a = 2
    with pm.Model() as model:
        # Fraction
        pi = pm.Dirichlet(
            'pi',
            a=np.ones(s),
            shape=(n, s),
            # presumably pm.distributions.transforms.stick_breaking
            transform=stick_breaking,
        )
        pi_hyper = pm.Data('pi_hyper', value=0.0)
        pm.Potential('heterogeneity_penalty',
                     -(pm.math.sqrt(pi).sum(0).sum()**2) * pi_hyper)
        rho_hyper = pm.Data('rho_hyper', value=0.0)
        pm.Potential('diversity_penalty',
                     -(pm.math.sqrt(pi.sum(0)).sum()**2) * rho_hyper)

        # Genotype
        gamma_ = pm.Uniform('gamma_', 0, 1, shape=(g * s, 1))
        gamma = pm.Deterministic(
            'gamma',
            pm.math.concatenate([gamma_, 1 - gamma_],
                                axis=1).reshape((g, s, a)))
        gamma_hyper = pm.Data('gamma_hyper', value=0.0)
        pm.Potential(
            'ambiguity_penalty',
            -(pm.math.sqrt(gamma).sum(2)**2).sum(0).sum(0) * gamma_hyper)

        # Product of fraction and genotype
        true_p = pm.Deterministic('true_p', pm.math.dot(pi, gamma))

        # Overdispersion term
        # TODO: Consider making it different between samples. How to shape?
        alpha = pm.ChiSquared('alpha', nu=50)
        _true_p = true_p.reshape((-1, a))[:, 0]
        _true_q = 1 - _true_p
        overdispersed_p_ = pm.Beta('overdispersed_p_',
                                   alpha=_true_p * alpha,
                                   beta=_true_q * alpha,
                                   shape=(n * g,))
        overdispersed_p = pm.Deterministic(
            'overdispersed_p',
            pm.math.concatenate(
                [overdispersed_p_.reshape((-1, 1)),
                 1 - overdispersed_p_.reshape((-1, 1))],
                axis=1).reshape((n, g, a)))

        # Sequencing error
        # epsilon_hyper = pm.Gamma('epsilon_hyper', alpha=100, beta=1)
        epsilon_hyper = pm.Data('epsilon_hyper', value=100)
        epsilon = pm.Beta('epsilon', alpha=2, beta=epsilon_hyper, shape=n)
        epsilon_ = epsilon.reshape((n, 1, 1))
        p_with_error = (overdispersed_p * (1 - epsilon_) +
                        (1 - overdispersed_p) * (epsilon_ / (a - 1)))

        # Observation
        # _p = p_with_error.reshape((-1, a))[:, 0]
        # _q = 1 - _p
        observed = pm.Data('observed', value=np.empty((g * n, a)))
        pm.Binomial('data',
                    p=p_with_error.reshape((-1, a))[:, 0],
                    n=observed.reshape((-1, a)).sum(1),
                    observed=observed[:, 0])
    return model
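# Hypothetical usage of build_biallelic_model4, assuming counts arrive
# flattened to shape (g * n, a) as the reshapes inside the model imply;
# the sizes and fake data below are illustrative only:
import numpy as np
import pymc3 as pm

g, n, s = 50, 10, 4
model = build_biallelic_model4(g, n, s)

# Fake biallelic counts; cast to float to match the pm.Data placeholder.
rng = np.random.RandomState(0)
counts = rng.randint(0, 20, size=(g * n, 2)).astype(float)

with model:
    pm.set_data({'observed': counts})
    trace = pm.sample(500, tune=500)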
""") plt.figure(dpi=100) ##### COMPUTATION ##### # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE k_real = 2 # DRAW A SAMPLE OF N=1000 np.random.seed(42) sample = stats.chi2.rvs(df=k_real, size=1000) ##### SIMULATION ##### # MODEL BUILDING with pm.Model() as model: k = pm.DiscreteUniform("k", lower=0, upper=np.mean(sample)*7) # mean + 3stds chi_2 = pm.ChiSquared("chi2", nu=k, observed=sample) # MODEL RUN with model: trace = pm.sample(50000) burned_trace = trace[45000:] # MU - 95% CONF INTERVAL ks = burned_trace["k"] k_est_95 = np.mean(ks) - 2*np.std(ks), np.mean(ks) + 2*np.std(ks) print("95% of sampled mus are between {} and {}".format(*k_est_95)) ##### PLOTTING ##### # SAMPLE DISTRIBUTION plt.hist(sample, bins=50,normed=True, alpha=.25)