import pylab as pl
import pymc as mc


def fe(data):
    """ Fixed Effect model::

        Y_r,c,t = beta * X_r,c,t + e_r,c,t
        e_r,c,t ~ N(0, sigma^2)
    """
    # covariates
    K1 = count_covariates(data, 'x')
    X = pl.array([data['x%d' % i] for i in range(K1)])

    K2 = count_covariates(data, 'w')
    W = pl.array([data['w%d' % i] for i in range(K2)])  # was range(K1); use the w-covariate count

    # priors
    beta = mc.Uninformative('beta', value=pl.zeros(K1))
    gamma = mc.Uninformative('gamma', value=pl.zeros(K2))
    sigma_e = mc.Uniform('sigma_e', lower=0, upper=1000, value=1)

    # predictions
    @mc.deterministic
    def mu(X=X, beta=beta):
        return pl.dot(beta, X)
    param_predicted = mu

    @mc.deterministic
    def sigma_explained(W=W, gamma=gamma):
        """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
        return pl.dot(gamma, W)

    @mc.deterministic
    def predicted(mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.rnormal(mu, 1 / (sigma_explained**2. + sigma_e**2.))

    # likelihood
    i_obs = pl.find(1 - pl.isnan(data.y))

    @mc.observed
    def obs(value=data.y, i_obs=i_obs, mu=mu,
            sigma_explained=sigma_explained, sigma_e=sigma_e):
        # precision combines explained and residual variances
        # (was sigma_e**-2., inconsistent with `predicted` above)
        return mc.normal_like(value[i_obs], mu[i_obs],
                              1. / (sigma_explained[i_obs]**2. + sigma_e**2.))

    # set up MCMC step methods
    mod_mc = mc.MCMC(vars())
    mod_mc.use_step_method(mc.AdaptiveMetropolis, mod_mc.beta)

    # find good initial conditions with MAP approx
    print 'attempting to maximize likelihood'
    var_list = [mod_mc.beta, mod_mc.obs, mod_mc.sigma_e]
    mc.MAP(var_list).fit(method='fmin_powell', verbose=1)

    return mod_mc
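A minimal usage sketch (an assumption, not part of the original source): fe() indexes its argument both as data['x0'] and data.y, so a pandas DataFrame fits; count_covariates() is assumed to count columns with the given prefix.

import numpy as np
import pandas as pd

n = 100
data = pd.DataFrame({'x0': np.ones(n), 'x1': np.random.randn(n),
                     'w0': np.ones(n)})
data['y'] = 2.*data['x0'] + 1.*data['x1'] + .1*np.random.randn(n)

mod = fe(data)                    # builds the MCMC object and runs the MAP fit
mod.sample(iter=5000, burn=2500)  # posterior for beta should center near [2., 1.]
print(mod.beta.stats()['mean'])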
import numpy as np
import pandas as pd
import pymc as mc

import dismod_mr


def test_covariate_model_dispersion():
    # simulate normal data
    n = 100

    model = dismod_mr.data.ModelData()
    model.hierarchy, model.output_template = dismod_mr.testing.data_simulation.small_output()

    Z = mc.rcategorical([.5, .5], n)  # was [.5, 5.]; category probabilities must sum to 1
    zeta_true = -.2

    pi_true = .1
    ess = 10000.*np.ones(n)
    eta_true = np.log(50)
    delta_true = 50 + np.exp(eta_true)

    p = mc.rnegative_binomial(pi_true*ess, delta_true*np.exp(Z*zeta_true)) / ess

    model.input_data = pd.DataFrame(dict(value=p, z_0=Z))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    variables = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    variables.update(dismod_mr.model.covariates.mean_covariate_model(
        'test', variables['mu'], model.input_data, {}, model,
        'all', 'total', 'all'))
    variables.update(dismod_mr.model.covariates.dispersion_covariate_model(
        'test', model.input_data, .1, 10.))
    variables.update(dismod_mr.model.likelihood.neg_binom(
        'test', variables['pi'], variables['delta'], p, ess))

    # fit model
    m = mc.MCMC(variables)
    m.sample(2)
import numpy as np
import pymc as pm


def anneal_ldst(n=11, phases=10, iters=1000):
    """ MCMC/simulated annealing to generate a random low-degree
    spanning tree on a grid graph

    Parameters
    ----------
    n : int, size of grid
    phases : int, optional, number of cooling phases
    iters : int, optional, number of MCMC steps per phase

    Returns
    -------
    T : nx.Graph, spanning tree with T.base_graph, with few degree 3 vertices
    """
    beta = pm.Uninformative('beta', value=1.)

    ldst = LDST(my_grid_graph([n, n]), beta=beta)

    mod_mc = pm.MCMC([beta, ldst])
    mod_mc.use_step_method(STMetropolis, ldst)
    mod_mc.use_step_method(pm.NoStepper, beta)

    for i in range(phases):
        print('phase %d' % (i+1),)
        beta.value = i*5
        mod_mc.sample(iters, burn=iters-1)
        print('frac of deg 2 vtx = %.2f' %
              np.mean(np.array(ldst.value.degree().values()) == 2))

    return ldst.value
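A hypothetical driver (assumes LDST, STMetropolis, and my_grid_graph from the surrounding module): anneal on a small grid and summarize the degree distribution of the resulting tree.

import networkx as nx

T = anneal_ldst(n=7, phases=5, iters=500)
print('degree histogram:', nx.degree_histogram(T))  # most vertices should have degree 2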
def anneal_bdst(n=11, depth=10, phases=10, iters=1000):
    """ MCMC/simulated annealing to generate a random bounded-depth
    spanning tree

    Parameters
    ----------
    n : int, size of grid
    depth : int, optional, target bound on depth
    phases : int, optional, number of cooling phases
    iters : int, optional, number of MCMC steps per phase

    Returns
    -------
    T : nx.Graph, spanning tree with T.base_graph, possibly with depth bound
    satisfied
    """
    beta = pm.Uninformative('beta', value=1.)

    G = nx.grid_graph([n, n])
    root = ((n-1)/2, (n-1)/2)

    bdst = BDST(G, root, depth, beta)

    @pm.deterministic
    def max_depth(T=bdst, root=root):
        shortest_path_length = nx.shortest_path_length(T, root)
        T.max_depth = max(shortest_path_length.values())
        return T.max_depth

    mod_mc = pm.MCMC([beta, bdst, max_depth])
    mod_mc.use_step_method(STMetropolis, bdst)
    mod_mc.use_step_method(pm.NoStepper, beta)

    for i in range(phases):
        beta.value = i*5
        mod_mc.sample(iters, thin=max(1, iters/100))
        print('cur depth', max_depth.value)
        print('pct of trace with max_depth <= depth',
              np.mean(mod_mc.trace(max_depth)[:] <= depth))

    return bdst.value
def __init__(self, loc, scale,
             loc_step_method=None, scale_step_method=None,
             beta_step_method=None,
             loc_step_method_args=None, scale_step_method_args=None,
             beta_step_method_args=None, *args, **kwargs):
    if type(loc) != list:
        loc = [loc]
    self.loc = loc
    self.scale = scale

    self.beta = set([])
    for node in self.loc:
        self.beta.update(node.extended_children)

    pm.StepMethod.__init__(self, [scale] + loc + list(self.beta),
                           *args, **kwargs)

    # set alpha
    self.alpha = pm.Uninformative('alpha', value=1., trace=False, plot=False)

    # assign default Metropolis step method if needed
    if loc_step_method is None:
        loc_step_method = pm.Metropolis
    if scale_step_method is None:
        scale_step_method = pm.Metropolis
    if beta_step_method is None:
        beta_step_method = pm.Metropolis

    if loc_step_method_args is None:
        loc_step_method_args = {}
    if scale_step_method_args is None:
        scale_step_method_args = {}
    if beta_step_method_args is None:
        beta_step_method_args = {}

    # set step methods
    self.loc_steps = [loc_step_method(node, **loc_step_method_args)
                      for node in self.loc]
    self.scale_step = scale_step_method(scale, **scale_step_method_args)
    self.beta_steps = [beta_step_method(node, **beta_step_method_args)
                       for node in self.beta]
    self.alpha_step = MetropolisAlpha(self.alpha, self.beta, loc, scale)
def __init__(self, predictions, measurements, uncertainties, regularization_strength=1.0, prior_pops=None): """Bayesian Energy Landscape Tilting with maximum entropy prior. Parameters ---------- predictions : ndarray, shape = (num_frames, num_measurements) predictions[j, i] gives the ith observabled predicted at frame j measurements : ndarray, shape = (num_measurements) measurements[i] gives the ith experimental measurement uncertainties : ndarray, shape = (num_measurements) uncertainties[i] gives the uncertainty of the ith experiment regularization_strength : float How strongly to weight the MVN prior (e.g. lambda) precision : ndarray, optional, shape = (num_measurements, num_measurements) The precision matrix of the predicted observables. prior_pops : ndarray, optional, shape = (num_frames) Prior populations of each conformation. If None, use uniform populations. """ BELT.__init__(self, predictions, measurements, uncertainties, prior_pops=prior_pops) self.alpha = pymc.Uninformative( "alpha", value=np.zeros(self.num_measurements) ) # The prior on alpha is defined as a potential, so we use Uninformative variables here. self.initialize_variables() self.log_prior_pops = np.log(self.prior_pops) @pymc.potential def logp_prior(populations=self.populations, log_prior_pops=self.log_prior_pops): # So x log(x) -> 0 as x -> 0, so we want to *drop* zeros # This is important because we otherwise might get NANs, as numpy doesn't know how to evaluate x * np.log(x) ind = np.where(populations > 0)[0] populations = populations[ind] log_prior_pops = log_prior_pops[ind] expr = populations.dot( np.log(populations)) - populations.dot(log_prior_pops) return -1 * regularization_strength * expr self.logp_prior = logp_prior
def linear():
    beta = mc.Uninformative('beta', value=[0., 0.])
    sigma = mc.Uniform('sigma', lower=0., upper=100., value=1.)

    @mc.deterministic
    def y_mean(beta=beta, X=data.hdi2005):
        return beta[0] + beta[1] * X

    y_obs = mc.Normal('y_obs', value=data.tfr2005,
                      mu=y_mean, tau=sigma**-2, observed=True)

    return vars()
def nonlinear():
    beta = mc.Uninformative('beta', value=[0., 0., 0.])
    gamma = mc.Normal('gamma', mu=.9, tau=.05**-2, value=.9)
    sigma = mc.Uniform('sigma', lower=0., upper=100., value=1.)

    @mc.deterministic
    def y_mean(beta=beta, gamma=gamma, X=data.hdi2005):
        return beta[0] + beta[1]*X \
            + beta[2]*pl.maximum(0., X-gamma)

    y_obs = mc.Normal('y_obs', value=data.tfr2005,
                      mu=y_mean, tau=sigma**-2, observed=True)

    return vars()
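A minimal fitting sketch (assumed, not in the source, and assuming the module-level data used by the factories is already loaded): both factories return vars(), which mc.MCMC consumes directly, so fitting either model is one call.

import pymc as mc

m = mc.MCMC(nonlinear())
m.use_step_method(mc.AdaptiveMetropolis, m.beta)
m.sample(iter=20000, burn=10000, thin=10)
print(m.beta.stats()['mean'], m.gamma.stats()['mean'])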
def __init__(self, predictions, measurements, uncertainties, regularization_strength=1.0, prior_pops=None): """Bayesian Energy Landscape Tilting with Dirichlet prior. Parameters ---------- predictions : ndarray, shape = (num_frames, num_measurements) predictions[j, i] gives the ith observabled predicted at frame j measurements : ndarray, shape = (num_measurements) measurements[i] gives the ith experimental measurement uncertainties : ndarray, shape = (num_measurements) uncertainties[i] gives the uncertainty of the ith experiment regularization_strength : float How strongly to weight the prior (e.g. lambda) precision : ndarray, optional, shape = (num_measurements, num_measurements) The precision matrix of the predicted observables. prior_pops : ndarray, optional, shape = (num_frames) Prior populations of each conformation. If None, use uniform populations. """ BELT.__init__(self, predictions, measurements, uncertainties, prior_pops=prior_pops) self.alpha = pymc.Uninformative( "alpha", value=np.zeros(self.num_measurements) ) # The prior on alpha is defined as a potential, so we use Uninformative variables here. self.initialize_variables() @pymc.potential def logp_prior(populations=self.populations): if populations.min() <= 0: return -1 * np.inf else: expr = self.prior_pops.dot(np.log(populations)) return regularization_strength * expr self.logp_prior = logp_prior
import pylab as pl
import pymc as mc


def banana(dim=2, b=.03, step='Metropolis', iters=5000):
    """ The non-linear banana-shaped distributions are constructed from the
    Gaussian ones by 'twisting' them as follows.  Let f be the density of the
    multivariate normal distribution N(0, C_1) with the covariance again
    given by C_1 = diag(100, 1, ..., 1).  The density function of the
    'twisted' Gaussian with the nonlinearity parameter b > 0 is given by
    f_b = f \circ \phi_b, where the function
    \phi_b(x) = (x_1, x_2 + b x_1^2 - 100 b, x_3, ..., x_n).
    """
    assert dim >= 2, 'banana must be dimension >= 2'

    C_1 = pl.ones(dim)
    C_1[0] = 100.
    X = mc.Uninformative('X', value=pl.zeros(dim))

    def banana_like(X, tau, b):
        phi_X = pl.copy(X)
        phi_X *= 30.  # rescale X to match scale of other models
        phi_X[1] = phi_X[1] + b * phi_X[0]**2 - 100 * b

        return mc.normal_like(phi_X, 0., tau)

    @mc.potential
    def banana(X=X, tau=C_1**-1, b=b):
        return banana_like(X, tau, b)

    mod = setup_and_sample(vars(), step, iters)

    im = pl.imread('banana.png')
    x = pl.arange(-1, 1, .01)
    y = pl.arange(-1, 1, .01)
    z = [[banana_like(pl.array([xi, yi]), C_1[[0, 1]]**-1, b)
          for xi in x] for yi in y]

    def plot_distribution():
        pl.imshow(im, extent=[-1, 1, -1, 1], aspect='auto',
                  interpolation='bicubic')
        pl.contour(x, y, z, [-1000, -10, -6], cmap=pl.cm.Greys, alpha=.5)

    mod.plot_distribution = plot_distribution

    return mod
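A quick sanity check (a standalone re-implementation, since banana_like is local to banana()): with b = 0 the twist \phi_b is the identity, so the log-density must agree with the untwisted Gaussian N(0, diag(100, 1)) after the fixed 30x rescaling.

import pylab as pl
import pymc as mc

def banana_like_check(X, tau, b):
    phi_X = pl.copy(X)
    phi_X *= 30.
    phi_X[1] = phi_X[1] + b * phi_X[0]**2 - 100 * b
    return mc.normal_like(phi_X, 0., tau)

x = pl.array([.1, -.2])
tau = pl.array([100., 1.])**-1
assert abs(banana_like_check(x, tau, 0.) -
           mc.normal_like(30.*x, 0., tau)) < 1e-10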
def __init__(self, predictions, measurements, uncertainties,
             prior_pops=None, weights_alpha=None):
    """Bayesian Energy Landscape Tilting with Jeffreys prior.

    Parameters
    ----------
    predictions : ndarray, shape = (num_frames, num_measurements)
        predictions[j, i] gives the ith observable predicted at frame j
    measurements : ndarray, shape = (num_measurements)
        measurements[i] gives the ith experimental measurement
    uncertainties : ndarray, shape = (num_measurements)
        uncertainties[i] gives the uncertainty of the ith experiment
    prior_pops : ndarray, optional, shape = (num_frames)
        Prior populations of each conformation.  If None, use uniform
        populations.

    Notes
    -----
    This feature is UNTESTED.
    """
    BELT.__init__(self, predictions, measurements, uncertainties,
                  prior_pops=prior_pops)

    self.alpha = pymc.Uninformative("alpha",
                                    value=np.zeros(self.num_measurements))
    self.initialize_variables()

    @pymc.potential
    def logp_prior(populations=self.populations, mu=self.mu):
        return log_jeffreys(populations, predictions, mu=mu)
    self.logp_prior = logp_prior
    y_hat = n*p
    # parentheses fixed: the second term is the log of the ratio,
    # (n-y) * log((n-y)/(n-y_hat)), matching the standard binomial deviance
    return 2.*sum((y*log(y/y_hat))[where(y > 0)]) \
        + 2.*sum(((n-y)*log((n-y)/(n-y_hat)))[where(y < n)])

# c: .02-1.
# alph: 18-30
# b: .02-1.

# alpha = array([10., 10., 10., 3.])
sig_mean = array([.5, .4, .5, 1, .4, .5, 1])
# sigma = pm.Gamma('sigma', alpha, alpha/sig_mean)
sigma = pm.OneOverX('sigma', value=sig_mean)

p_mean_mu = array([1.3, 3.7, 2.3, 0, 3.7, 2.3, 0])
# p_mean = pm.MvNormal('p_mean', p_mean_mu, diag([10., 10., 10., 1.]))
p_mean = pm.Uninformative('p_mean', value=p_mean_mu)

R1 = pm.Uninformative('R1', zeros(6, dtype=float))
R2 = pm.Uninformative('R2', zeros(3, dtype=float))
R3 = pm.Uninformative('R3', zeros(3, dtype=float))

# For debugging
# R1.value = arange(1,7)
# R2.value = arange(7,10)
# R3.value = arange(10,13)


@pm.deterministic
def cholfac(R1=R1, R2=R2, R3=R3, sigma=sigma):
    """Cholesky factor of the covariance matrix."""
    cov = np.zeros((7, 7), dtype=float)
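A quick numeric check of the binomial deviance formula above (toy numbers, assumed setup, with fresh names to avoid clobbering the module's n): when the fitted proportions equal the observed ones, both terms vanish and the deviance is exactly zero.

from numpy import array, log, sum, where

n_chk = array([10., 20.])
y_chk = array([3., 5.])
y_hat_chk = n_chk * (y_chk / n_chk)   # fitted counts equal observed counts
dev = 2.*sum((y_chk*log(y_chk/y_hat_chk))[where(y_chk > 0)]) \
    + 2.*sum(((n_chk-y_chk)*log((n_chk-y_chk)/(n_chk-y_hat_chk)))[where(y_chk < n_chk)])
assert abs(dev) < 1e-12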
def create_model(region_name, all_pts, name, scale_params, amp_params, cpus,
                 with_stukel, spatial, chunk, covariate_names, disttol, ttol,
                 AM_delay=50000, AM_interval=100, AM_sd=.1, crashed_db=None):

    # ======================================
    # = Make sure it's safe to make output =
    # ======================================
    if not spatial:
        name += '_nonspatial'
    if with_stukel:
        name += '_stukel'
    for cname in covariate_names:
        name += '_%s' % cname
    if name + '.hdf5' in os.listdir('.'):
        print
        print """=============
= ATTENTION =
============="""
        print
        OK = False
        while not OK:
            y = raw_input('Database %s already exists.\nDo you want to delete it? Error will be raised otherwise.\n>> ' % (name + '.hdf5'))
            if y.lower() == 'yes':
                print 'OK, moving to trash.'
                os.system('mv %s ~/.Trash' % (name + '.hdf5'))
                OK = True
            elif y.lower() == 'no':
                raise RuntimeError, 'But dash it all! I mean to say, what?'
            else:
                y = raw_input('Please type yes or no.\n>> ')

    norun_name = '_'.join(name.split('_')[:2])

    C_time = [0.]
    f_time = [0.]
    M_time = [0.]

    # =============================
    # = Preprocess data, uniquify =
    # =============================

    # Convert latitude and longitude from degrees to radians.
    lon = all_pts.LONG * np.pi / 180.
    lat = all_pts.LAT * np.pi / 180.

    # Convert time to end year - 2009 (no sense forcing mu to adjust by too much).
    # t = all_pts.YEAR_START-2009. + all_pts.MONTH_STAR / 12.
    t = all_pts.TIME - 2009

    # Make lon, lat, t triples.
    data_mesh = np.vstack((lon, lat, t)).T

    disttol = disttol / 6378.
    ttol = ttol / 12.

    # Find near spatiotemporal duplicates.
    if spatial:
        ui = []
        ri = []
        fi = []
        ti = []
        dx = np.empty(1)
        for i in xrange(data_mesh.shape[0]):
            match = False
            for j in xrange(len(ui)):
                pm.gp.geo_rad(dx, data_mesh[i, :2].reshape((1, 2)),
                              data_mesh[ui[j], :2].reshape((1, 2)))
                dt = abs(t[ui[j]] - t[i])

                if dx[0] < disttol and dt < ttol:
                    match = True
                    fi.append(j)
                    ti[j].append(i)
                    ri.append(i)
                    break

            if not match:
                fi.append(len(ui))
                ui.append(i)
                ti.append([i])

        ui = np.array(ui)
        ti = [np.array(tii) for tii in ti]
        fi = np.array(fi)
        ri = np.array(ri)
        logp_mesh = data_mesh[ui, :]
        if len(ri) > 0:
            repeat_mesh = data_mesh[ri, :]
        else:
            repeat_mesh = np.array([])
    else:
        ui = np.arange(len(t))
        ti = [np.array([uii]) for uii in ui]
        fi = ui
        ri = np.array([])
        logp_mesh = data_mesh
        repeat_mesh = np.array([])

    # =====================
    # = Create PyMC model =
    # =====================

    init_OK = False
    while not init_OK:
        # Flat prior on m_const (mu).
        m_const = pm.Uninformative('m_const', value=-3.)
        if with_stukel:
            m_const.value = -1.1

        # Flat prior on coefficient of time (k).
        t_coef = pm.Uninformative('t_coef', value=.1)
        if with_stukel:
            t_coef.value = -.4

        # Inverse-gamma prior on nugget variance V.
        tau = pm.Gamma('tau', value=2., alpha=.001, beta=.001 / .25)
        V = pm.Lambda('V', lambda tau=tau: 1. / tau)

        vars_to_writeout = ['V', 'm_const', 't_coef']

        # Pull out covariate information.  The values of covariate_dict are
        # (Stochastic, interpolated covariate) tuples.  Interpolation is done
        # to the data mesh.
        covariate_dict = {}
        for cname in covariate_names:
            # hf = openFile(mbgw.__path__[0] + '/auxiliary_data/' + cname + '.hdf5')
            if cname == 'periurb':
                this_interp_covariate = all_pts.URB_CLS == 2
                if np.sum(all_pts.URB_CLS == 3) < 10:
                    print 'Warning: Very few urban points, using same coefficient for urban and periurban'
                    this_interp_covariate += all_pts.URB_CLS == 3
            elif cname == 'urb':
                if np.sum(all_pts.URB_CLS == 3) >= 10:
                    this_interp_covariate = all_pts.URB_CLS == 3
                else:
                    this_interp_covariate = None
            else:
                this_cov = getattr(auxiliary_data, cname)
                this_interp_covariate = nearest_interp(
                    this_cov.long[:], this_cov.lat[:], this_cov.data,
                    data_mesh[:, 0], data_mesh[:, 1])
            if this_interp_covariate is not None:
                this_coef = pm.Uninformative(cname + '_coef', value=0.)
                covariate_dict[cname] = (this_coef, this_interp_covariate)

        # Lock down parameters of Stukel's link function to obtain standard
        # logit.  These can be freed by removing 'observed' flags, but mixing
        # gets much worse.
        if with_stukel:
            a1 = pm.Uninformative('a1', .5)
            a2 = pm.Uninformative('a2', .8)
        else:
            a1 = pm.Uninformative('a1', 0, observed=True)
            a2 = pm.Uninformative('a2', 0, observed=True)

        transformed_spatial_vars = [V]
        if spatial:
            # Make it easier for inc (psi) to jump across 0: let nonmod_inc
            # roam freely over the reals, and mod it by pi to get the 'inc'
            # parameter.
            nonmod_inc = pm.Uninformative('nonmod_inc', value=.5)
            inc = pm.Lambda('inc', lambda nonmod_inc=nonmod_inc: nonmod_inc % np.pi)

            # Use a uniform prior on sqrt ecc (sqrt ???). Using a uniform prior
            # on ecc itself put too little probability mass on appreciable
            # levels of anisotropy.
            sqrt_ecc = pm.Uniform('sqrt_ecc', value=.1, lower=0., upper=1.)
            ecc = pm.Lambda('ecc', lambda s=sqrt_ecc: s**2)

            # Subjective skew-normal prior on amp (the partial sill, tau) in
            # log-space.  Parameters are passed in in manual_MCMC_supervisor.
            log_amp = pm.SkewNormal('log_amp', **amp_params)
            amp = pm.Lambda('amp', lambda log_amp=log_amp: np.exp(log_amp))

            # Subjective skew-normal prior on scale (the range, phi_x) in
            # log-space.
            log_scale = pm.SkewNormal('log_scale', **scale_params)
            scale = pm.Lambda('scale', lambda log_scale=log_scale: np.exp(log_scale))

            # Exponential prior on the temporal scale/range, phi_t.  Standard
            # one-over-x doesn't work bc data aren't strong enough to prevent
            # collapse to zero.
            scale_t = pm.Exponential('scale_t', .1)

            # Uniform prior on limiting correlation far in the future or past.
            t_lim_corr = pm.Uniform('t_lim_corr', 0, 1, value=.8)

            # Uniform prior on sinusoidal fraction in temporal variogram
            sin_frac = pm.Uniform('sin_frac', 0, 1)

            vars_to_writeout.extend(['inc', 'ecc', 'amp', 'scale', 'scale_t',
                                     't_lim_corr', 'sin_frac'])
            transformed_spatial_vars.extend([inc, ecc, amp, scale])

        # Collect stochastic variables with observed=False for the adaptive
        # Metropolis stepper.
        trial_stochs = [v[0] for v in covariate_dict.itervalues()] \
            + [m_const, tau, a1, a2, t_coef]
        if spatial:
            trial_stochs = trial_stochs + [nonmod_inc, sqrt_ecc, log_amp,
                                           log_scale, scale_t, t_lim_corr,
                                           sin_frac]
        nondata_stochs = []
        for stoch in trial_stochs:
            if not stoch.observed:
                nondata_stochs.append(stoch)

        # Collect variables to write out

        # The mean of the field
        @pm.deterministic
        def M(m=m_const, tc=t_coef):
            return pm.gp.Mean(st_mean_comp, m_const=m, t_coef=tc)

        # The mean, evaluated at the observation points, plus the covariates
        @pm.deterministic(trace=False)
        def M_eval(M=M, lpm=logp_mesh, cv=covariate_dict):
            out = M(lpm)
            for c in cv.itervalues():
                out += c[0] * c[1][ui]
            return out

        # Create covariance and MV-normal F if model is spatial.
        if spatial:
            try:
                # A constraint on the space-time covariance parameters that
                # ensures temporal correlations are always between -1 and 1.
                @pm.potential
                def st_constraint(sd=.5, sf=sin_frac, tlc=t_lim_corr):
                    if -sd >= 1. / (-sf * (1 - tlc) + tlc):
                        return -np.Inf
                    else:
                        return 0.

                # A Deterministic valued as a Covariance object.  Uses
                # covariance my_st, defined above.
                @pm.deterministic
                def C(amp=amp, scale=scale, inc=inc, ecc=ecc, scale_t=scale_t,
                      t_lim_corr=t_lim_corr, sin_frac=sin_frac):
                    return pm.gp.FullRankCovariance(my_st, amp=amp, scale=scale,
                                                    inc=inc, ecc=ecc,
                                                    st=scale_t, sd=.5,
                                                    tlc=t_lim_corr, sf=sin_frac,
                                                    n_threads=cpus)

                # The evaluation of the Covariance object.
                @pm.deterministic(trace=False)
                def C_eval(C=C):
                    return C(logp_mesh, logp_mesh)

                # The field evaluated at the uniquified data locations
                f = pm.MvNormalCov('f', M_eval, C_eval, value=M_eval.value)

                # The field evaluated at all the data locations
                @pm.deterministic(trace=False)
                def f_eval(f=f):
                    return f[fi]

                init_OK = True
            except pm.ZeroProbability, msg:
                print 'Trying again: %s' % msg
                init_OK = False
                gc.collect()

        # if not spatial
        else:
            C = None

            # The field is just the mean; there's no spatially-structured
            # component.
            @pm.deterministic
            def f(M=M_eval):
                return M[fi]
            f_eval = f

            init_OK = True
@mc.deterministic
def pred(pi=pi):
    return mc.rbinomial(n_pred, pi) / float(n_pred)

### @export 'binomial-fit'
mc.MCMC([pi, obs, pred]).sample(iter, burn, thin,
                                verbose=False, progress_bar=False)

### @export 'binomial-store'
# mc.Matplot.plot(pi)
# pl.savefig('book/graphics/ci-prev_meta_analysis-binomial_diagnostic.png')

results['Binomial'] = dict(pi=pi.stats(), pred=pred.stats())

### @export 'beta-binomial-model'
alpha = mc.Uninformative('alpha', value=4.)
beta = mc.Uninformative('beta', value=1000.)
pi_mean = mc.Lambda('pi_mean',
                    lambda alpha=alpha, beta=beta: alpha/(alpha+beta))
pi = mc.Beta('pi', alpha, beta, value=r)

@mc.potential
def obs(pi=pi):
    return mc.binomial_like(r*n, n, pi)

@mc.deterministic
def pred(alpha=alpha, beta=beta):
    return mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / float(n_pred)

### @export 'beta-binomial-fit'
mcmc = mc.MCMC([alpha, beta, pi_mean, pi, obs, pred])
mcmc.use_step_method(mc.AdaptiveMetropolis, [alpha, beta])
import numpy as np
import pymc


def KellyModel(x, xerr, y, yerr, xycovar, parts, ngauss=3):
    # Implementation of the Kelly07 model, but without nondetection support.
    # Prior as defined in section 6.1 of Kelly07.

    alpha = pymc.Uninformative('alpha', value=np.random.uniform(-1, 1))
    parts['alpha'] = alpha

    beta = pymc.Uninformative('beta',
                              value=np.random.uniform(-np.pi/2, np.pi/2))
    parts['beta'] = beta

    sigint2 = pymc.Uniform('sigint2', 1e-4, 1.)
    parts['sigint2'] = sigint2

    piprior = pymc.Dirichlet('pi', np.ones(ngauss))
    parts['piprior'] = piprior

    @pymc.deterministic(trace=False)
    def pis(piprior=piprior):
        lastpi = 1. - np.sum(piprior)
        allpi = np.zeros(ngauss)
        allpi[:-1] = piprior
        allpi[-1] = lastpi
        return allpi
    parts['pis'] = pis

    mu0 = pymc.Uninformative('mu0', np.random.uniform(-1, 1))
    parts['mu0'] = mu0

    w2 = pymc.Uniform('w2', 1e-4, 1e4)
    parts['w2'] = w2

    # dropping the 1/2 factor on w2, because I don't think it matters
    xvars = pymc.InverseGamma('xvars', 0.5, w2, size=ngauss+1)
    parts['xvars'] = xvars

    @pymc.deterministic(trace=False)
    def tauU2(xvars=xvars):
        return 1./xvars[-1]
    parts['tauU2'] = tauU2

    xmus = pymc.Normal('xmus', mu0, tauU2, size=ngauss)
    parts['xmus'] = xmus

    @pymc.observed
    def likelihood(value=0., x=x, xerr2=xerr**2, y=y, yerr2=yerr**2,
                   xycovar=xycovar, alpha=alpha, beta=beta, sigint2=sigint2,
                   pis=pis, xmus=xmus, xvars=xvars):
        return stats.kelly_like(x=x, xerr2=xerr2, y=y, yerr2=yerr2,
                                xycovar=xycovar, alpha=alpha, beta=beta,
                                sigint2=sigint2, pis=pis, mus=xmus,
                                tau2=xvars[:-1])
    parts['likelihood'] = likelihood
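Assumed usage (not from the source): KellyModel fills parts in place, and the node dict can be handed straight to pymc.MCMC; stats.kelly_like must be importable from the surrounding package for sampling to run.

import numpy as np
import pymc

npts = 50
x = np.random.randn(npts)
y = 1. + .5*x + .1*np.random.randn(npts)
xerr = yerr = .1*np.ones(npts)
xycovar = np.zeros(npts)

parts = {}
KellyModel(x, xerr, y, yerr, xycovar, parts)
m = pymc.MCMC(parts)
m.sample(1000)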
# model = models[model_choice]

# methodmenu = menu.MenuSystem('Choose appropriate method')
# methodmenu.add_entry('a', 'Line average')
# methodmenu.add_entry('b', 'Line integral')
# method_choice = str(methodmenu.run())
# method = aggregate_options[method_choice]
# print('The method is ' + str(method))

############################################################################
# Set up the model
############################################################################

Q = pm.Uniform('Q', lower=0, upper=1)
tau = pm.Uninformative('tau', value=1)

z = 2.3  # Height of instrument tower.

# Compute perturbation matrix for
reading_predicted = np.empty_like(perturbation)
for i, (reflector, u, theta, T, P, L) in enumerate(
        zip(reflectors, wind_speed, plume_dir, temp, pressure, lvals)):
    params = {'z': z, 'L': L, 'U': u, 'H': h_source}
    reflector = int(reflector)
    reading_predicted[i] = util.line_average(
        [source_x, source_y], p0_list[reflector], p1_list[reflector],
        z_list[reflector], samples, 1, h_source, theta, T, P, params,
        'gaussian')
def anneal_w_graphics(n=11, depth=10):
    """ Make an animation of the BDST chain walking on an nxn grid and play it
    """
    ni = 5
    nj = 100
    nk = 5

    beta = mc.Uninformative('beta', value=1.)

    G = nx.grid_graph([n, n])
    G.orig_pos = dict([[v, v] for v in G.nodes_iter()])
    G.pos = dict([[v, v] for v in G.nodes_iter()])
    root = (5, 5)
    bdst = BDST(G, root, depth, beta)

    mod_mc = mc.MCMC([beta, bdst])
    mod_mc.use_step_method(STMetropolis, bdst)
    mod_mc.use_step_method(mc.NoStepper, beta)

    for i in range(ni):
        beta.value = i * 5
        for j in range(nj):
            mod_mc.sample(1)
            T = bdst.value
            for k in range(nk):
                if random.random() < .95:
                    delta_pos = nx.spring_layout(T, pos=G.pos, fixed=[root],
                                                 iterations=1)
                else:
                    delta_pos = G.orig_pos
                eps = .01
                my_avg = lambda x, y: (x[0] * (1. - eps) + y[0] * eps,
                                       x[1] * (1. - eps) + y[1] * eps)
                for v in G.pos:
                    G.pos[v] = my_avg(G.pos[v], delta_pos[v])

                views.plot_graph_and_tree(G, T, time=1. * k / nk)
                msg = ''  # renamed from 'str' to avoid shadowing the builtin
                msg += ' beta: %.1f\n' % beta.value
                msg += ' cur depth: %d (target: %d)\n' % (T.depth, depth)
                sm = mod_mc.step_method_dict[bdst][0]
                msg += ' accepted: %d of %d\n' % (sm.accepted,
                                                  sm.accepted + sm.rejected)
                plt.figtext(0, 0, msg)
                plt.figtext(1, 0, 'healthyalgorithms.wordpress.com \n',
                            ha='right')
                plt.axis([-1, n, -1, n])
                plt.axis('off')
                plt.subplots_adjust(0, 0, 1, 1)
                plt.savefig('bdst%06d.png' % (i * nj * nk + j * nk + k))
        print 'accepted:', mod_mc.step_method_dict[bdst][0].accepted

    import subprocess
    subprocess.call('mencoder mf://bdst*.png -mf w=800:h=600 -ovc x264'
                    ' -of avi -o bdst_G_%d_d_%d.avi' % (n, depth), shell=True)
    subprocess.call('mplayer -loop 0 bdst_G_%d_d_%d.avi' % (n, depth),
                    shell=True)
    subprocess.call('rm bdst*.png', shell=True)  # shell=True needed for the glob

    return bdst.value
pl.xlabel('Rate ($r$)')
pl.ylabel('Study Size ($n$)')
pl.axis([-.0001, .0101, 50., 1500000])
pl.legend(numpoints=1, fancybox=True, shadow=True, prop={'size': 'x-large'})
pl.subplots_adjust(bottom=.13, top=.93)
pl.savefig('book/graphics/binomial-model-funnel.pdf')
pl.savefig('book/graphics/binomial-model-funnel.png')

### @export 'binomial-model-problem'
n = 50000
pop_A_prev = .002
pop_A_N = n

pop_B_prev = .006
pop_B_N = n

pi = mc.Uninformative('pi', value=pop_A_prev)

@mc.potential
def obs(pi=pi):
    return pop_A_prev*pop_A_N*pl.log(pi) + (1-pop_A_prev)*pop_A_N*pl.log(1-pi) \
        + pop_B_prev*pop_B_N*pl.log(pi) + (1-pop_B_prev)*pop_B_N*pl.log(1-pi)

pop_C_N = n
pop_C_k = mc.Binomial('pop_C_k', pop_C_N, pi)

mc.MCMC([pi, obs, pop_C_k]).sample(20000, 10000, 2,
                                   verbose=False, progress_bar=False)
def make_model(recs, curve_sub, curve_params=[], pr_type='mixed',
               pr_hists=None, pr_samps=None, check_inflec=True):
    input_dict = curve_sub(*curve_params)
    arfun = input_dict['arfun']
    fun_params = input_dict['fun_params']

    # if pr_type=='unknown':
    #     splreps = []
    #     for i in xrange(len(pr_hists)):
    #         where_ok = np.where(pr_hists[i][0]>0)
    #         pr_mesh = pr_hists[i][1][where_ok]
    #         lp_mesh = np.log(pr_hists[i][0][where_ok])
    #         splreps.append(UnivariateSpline(pr_mesh, lp_mesh, bbox=[0,1]))
    #
    #     @pm.stochastic(dtype=float)
    #     def pr(value = pr_hists[:,1,10], splreps = splreps):
    #         out=0
    #         for i in xrange(len(value)):
    #             this_value = value[i]
    #             if this_value<0 or this_value>1:
    #                 return -np.inf
    #             else:
    #                 out += splreps[i](this_value)
    #         return out
    if pr_type == 'model_exp':
        pr = recs.mbg_pr
    elif pr_type == 'data':
        pr = recs.pr
    elif pr_type == 'mixed':
        pr = recs.mix_pr
    elif pr_type == 'data_untrans':
        pr = recs.pfpr
    else:
        raise ValueError, 'PR type unknown'

    # # A deterministic that measures the change in attack rate given a
    # # certain change in PR.
    # delta_ar = pm.Lambda('delta_ar', lambda fp = fun_params: np.diff(arfun(diff_pts, *fp)))
    fboth = pm.Lambda('fboth', lambda fp=fun_params, pr=pr:
                      arfun(np.hstack((pr, xplot)), *fp))

    # Evaluation of trend at PR values
    AR_trend = pm.Lambda('AR_trend', lambda fp=fun_params, pr=pr: arfun(pr, *fp))

    # The function evaluated on the display mesh
    fplot = pm.Lambda('fplot', lambda fp=fun_params: arfun(xplot, *fp))

    pl.clf()
    pl.plot(xplot, fplot.value)

    @pm.potential
    def check_trend(AR=AR_trend, f=fplot):
        if np.any(AR <= 0) or np.any(f <= 0):
            return -np.Inf
        if check_inflec:
            d2 = np.diff(f, 2)
            d2 = d2[np.where(np.abs(d2) > 1e-6)]
            chgs = np.where(np.abs(np.diff(np.sign(d2))) > 1)[0]
            if np.diff(f[-3:], 2) > 0 or len(chgs) > 1:
                return -np.Inf
        return 0

    # Negative-binomial parameters.
    r_int = pm.Exponential('r_int', .0001, value=.3)
    r_lin = pm.Uninformative('r_lin', value=1.)
    r_quad = pm.Uninformative('r_quad', value=.1)
    rplot = pm.Lambda('rplot', lambda r_int=r_int, r_lin=r_lin, r_quad=r_quad:
                      r_int + r_lin * xplot + r_quad * xplot**2)

    @pm.potential
    def check_r(i=r_int, l=r_lin, q=r_quad):
        # if q>0:
        #     xhat = -l / 2 / q
        #     if i + l*xhat + q*xhat*xhat <= 0 and xhat>0:
        #         return -np.Inf
        if l <= 0 or l + 2. * q <= 0:
            return -np.Inf
        if i + l + q <= 0 or i < 0:
            return -np.Inf
        return 0

    # shape parameter of gamma process is multiplied by total survey time
    time_scale_fac = time_scaling(recs.pcd, recs.surv_int)
    tottime = (recs.yr_end - recs.yr_start + 1)
    scale_time = tottime / time_scale_fac
    pop = recs.pyor / tottime

    # Shape parameter of Poisson intensity is only multiplied by scaled
    # survey time.
    r = pm.Lambda('r', lambda i=r_int, l=r_lin, q=r_quad, pr=pr:
                  (i + l * pr + q * pr * pr) * scale_time)

    # Scale parameter of Poisson intensity is multiplied by scaled survey
    # time * number of people sampled.
    exp_rate = pm.Lambda('exp_rate', lambda t=AR_trend: scale_time * pop * t)

    # The data
    AR = pm.NegativeBinomial('AR', exp_rate, r, value=recs.cases, observed=True)

    @pm.deterministic(dtype=float)
    def AR_dev(AR=AR, mu=exp_rate, r=r):
        return np.array([pm.negative_binomial_like(AR[i], mu[i], r[i])
                         for i in xrange(len(AR))])

    out = locals()
    out.update(input_dict)
    return out
import numpy as np
import scipy.integrate
import pymc


def buildGaussMixture1DModel(halos, ngauss, modeltype='ratio'):
    parts = {}

    ### PDF handling
    massnorm = 1e15

    masses = halos[0]['masses']
    nmasses = len(masses)
    nclusters = len(halos)

    delta_masses = np.zeros((nclusters, nmasses - 1))
    delta_mls = np.zeros((nclusters, nmasses))
    pdfs = np.zeros((nclusters, nmasses))

    # also need to collect some statistics, to init mixture model
    pdfmeans = np.zeros(nclusters)
    pdfwidths = np.zeros(nclusters)

    for i in range(nclusters):
        if modeltype == 'additive':
            delta_masses[i, :] = (masses[1:] - masses[:-1]) / massnorm
            delta_mls[i, :] = (masses - halos[i]['true_mass']) / massnorm
            # preserve unitarity under integration
            pdfs[i, :] = halos[i]['pdf'] * massnorm
        elif modeltype == 'ratio':
            delta_masses[i, :] = (masses[1:] - masses[:-1]) / halos[i]['true_mass']
            delta_mls[i, :] = masses / halos[i]['true_mass']
            pdfs[i, :] = halos[i]['pdf'] * halos[i]['true_mass']

        pdfmeans[i] = scipy.integrate.trapz(delta_mls[i, :] * pdfs[i, :],
                                            delta_mls[i, :])
        pdfwidths[i] = np.sqrt(scipy.integrate.trapz(
            pdfs[i, :] * (delta_mls[i, :] - pdfmeans[i])**2, delta_mls[i, :]))

    datacenter = np.mean(pdfmeans)
    dataspread = np.std(pdfmeans)
    datatypvar = np.mean(pdfwidths)
    dataminsamp = np.min(delta_masses)

    print datacenter, dataspread, datatypvar, dataminsamp

    #### Mixture model priors
    piprior = pymc.Dirichlet('piprior', np.ones(ngauss))
    parts['piprior'] = piprior

    mu0 = pymc.Uninformative(
        'mu0', datacenter + np.random.uniform(-5 * dataspread, 5 * dataspread))
    parts['mu0'] = mu0

    # kelly07 xvars prior.
    # w2 = pymc.Uniform('w2', 0.1/dataspread**2., 100*max(1./dataspread**2, 1./datatypvar**2))
    # print w2.parents
    # parts['w2'] = w2
    #
    # # dropping the 1/2 factor on w2, because I don't think it matters
    # xvars = pymc.InverseGamma('xvars', 0.5, 0.5*w2, size=ngauss+1)

    logxsigma = pymc.Uniform('logxsigma', np.log(2 * dataminsamp),
                             np.log(5 * dataspread), size=ngauss + 1)
    parts['logxsigma'] = logxsigma

    @pymc.deterministic(trace=False)
    def xvars(logxsigma=logxsigma):
        return np.exp(logxsigma)**2
    parts['xvars'] = xvars

    @pymc.deterministic(trace=False)
    def tauU2(xvars=xvars):
        return 1. / xvars[-1]
    parts['tauU2'] = tauU2

    xmus = pymc.Normal('xmus', mu0, tauU2, size=ngauss)
    parts['xmus'] = xmus

    @pymc.observed
    def data(value=0., delta_mls=delta_mls, delta_masses=delta_masses,
             pdfs=pdfs, piprior=piprior, xmus=xmus, xvars=xvars):
        # complete pi
        pis = pymc.extend_dirichlet(piprior)
        # print pis

        # # enforce identifiability by ranking means
        # for i in range(xmus.shape[0]-1):
        #     if (xmus[i] >= xmus[i+1:]).any():
        #         raise pymc.ZeroProbability
        #         return

        return dlntools.pdfGaussMix1D(delta_mls=delta_mls,
                                      delta_masses=delta_masses, pdfs=pdfs,
                                      pis=pis, mus=xmus, tau2=xvars[:-1])
    parts['data'] = data

    return parts
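The input format is only implied by the loop above; this sketch (an assumption) builds halos as dicts sharing one 'masses' grid, each with a 'true_mass' and a pdf normalized on that grid, then samples the mixture (dlntools.pdfGaussMix1D and pymc.extend_dirichlet come from the surrounding package).

import numpy as np
import pymc

grid = np.linspace(1e14, 2e15, 50)
m0 = 7e14
halos = []
for _ in range(10):
    center = m0 * np.random.uniform(0.8, 1.2)  # scatter of lensing mass about true mass
    pdf = np.exp(-(grid - center)**2 / (2 * 1e14**2)) / (np.sqrt(2*np.pi) * 1e14)
    halos.append(dict(masses=grid, true_mass=m0, pdf=pdf))

parts = buildGaussMixture1DModel(halos, ngauss=2, modeltype='ratio')
m = pymc.MCMC(parts)
m.sample(1000)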
@mc.deterministic
def pred(pi=pi):
    return mc.rpoisson(pi * n_pred) / float(n_pred)

### @export 'poisson-fit-and-store'
mc.MCMC([pi, obs, pred]).sample(iter, burn, thin,
                                verbose=False, progress_bar=False)

results['Poisson'] = dict(pred=pred.stats(), pi=pi.stats())

### @export 'negative-binomial-model'
pi = mc.Uniform('pi', lower=0, upper=1, value=.5)
delta = mc.Uninformative('delta', value=100.)

@mc.potential
def obs(pi=pi, delta=delta):
    return mc.negative_binomial_like(r * n, pi * n, delta)

@mc.deterministic
def pred(pi=pi, delta=delta):
    return mc.rnegative_binomial(pi * n_pred, delta) / float(n_pred)

### @export 'negative-binomial-fit-and-store'
mc.MCMC([pi, delta, obs, pred]).sample(iter, burn, thin,
                                       verbose=False, progress_bar=False)
pl.figure(figsize=(11, 8.5), dpi=120)
pl.subplots_adjust(wspace=.4)

pl.subplot(2, 2, 1)
plot_beta_binomial_funnel(4., 996.)

pl.subplot(2, 2, 2)
plot_beta_binomial_funnel(40., 9960.)

pl.subplot(2, 1, 2)
r = pl.array(schiz['r'])
n = pl.array(schiz['n'], dtype=int)
k = r * n

alpha = mc.Uninformative('alpha', value=1.)
beta = mc.Uninformative('beta', value=999.)
pi = mc.Beta('pi', alpha, beta, value=.001 * pl.ones(16))
pi_mean = mc.Lambda('pi_mean',
                    lambda alpha=alpha, beta=beta: alpha / (alpha + beta))

@mc.potential
def obs(pi=pi):
    return mc.binomial_like(k, n, pi)

@mc.deterministic
def pred(pi=pi, alpha=alpha, beta=beta):
    return mc.rbetabin(alpha, beta, n)
# =========================================
#
# This notebook implements and compares samplers in PyMC
# to sample uniformly from an $n$-dimensional ball,
# i.e. to sample from the set
# $$
# \mathbf{B}_n = \\{x \in \mathbf{R}^n: \|x\| \leq 1\\}
# $$

# <codecell>

mc.np.random.seed(1234567)

# simple model
n = 2
X = [mc.Uninformative('X_%d' % i, value=0) for i in range(n)]

@mc.potential
def in_ball(X=X):
    if X[0]**2 + X[1]**2 <= 1.:
        return 0
    else:
        return -pl.inf

# <codecell>

class UniformBall(mc.Gibbs):
    def __init__(self, stochastic, others, verbose=None):
def make_model(lon, lat, input_data, covariate_keys, n_male, male_pos,
               n_fem, fem_pos):
    """ This function is required by the generic MBG code. """

    # How many nuggeted field points to handle with each step method
    grainsize = 10

    # Unique data locations
    data_mesh, logp_mesh, fi, ui, ti = uniquify(lon, lat)

    a = pm.Exponential('a', .01, value=1)
    b = pm.Exponential('b', .01, value=1)

    init_OK = False
    while not init_OK:
        try:
            # The partial sill.
            amp = pm.Exponential('amp', .1, value=1.)

            # The range parameters. Units are RADIANS.
            # 1 radian = the radius of the earth, about 6378.1 km
            scale = pm.Exponential('scale', .1, value=.08)

            # This parameter controls the degree of differentiability of
            # the field.
            diff_degree = pm.Uniform('diff_degree', .01, 3)

            # The nugget variance.
            V = pm.Exponential('V', .1, value=.1)

            @pm.potential
            def V_constraint(V=V):
                if V < .1:
                    return -np.inf
                else:
                    return 0

            m = pm.Uninformative('m', value=-25)

            @pm.deterministic(trace=False)
            def M(m=m):
                return pm.gp.Mean(mean_fn, m=m)

            # Create the covariance & its evaluation at the data locations.
            facdict = dict([(k, 1.e6) for k in covariate_keys])
            facdict['m'] = 0

            @pm.deterministic(trace=False)
            def C(amp=amp, scale=scale, diff_degree=diff_degree,
                  ck=covariate_keys, id=input_data, ui=ui, facdict=facdict):
                """A covariance function created from the current parameter
                values."""
                eval_fn = CovarianceWithCovariates(cut_matern, id, ck, ui,
                                                   fac=facdict)
                return pm.gp.FullRankCovariance(eval_fn, amp=amp, scale=scale,
                                                diff_degree=diff_degree)

            sp_sub = pm.gp.GPSubmodel('sp_sub', M, C, logp_mesh, tally_f=False)

            init_OK = True
        except pm.ZeroProbability:
            init_OK = False
            cls, inst, tb = sys.exc_info()
            print 'Restarting, message %s\n' % inst.message

    # Make f start somewhere a bit sane
    sp_sub.f_eval.value = sp_sub.f_eval.value - np.mean(sp_sub.f_eval.value)

    # Loop over data clusters
    eps_p_f_d = []
    s_d = []
    male_d = []
    het_def_d = []
    fem_d = []

    for i in xrange(len(male_pos)/grainsize+1):
        sl = slice(i*grainsize, (i+1)*grainsize, None)
        if len(male_pos[sl]) > 0:
            # Nuggeted field in this cluster
            eps_p_f_d.append(pm.Normal('eps_p_f_%i' % i,
                                       sp_sub.f_eval[fi[sl]], 1./V,
                                       trace=False))

            # The allele frequency
            s_d.append(pm.Lambda('s_%i' % i,
                                 lambda lt=eps_p_f_d[-1]: invlogit(lt),
                                 trace=False))

            # was True-np.isnan(...); ~ is the supported boolean negation
            where_male = np.where(~np.isnan(n_male[sl]))[0]
            where_fem = np.where(~np.isnan(n_fem[sl]))[0]

            if len(where_male) > 0:
                male_d.append(pm.Binomial('male_%i' % i,
                                          n_male[sl][where_male],
                                          s_d[-1][where_male],
                                          value=male_pos[sl][where_male],
                                          observed=True))
            if len(where_fem) > 0:
                het_def_d.append(pm.Beta('het_def_%i' % i, alpha=a, beta=b,
                                         size=len(where_fem), trace=False))
                p = s_d[-1][where_fem]
                p_def = pm.Lambda('p_def',
                                  lambda p=p, h=het_def_d[-1]: p_fem_def(p, h),
                                  trace=False)
                fem_d.append(pm.Binomial('fem_%i' % i, n_fem[sl][where_fem],
                                         p_def, value=fem_pos[sl][where_fem],
                                         observed=True))

    # The field plus the nugget
    @pm.deterministic
    def eps_p_f(eps_p_fd=eps_p_f_d):
        """Concatenated version of eps_p_f, for postprocessing & Gibbs
        sampling purposes"""
        return np.hstack(eps_p_fd)

    # The heterozygote deficiency
    @pm.deterministic
    def het_def(het_def_d=het_def_d):
        return np.hstack(het_def_d)

    return locals()
def __init__(self, predictions, measurements, uncertainties,
             regularization_strength=1.0, precision=None, prior_pops=None):
    """Bayesian Energy Landscape Tilting with maximum entropy prior and
    correlation-corrected likelihood.

    Parameters
    ----------
    predictions : ndarray, shape = (num_frames, num_measurements)
        predictions[j, i] gives the ith observable predicted at frame j
    measurements : ndarray, shape = (num_measurements)
        measurements[i] gives the ith experimental measurement
    uncertainties : ndarray, shape = (num_measurements)
        uncertainties[i] gives the uncertainty of the ith experiment
    regularization_strength : float
        How strongly to weight the MVN prior (e.g. lambda)
    precision : ndarray, optional, shape = (num_measurements, num_measurements)
        The precision matrix of the predicted observables.
    prior_pops : ndarray, optional, shape = (num_frames)
        Prior populations of each conformation.  If None, use uniform
        populations.
    """
    BELT.__init__(self, predictions, measurements, uncertainties,
                  prior_pops=prior_pops)

    if precision is None:  # 'is None' rather than '== None', which broadcasts on arrays
        precision = np.cov(predictions.T)
        if precision.ndim == 0:
            precision = precision.reshape((1, 1))

    # The prior on alpha is defined as a potential, so we use Uninformative
    # variables here.
    self.alpha = pymc.Uninformative("alpha",
                                    value=np.zeros(self.num_measurements))
    self.initialize_variables()

    @pymc.potential
    def logp_prior(populations=self.populations, mu=self.mu,
                   prior_pops=self.prior_pops):
        if populations.min() <= 0:
            return -1 * np.inf
        else:
            return -1 * regularization_strength * (
                populations * (np.log(populations / prior_pops))).sum()
    self.logp_prior = logp_prior

    rho = np.corrcoef(predictions.T)
    rho_inverse = np.linalg.inv(rho)

    @pymc.potential
    def logp(populations=self.populations, mu=self.mu):
        z = (mu - measurements) / uncertainties
        chi2 = rho_inverse.dot(z)
        chi2 = z.dot(chi2)
        return -0.5 * chi2
    self.logp = logp
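A standalone numpy illustration (toy numbers, assumed) of the correlation-corrected chi-square inside logp above: the z-scores are rotated by the inverse correlation matrix of the predictions before being contracted.

import numpy as np

predictions = np.random.randn(100, 3)
measurements = np.zeros(3)
uncertainties = np.ones(3)

rho_inverse = np.linalg.inv(np.corrcoef(predictions.T))
mu = predictions.mean(0)                  # stand-in for the ensemble-averaged observables
z = (mu - measurements) / uncertainties
print(-0.5 * z.dot(rho_inverse.dot(z)))   # the log-likelihood contribution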
def make_model(X):
    neighbors, triangles, trimap, b = spherical.triangulate_sphere(X)
    # spherical.plot_triangulation(X,neighbors)

    n = len(X)  # number of mesh vertices; 'n' was used below but never defined

    # Matrix generation
    triangle_areas = [spherical.triangle_area(X, t) for t in triangles]
    Ctilde = spherical.Ctilde(X, triangles, triangle_areas)
    C = spherical.C(X, triangles, triangle_areas)
    G = spherical.G(X, triangles, triangle_areas)

    # Operator generation
    Ctilde = cholmod.into_matrix_type(Ctilde)
    G = cholmod.into_matrix_type(G)

    # amp is the overall amplitude. It's a free variable that will probably
    # be highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m', value=0)

    @pm.deterministic(trace=False)
    def M(m=m, n=len(X)):
        """The mean vector"""
        return np.ones(n) * m

    kappa = pm.Exponential('kappa', 1, value=3)
    alpha = pm.DiscreteUniform('alpha', 1, 10, value=2., observed=True)

    @pm.deterministic(trace=False)
    def Q(kappa=kappa, alpha=alpha, amp=amp):
        out = operators.mod_frac_laplacian_precision(
            Ctilde, G, kappa, alpha, cholmod) / np.asscalar(amp)**2
        return out

    # Nailing this ahead of time reduces time to compute logp from .18 to
    # .13s for n=25000.
    pattern_products = cholmod.pattern_to_products(Q.value)

    # @pm.deterministic
    # def pattern_products(Q=Q):
    #     return cholmod.pattern_to_products(Q)

    @pm.deterministic(trace=False)
    def precision_products(Q=Q, p=pattern_products):
        try:
            return cholmod.precision_to_products(Q, **p)
        except cholmod.NonPositiveDefiniteError:
            return None

    S = pymc_objects.SparseMVN('S', M, precision_products, cholmod)

    vars = pm.rgamma(4, 4, size=n)
    vals = X[:, 2]
    data = pm.Normal('data', S, 1. / vars, value=vals, observed=True)

    Qobs = sparse.csc_matrix((n, n))
    Qobs.setdiag(1. / vars)

    @pm.deterministic(trace=False)
    def true_evidence(Q=Q, M=M, vals=vals, vars=vars):
        C = np.array(Q.todense().I + np.diag(vars))
        return pm.mv_normal_cov_like(vals, M, C)

    # Stuff for the scoring algorithm-based full conditional
    def first_likelihood_derivative(x, vals=vals, vars=vars):
        return -(x - vals) / vars

    def second_likelihood_derivative(x, vals=vals, vars=vars):
        return -1. / vars

    return locals()
def fit_blackbody_montecarlo(frequency, seds, errors=None,
                             temperature_guess=10, beta_guess=None,
                             scale_guess=None,
                             blackbody_function=blackbody, quiet=True,
                             return_MC=True, nsamples=5000, burn=1000,
                             min_temperature=0, max_temperature=100,
                             scale_keyword='scale', max_scale=1e60,
                             multivariate=False, **kwargs):
    """
    Parameters
    ----------
    frequency : array
        Array of frequency values
    seds : array
        Array of flux values
    errors : array (optional)
        Array of error values (1-sigma, normal)
    temperature_guess : float
        Input / starting point for temperature
    min_temperature : float
    max_temperature : float
        Lower/Upper limits on fitted temperature
    beta_guess : float (optional)
        Opacity beta value
    scale_guess : float
        Arbitrary scale value to apply to model to get correct answer
    blackbody_function : function
        Must take x-axis (e.g. frequency), temperature, then scale and beta
        keywords (dependence on beta can be none)
    return_MC : bool
        Return the pymc.MCMC object?
    nsamples : int
        Number of samples to use in determining the posterior distribution
        (the answer)
    burn : int
        Number of initial samples to ignore
    scale_keyword : ['scale','logscale','logN']
        What scale keyword to pass to the blackbody function to determine
        the amplitude
    kwargs : kwargs
        Passed to blackbody function
    """
    d = {}
    d['temperature'] = pymc.distributions.Uniform('temperature',
                                                  min_temperature,
                                                  max_temperature,
                                                  value=temperature_guess)
    d['scale'] = pymc.distributions.Uniform('scale', 0, max_scale,
                                            value=scale_guess)
    if beta_guess is not None:
        d['beta'] = pymc.distributions.Uniform('beta', 0, 10,
                                               value=beta_guess)
    else:
        d['beta'] = pymc.distributions.Uniform('beta', 0, 10, value=1)

    covar_list = dict([((i, j), pymc.Uninformative('%s-%s' % (i, j),
                                                   value=(i == j)))
                       for i, j in itertools.combinations_with_replacement(
                           ('t', 'b', 's'), 2)])
    for i, j in itertools.permutations(('t', 'b', 's'), 2):
        if (i, j) in covar_list:
            covar_list[(j, i)] = covar_list[(i, j)]
    covar_grid = [[covar_list[(i, j)] for i in ('t', 'b', 's')]
                  for j in ('t', 'b', 's')]
    d['tbcov'] = pymc.MvNormalCov('tbcov',
                                  mu=[d['temperature'], d['beta'], d['scale']],
                                  C=covar_grid,
                                  value=[d['temperature'], d['beta'],
                                         d['scale']])

    precision_list = dict([((i, j), pymc.Uninformative('%s-%s' % (i, j),
                                                       value=(i == j)))
                           for i, j in itertools.combinations_with_replacement(
                               ('t', 'b', 's'), 2)])
    for i, j in itertools.permutations(('t', 'b', 's'), 2):
        if (i, j) in precision_list:
            precision_list[(j, i)] = precision_list[(i, j)]
    precision_grid = [[precision_list[(i, j)] for i in ('t', 'b', 's')]
                      for j in ('t', 'b', 's')]
    # need to force tau > 0...
    d['tbprec'] = pymc.MvNormalCov('tbprec',
                                   mu=[d['temperature'], d['beta'], d['scale']],
                                   C=precision_grid, value=[1, 1, 1])

    for ii, (sed, err) in enumerate(zip(seds, errors)):
        d['t_%i' % ii] = pymc.Normal('t_%i' % ii, mu=d['tbcov'][0],
                                     tau=d['tbprec'][0])
        d['b_%i' % ii] = pymc.Normal('b_%i' % ii, mu=d['tbcov'][1],
                                     tau=d['tbprec'][1])
        d['s_%i' % ii] = pymc.Normal('s_%i' % ii, mu=d['tbcov'][2],
                                     tau=d['tbprec'][2])

        def bb_model(temperature=d['t_%i' % ii], scale=d['s_%i' % ii],
                     beta=d['b_%i' % ii]):
            kwargs[scale_keyword] = scale
            y = blackbody_function(frequency, temperature, beta=beta,
                                   normalize=False, **kwargs)
            # print kwargs,beta,temperature,(-((y-flux)**2)).sum()
            return y

        d['bb_model_%i' % ii] = pymc.Deterministic(
            eval=bb_model,
            name='bb_model_%i' % ii,
            parents={'temperature': d['t_%i' % ii],
                     'scale': d['s_%i' % ii],
                     'beta': d['b_%i' % ii]},
            doc='Blackbody SED model.',
            trace=True,
            verbose=0,
            dtype=float,
            plot=False,
            cache_depth=2)

        if err is None:
            d['err_%i' % ii] = pymc.distributions.Uninformative(
                'error_%i' % ii, value=1.)
        else:
            d['err_%i' % ii] = pymc.distributions.Uninformative(
                'error_%i' % ii, value=err, observed=True)

        d['flux_%i' % ii] = pymc.distributions.Normal(
            'flux_%i' % ii, mu=d['bb_model_%i' % ii],
            tau=1. / d['err_%i' % ii]**2, value=sed, observed=True)

    # print d.keys()
    MC = pymc.MCMC(d)

    if nsamples > 0:
        MC.sample(nsamples, burn=burn)

        if return_MC:
            return MC

        MCfit = pymc.MAP(MC)
        MCfit.fit()
        T = MCfit.temperature.value
        scale = MCfit.scale.value

        if beta_guess is not None:
            beta = MCfit.beta.value
            return T, scale, beta
        else:
            return T, scale

    return MC
def make_model(N, k, X, backend, manifold):
    """
    A standard spatial logistic regression.
    - N: Number sampled at each location
    - k: Number positive at each location
    - X: x,y,z coords of each location
    - backend: The linear algebra backend. So far, this has to be 'cholmod'.
    - manifold: The manifold to work on. So far, this has to be 'spherical'.
    """

    # Make the Delaunay triangulation.
    neighbors, triangles, trimap, b = manifold.triangulate_sphere(X)

    # Uncomment to visualize the triangulation.
    # manifold.plot_triangulation(X,neighbors)

    # Generate the C, Ctilde and G matrix in SciPy 'lil' format.
    triangle_areas = [manifold.triangle_area(X, t) for t in triangles]
    Ctilde = manifold.Ctilde(X, triangles, triangle_areas)
    C = manifold.C(X, triangles, triangle_areas)
    G = manifold.G(X, triangles, triangle_areas)

    # Convert to SciPy 'csc' format for efficient use by the CHOLMOD backend.
    C = backend.into_matrix_type(C)
    Ctilde = backend.into_matrix_type(Ctilde)
    G = backend.into_matrix_type(G)

    # Kappa is the scale parameter. It's a free variable.
    kappa = pm.Exponential('kappa', 1, value=3)

    # Fix the value of alpha.
    alpha = 2.

    # amp is the overall amplitude. It's a free variable that will probably
    # be highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m', value=0)

    @pm.deterministic(trace=False)
    def M(m=m, n=len(X)):
        """The mean vector"""
        return np.ones(n) * m

    @pm.deterministic(trace=False)
    def Q(kappa=kappa, alpha=alpha, amp=amp, Ctilde=Ctilde, G=G,
          backend=backend):
        "The precision matrix."
        out = operators.mod_frac_laplacian_precision(
            Ctilde, G, kappa, alpha, backend) / np.asscalar(amp)**2
        return out

    # Do all the precomputation you can based on the sparsity pattern alone.
    # Note that if alpha is made free, this needs to be free also, as the
    # sparsity pattern will be changeable.
    pattern_products = backend.pattern_to_products(Q.value)

    @pm.deterministic(trace=False)
    def precision_products(Q=Q, p=pattern_products):
        "All the analysis of the precision matrix that the backend needs to do MVN computations."
        try:
            return backend.precision_to_products(Q, **p)
        except backend.NonPositiveDefiniteError:
            return None

    # The random field.
    empirical_S = pm.logit((k+1) / (N+2.))
    S = pymc_objects.SparseMVN('S', M, precision_products, backend,
                               value=empirical_S)

    @pm.deterministic(trace=False)
    def p(S=S):
        """The success probability."""
        return pm.invlogit(S)

    # The data.
    data = pm.Binomial('data', n=N, p=p, value=k, observed=True)

    # A Fortran representation of the likelihood, to allow for fast
    # Metropolis steps without querying data.logp.
    likelihood_variables = np.vstack((np.resize(N, k.shape), k)).T
    likelihood_string = """
    lkp = dexp({X})/(1.0D0+dexp({X}))
    lkp = lv(i,2)*dlog(lkp) + (lv(i,1)-lv(i,2))*dlog(1.0D0-lkp)
    """

    return locals()
    'var2_t': pymc.Gamma,
}

scipy_distributions = {'a_t': lambda a, b: st.beta.rvs(a, b),
                       'mu1_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       'mu2_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       'var1_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       'var2_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       }

# %%
variabili_ = ['mu1_t', 'var1_t', 'mu2_t', 'var2_t']
param_ = [mu1, var1, mu2, var2]

for var_i in np.arange(2):
    variable = variabili_[var_i]
    param = param_[var_i]

    groups = {k: group
              for k, group in database_random.groupby('sampleID')[variable]}

    a = pymc.Uninformative('a', value=1)
    b = pymc.Uninformative('b', value=1)
    variables = [a, b]

    distribution = pymc_distributions[variable]

    for k, g in groups.items():
        obs = distribution('obs{}'.format(k), alpha=a, beta=b,
                           observed=True, value=g.values)
        variables.append(obs)

    model_map = pymc.MAP(variables)
    model_map.fit()

    model_mcmc = pymc.MCMC(variables)
    model_mcmc.sample(int(1e5))  # pass an integer iteration count
def make_model(self, sim, params):
    initial_values = {}
    initial_components = {}
    model_dict = {}

    def runit(**kwargs):
        sim.params(**kwargs)
        for key in initial_values:
            initial_components[key].initial_value = initial_values[key]
        try:
            sim.run_fast()
            return 0
        except FloatingPointError:
            return -1

    params['std_dev'] = params.get('std_dev', [1e-3, 200])

    with pymc.Model() as model:
        std_dev = pymc.Uniform('std_dev', params['std_dev'][0],
                               params['std_dev'][1])
        del params['std_dev']

        # @pymc.deterministic(plot=False)
        # def precision(std_dev=std_dev):
        #     return 1.0 / (std_dev * std_dev)

        for key in params:
            if key.startswith('initial_'):
                if params[key][0] is None:
                    initial_values[key] = pymc.Uninformative(
                        key, value=params[key][1])
                else:
                    if len(params[key]) == 2:
                        initial_values[key] = pymc.Uniform(
                            key, params[key][0], params[key][1])
                    elif len(params[key]) == 3:
                        initial_values[key] = pymc.Uniform(
                            key, params[key][0], params[key][1],
                            value=params[key][2])
                    else:
                        raise ValueError

                name = key.split('initial_')[1]
                _c = sim.get_component(name)
                initial_components[key] = _c
            else:
                if params[key][0] is None:
                    params[key] = pymc.Uninformative(key,
                                                     value=params[key][1])
                else:
                    if len(params[key]) == 2:
                        params[key] = pymc.Uniform(key, params[key][0],
                                                   params[key][1])
                    elif len(params[key]) == 3:
                        params[key] = pymc.Uniform(key, params[key][0],
                                                   params[key][1],
                                                   value=params[key][2])
                    else:
                        raise ValueError

        for key in initial_values:
            del params[key]

        run_sim = pymc.Deterministic(eval=runit,
                                     doc="this",
                                     name='run_sim',
                                     parents=dict(
                                         list(params.items()) +
                                         list(initial_values.items())))

        def make_fun(var):
            def fun(run_sim=run_sim, sim=sim):
                _c = sim.get_component(var)
                if _c.data:
                    t = _c.data['t']
                    value = sim.interpolate(t, var)
                else:
                    t = sim.maximum_t
                    value = sim.interpolate(t, var)
                return value
            return fun

        for _c in sim.components + sim.assignments:
            params[_c.name] = pymc.Deterministic(eval=make_fun(_c.name),
                                                 doc=_c.name,
                                                 name=_c.name,
                                                 parents={'run_sim': run_sim,
                                                          'sim': sim})
            if _c.data:
                varname = _c.name + '_data'
                params[varname] = pymc.Normal(varname,
                                              mu=params[_c.name],
                                              tau=1.0 / std_dev**2,
                                              observed=True,
                                              value=_c.data['value'])

    return model