def init_mcModel(self):
    """Assemble the PyMC model and its MCMC sampler.

    Resets ``self.logpTrace`` and ``self.cnt``, then creates three nodes:
    a Normal prior ``k``, a deterministic ``Dmod`` evaluated by
    ``self.forwardSoln`` and an observed stochastic ``Dobs`` scored by
    ``self.modelLogProbability`` against ``self.D``.  The model is stored
    in ``self.mcModel`` and the sampler in ``self.mcmc``.
    """
    self.logpTrace = zeros(10)
    self.cnt = 0

    # Prior for the rate constants: zero mean, precision 1 / logkSigma**2.
    prior_mean = zeros_like(self.k0)
    prior_precision = 1. / logkSigma**2
    rate_prior = pymc.Normal('k', prior_mean, prior_precision)

    # Modeled species concentrations.
    modeled = pymc.Deterministic(
        eval=self.forwardSoln,
        doc='modeled species concentrations',
        name='Dmod',
        parents={'k': rate_prior},
        verbose=0,
        plot=False)

    # Observed species concentrations.
    measured = pymc.Stochastic(
        logp=self.modelLogProbability,
        doc='measured species concentrations',
        name='Dobs',
        parents={'Dmod': modeled},
        value=self.D,
        verbose=0,
        plot=False,
        observed=True)

    self.mcModel = pymc.Model([rate_prior, modeled, measured])
    self.mcmc = pymc.MCMC(self.mcModel)
def _create_kde_stochastic(self, kde, kde_name, param_names):
    '''
    Creates custom pymc stochastic object based on a multivariate kernel
    density estimate (kde should be kde object from scipy).

    :param kde: density estimate providing ``logpdf`` and ``resample``
    :param kde_name: name given to the stochastic; also the parent key of
        the per-parameter deterministics
    :param param_names: one name per component of a kde sample, in order
    :returns: tuple ``(KDE, rvs)`` where ``KDE`` is the stochastic and
        ``rvs`` maps each parameter name to a deterministic that selects
        the matching component of the KDE value
    '''
    # build kde stochastic
    def logp(value):
        return kde.logpdf(value)

    def random(value=None):
        # The stochastic has no parents, so pymc invokes random() with no
        # arguments; a required positional parameter made that call fail.
        # ``value`` is still accepted (and ignored) for older call sites.
        return kde.resample(1).flatten()

    KDE = pymc.Stochastic(logp=logp,
                          doc='multivariate KDE',
                          value=random(),
                          name=kde_name,
                          parents=dict(),
                          random=random,
                          trace=True,
                          dtype=float,
                          observed=False,
                          plot=True)

    # build deterministics dependent on kde stochastic
    rvs = dict()
    for i, pn in enumerate(param_names):
        # i is bound as a default so each selector keeps its own index.
        def select_component(i=i, **kwargs):
            return kwargs[kde_name][i]

        rvs[pn] = pymc.Deterministic(eval=select_component,
                                     name=pn,
                                     parents={kde_name: KDE},
                                     doc='model param %s' % pn,
                                     trace=True,
                                     dtype=float,
                                     plot=True)
    return KDE, rvs
def ResidualModel(name, value, predicted, splrep_tck_Y, rangm, KDEminmax,
                  KDE_Object_Y, observed=False, trace=False):
    """Create a pymc Stochastic that scores residuals ``predicted - value``.

    Each residual is assigned a density: a spline evaluation
    (``si.splev`` with ``splrep_tck_Y``) inside the interval
    ``(rangm[0], rangm[1])`` and a geometrically decaying tail scaled by
    ``KDEminmax[0]`` / ``KDEminmax[1]`` outside it.  The log-probability is
    the sum of the log densities.

    Parameters are passed straight through to ``pymc.Stochastic`` as the
    node name, initial/observed value, parents and flags.

    Returns
    -------
    pymc.Stochastic
    """
    # Finite stand-in returned whenever the summed log density is not a
    # usable finite number.
    logp_floor = float(-1.7976931348623157e+300)

    def logp(value, predicted, rangm, splrep_tck_Y, KDEminmax, KDE_Object_Y):
        res = predicted - value

        def density(r):
            # NOTE(review): the lower-tail exponent is (-r) - rangm[0]
            # while the upper tail uses r - rangm[1]; confirm the lower
            # expression is the intended distance from the bound.
            if r <= rangm[0]:
                return float(KDEminmax[0] * 0.99**((-r) - rangm[0]))
            if r >= rangm[1]:
                return float(KDEminmax[1] * 0.99**(r - rangm[1]))
            return float(si.splev(r, splrep_tck_Y))

        p = np.array([density(r) for r in res])
        try:
            total = sum(np.log(p))
            # Check to make sure there are no illogical values
            if math.isnan(total) or math.isinf(total):
                return logp_floor
            return total
        except (ArithmeticError, TypeError, ValueError):
            # Previously this path printed and then fell through to an
            # unbound local; report the offending densities and return the
            # floor instead.
            print("%s %s" % (p, sum(p)))
            return logp_floor

    def random(predicted, KDE_Object_Y, **unused_parents):
        # pymc supplies every parent value by keyword; the parents this
        # draw does not need are absorbed by **unused_parents.
        result = KDE_Object_Y.resample(len(predicted))[0][0]
        print(KDE_Object_Y)
        return result

    result = pymc.Stochastic(
        logp=logp,
        name=name,
        parents={'predicted': predicted,
                 'splrep_tck_Y': splrep_tck_Y,
                 'rangm': rangm,
                 'KDEminmax': KDEminmax,
                 'KDE_Object_Y': KDE_Object_Y},
        value=value,
        random=random,
        observed=observed,
        doc='Likelihood Probability leaf lengths P(L|L_hat)= P(Residuals)',
        trace=trace,
        verbose=0,
        dtype=float,
        plot=False,
        cache_depth=2)
    return result
def get_pymc_model(probabilities):
    """Create the 'arrival' stochastic: an index drawn from a probability table.

    ``probabilities`` becomes the single parent ``probs`` of the node.  The
    log-probability of an index is the log of its table entry, or ``-inf``
    when the index is out of range or the entry is not positive.
    """

    def arrival_logp(value, probs):
        out_of_range = value >= len(probs) or value < 0
        if out_of_range:
            return -np.inf
        mass = probs[value]
        return -np.inf if mass <= 0 else np.log(mass)

    def arrival_rand(probs):
        # Draw an index with the table entries as weights.
        n_choices = len(probs)
        return np.random.choice(n_choices, p=probs)

    node_options = dict(
        logp=arrival_logp,
        doc='The index of the arrival.',
        name='arrival',
        parents={'probs': probabilities},
        random=arrival_rand,
        trace=True,
        dtype=int,
        observed=False,
        cache_depth=2,
        plot=True,
        verbose=0,
    )
    return pm.Stochastic(**node_options)
def KernelSmoothing(name, dataset, bw_method=None, lower=-np.inf,
                    upper=np.inf, observed=False, value=None):
    '''Create a pymc node whose distribution comes from a kernel smoothing
    density estimate.

    The density is built by ``calculate_kde(dataset, bw_method)`` and
    truncated to ``[lower, upper]``; the mass cut off on either side is
    removed by renormalising.

    name      -- name of the pymc node
    dataset   -- samples handed to ``calculate_kde``
    bw_method -- bandwidth argument handed to ``calculate_kde``
    lower     -- lower truncation bound (default: none)
    upper     -- upper truncation bound (default: none)
    observed  -- forwarded to ``pymc.Stochastic``
    value     -- initial (or observed) value; when omitted a draw from the
                 truncated density is used

    Returns the ``pymc.Stochastic`` node.
    '''
    density = calculate_kde(dataset, bw_method)

    # Probability mass that falls outside the truncation bounds.
    lower_tail = 0
    upper_tail = 0
    if lower > -np.inf:
        lower_tail = density.integrate_box(-np.inf, lower)
    if upper < np.inf:
        upper_tail = density.integrate_box(upper, np.inf)
    factor = 1.0 / (1.0 - lower_tail - upper_tail)

    def logp(value):
        if value < lower or value > upper:
            return -np.inf
        d = density(value)
        if d == 0.0:
            return -np.inf
        return np.log(factor * d)

    def random():
        # Rejection sampling: redraw until the sample is inside the bounds.
        while True:
            draw = density.resample(1)[0][0]
            if not (draw < lower or draw > upper):
                return draw

    if value is None:
        value = random()
    dtype = type(value)

    # Pass the caller's value (or the in-bounds draw) to the node.  The node
    # used to start at dataset[0] regardless, which ignored ``value`` and
    # could lie outside [lower, upper].
    result = pymc.Stochastic(logp=logp,
                             doc='A kernel smoothing density node.',
                             name=name,
                             parents={},
                             random=random,
                             trace=True,
                             value=value,
                             dtype=dtype,
                             observed=observed,
                             cache_depth=2,
                             plot=True,
                             verbose=0)
    return result
def init_mcModel(self):
    """Build the PyMC model and sampler, printing progress as it goes.

    Creates Normal priors ``k`` and ``s``, a deterministic ``D_mod``
    evaluated by ``self.FM`` and an observed stochastic ``D_obs`` scored by
    ``self.modelLikelihood`` against ``self.D_obs.val()``.  Stores the model
    in ``self.mcModel`` and the sampler in ``self.mcmc``.
    """
    print(" stochastics initializing")

    # Prior for the rate constants.
    rate_precision = 1. / self.logRateConstantStdDev**2
    rate_prior = pymc.Normal('k', zeros(self.FM.nK), rate_precision)
    print(" k is done")

    # Prior for the initial concentrations.
    conc_precision = 1. / self.logInitialConcentrationsStdDev**2
    conc_prior = pymc.Normal('s', zeros(self.FM.nS), conc_precision)
    print(" s is done")

    # Modeled species concentrations.
    modeled = pymc.Deterministic(
        eval=self.FM,
        doc='modeled species concentrations',
        name='D_mod',
        parents={'k': rate_prior, 's': conc_prior},
        verbose=0,
        plot=False)
    print(" D_mod is done")

    # Observed species concentrations.
    measured = pymc.Stochastic(
        logp=self.modelLikelihood,
        doc='measured species concentrations',
        name='D_obs',
        parents={'D_mod': modeled},
        value=self.D_obs.val(),
        verbose=0,
        plot=False,
        observed=True)
    print(" D_obs is done")

    print(" mcModel initializing")
    self.mcModel = pymc.Model([rate_prior, conc_prior, modeled, measured])

    print(" mcmc initializing")
    self.mcmc = pymc.MCMC(self.mcModel)
def wiener_samplemodel(alpha, tau, beta, delta, N):
    """
    easy wiener process model to create samples for given parameter values

    Builds N stochastic nodes Y_0 .. Y_{N-1} whose ``random`` method
    simulates a random walk, and returns ``locals()``.  Because the whole
    local namespace is returned, every local name below (including the
    rebound parameters) is part of what the caller receives.
    """
    # Each parameter is rebound to a deterministic that repeats it N times,
    # so that alpha[i], tau[i], ... can be used as per-trial parents below.
    @pymc.deterministic
    def alpha(alpha=alpha, N=N):
        return [alpha for i in range(0, N)]

    @pymc.deterministic
    def tau(tau=tau, N=N):
        return [tau for i in range(0, N)]

    @pymc.deterministic
    def beta(beta=beta, N=N):
        return [beta for i in range(0, N)]

    @pymc.deterministic
    def delta(delta=delta, N=N):
        return [delta for i in range(0, N)]

    # From here on N names a deterministic node, not the plain argument.
    @pymc.deterministic
    def N(N=N):
        return N

    def Y_logp(value, alpha, tau, beta, delta):
        """
        this function is not needed in this model
        """
        return -1

    def Y_rand(alpha, tau, beta, delta, dt=0.0001, sigma=1):
        # Probability of an upward step of size sigma * sqrt(dt).
        p = .5 * (1 + ((delta * sqrt(dt)) / sigma))
        i = 0
        # The walk starts at beta * alpha and runs until it leaves (0, alpha).
        y = beta * alpha
        while (y < alpha and y > 0):
            if (pymc.random_number() <= p):
                y = y + sigma * sqrt(dt)
            else:
                y = y - sigma * sqrt(dt)
            i = i + 1
        # Elapsed time plus tau; positive when the upper bound was reached,
        # negative otherwise.
        if (y >= alpha):
            t = (i * dt + tau)
        else:
            t = -(i * dt + tau)
        return t

    Y = np.empty(N, dtype=object)
    for i in range(0, N):
        Y[i] = pymc.Stochastic(logp=Y_logp, doc='Wiener Model', name='Y_%i' % i, parents={ 'alpha': alpha[i], 'tau': tau[i], 'beta': beta[i], 'delta': delta[i] }, random=Y_rand, trace=True, value=0, dtype=None, rseed=1., observed=False, cache_depth=2, plot=False, verbose=0)
    return locals()
def Ptrans_logp(value, theta):
    """Sum the Dirichlet log-likelihood of every row of ``value`` given ``theta``."""
    logp = 0.
    for i in range(value.shape[0]):
        logp = logp + pymc.dirichlet_like(value[i], theta)
    return logp


def Ptrans_random(theta):
    """Draw ``len(theta)`` Dirichlet samples with parameter ``theta``."""
    return pymc.rdirichlet(theta, size=len(theta))


# Stochastic node for the transition matrix, with theta as its only parent.
Ptrans = pymc.Stochastic(logp=Ptrans_logp, doc='Transition matrix', name='Ptrans', parents={'theta': theta}, random=Ptrans_random)


#Hidden states stochastic
def states_logp(value, Ptrans=Ptrans):
    # NOTE(review): this definition appears to continue beyond the visible
    # source (no return of logp is shown); only the visible part is kept.
    # Any state value above 1 is given zero probability.
    if sum(value > 1):
        return -np.inf
    # Append the column 1 - (row sum) so every row of P sums to one.
    P = np.column_stack((Ptrans, 1. - Ptrans.sum(axis=1)))
    Pinit = unconditionalProbability(Ptrans)
    logp = pymc.categorical_like(value[0], Pinit)
def __init__(self, model, cvars, nvars, outvar, MAP='fmin_powell'):
    """Build the PyMC model used for calibration.

    Parameters
    ----------
    model : callable or str
        A Python function, or an expression string of the form
        'varname=expression' (either side may hold the output variable).
        Strings are parsed with sympy and compiled with ``lambdify``.
    cvars : dict
        Calibration variables.  Each entry is a dict with a 'prior' string
        such as 'normal(a, b)', 'uniform(a, b)' or 'jeffreys(a, b)' and a
        'type'.  Type 'S' creates ``<name>_mean`` and ``<name>_dev`` nodes
        plus a vector-valued Normal; any other type creates one node with
        the given prior.
    nvars : dict
        Remaining model inputs; each value is indexed as ``[:, 0]`` (data)
        and ``[:, 1]`` (error).
    outvar : dict
        Single-entry dict mapping the output variable name to an array
        whose column 0 is the measured data and column 1 its error.
    MAP : str or None
        Optimiser name passed to ``pymc.MAP(...).fit``; ``None`` skips the
        maximum a posteriori step.

    Raises
    ------
    ValueError
        If the model expression cannot be parsed, if the model variables do
        not match ``cvars`` and ``nvars``, or if a prior names an unknown
        distribution.
    """
    out_var_name = list(outvar.keys())[0]

    if callable(model):
        # model is a python function
        self.model = model
    else:
        err = ("Model expression must be of form 'varname=expression' "
               "and varname must have measured data in the data table.")
        try:
            if model.find('=') != -1:
                m = model.split('=')
                if m[0].strip() == out_var_name:
                    model = m[1].strip()
                elif m[1].strip() == out_var_name:
                    model = m[0].strip()
                else:
                    # neither side of the equation is the output variable
                    raise ValueError(err)
            model = sympy.sympify(model, _clash)
        except Exception:
            # Any parsing problem is reported to the caller as ValueError.
            raise ValueError(err)

        # find our variable names
        val_names = [str(sym) for sym in model.free_symbols]

        # all variables in the model must be defined
        defined = set(nvars.keys()).union(set(cvars.keys()))
        if set(val_names) != defined:
            missing = list(set(val_names).difference(defined))
            if not missing:
                errstr = 'Error: Model did not use all parameters'
            else:
                errstr = "Error: Not all parameters in the model were defined."
                errstr += "\'%s\' not defined." % missing[0]
            raise ValueError(errstr)

        # turn our symbolic expression into a fast, safe function call
        self.model = lambdify(model.free_symbols, model, dummify=False,
                              modules=['numpy', 'mpmath', 'sympy'])

    out_var = outvar[out_var_name]
    var = {}
    means = {}
    devs = {}
    dlen = out_var.shape[0]
    self.num_samples = 100000
    self.num_burn = 20000
    self.num_thin = 8

    # Calibration variables
    for v in cvars.keys():
        v = str(v)  # pymc doesn't like unicode

        # convert to lowercase and parse: 'name(a, b)' -> [name, a, b]
        d = cvars[v]['prior'].lower().replace('(', ',').replace(')', '').split(',')
        d[1] = float(d[1])
        d[2] = float(d[2])

        if cvars[v]['type'] == 'S':
            if d[0] == 'normal':
                # normal prior with mean d[1] and deviation d[2]
                means[v] = pymc.Normal(v + '_mean', mu=d[1], tau=1 / d[2]**2,
                                       value=d[1])
                values = np.linspace(d[1] - 3 * d[2], d[1] + 3 * d[2], dlen)
                dval = d[2]
            elif d[0] == 'uniform':
                # uniform prior from d[1] to d[2]
                means[v] = pymc.Uniform(v + '_mean', d[1], d[2],
                                        value=(d[1] + d[2]) / 2.0)
                values = np.linspace(d[1], d[2], dlen)
                dval = 1
            else:
                start_val = (d[1] + d[2]) / 2.0
                values = np.linspace(d[1], d[2], dlen)
                dval = 1
                means[v] = pymc.Stochastic(
                    name=v + '_mean',
                    logp=lambda value: -np.log(value),
                    doc='',
                    parents={},
                    value=start_val)

            # Jeffrey prior for deviation
            devs[v] = pymc.Stochastic(name=v + '_dev',
                                      logp=lambda value: -np.log(value),
                                      doc='',
                                      parents={},
                                      value=dval)

            # to get a reliable deviation, we need more samples
            if self.num_samples < 1000000:
                self.num_samples *= 10
                self.num_burn *= 10
                self.num_thin *= 10

            # create a stochastic node for pymc with the mean and
            # dev from above and some initial values to try
            var[v] = pymc.Normal(v, mu=means[v], tau=1.0 / devs[v]**2,
                                 value=values)
        else:
            if d[0] == 'normal':
                var[v] = pymc.Normal(v, mu=d[1], tau=1 / d[2]**2)
            elif d[0] == 'uniform':
                var[v] = pymc.Uniform(v, lower=d[1], upper=d[2])
            elif d[0] == 'jeffreys':
                start_val = (d[1] + d[2]) / 2.0
                var[v] = pymc.Stochastic(name=v,
                                         doc='',
                                         logp=lambda value: -np.log(value),
                                         parents={},
                                         value=start_val)
            else:
                # Returning from __init__ here left a half-built object
                # without mcmc_model; fail loudly instead.
                raise ValueError(
                    'Unknown probability distribution: %s' % d[0])

    # Start with the non-calibration inputs fixed at their measured values.
    for v in nvars.keys():
        var[v] = nvars[v][:, 0]

    results = pymc.Deterministic(eval=self.model,
                                 name='results',
                                 parents=var,
                                 doc='',
                                 trace=True,
                                 verbose=0,
                                 dtype=float,
                                 plot=False,
                                 cache_depth=2)
    mdata = out_var[:, 0]
    mdata_err = out_var[:, 1]
    mcmc_model_out = pymc.Normal('model_out', mu=results,
                                 tau=1.0 / mdata_err**2,
                                 value=mdata, observed=True)
    self.mcmc_model = pymc.Model(list(var.values()) + list(means.values()) +
                                 list(devs.values()) + [mcmc_model_out])

    if MAP is not None:
        # compute MAP and use that as start for MCMC
        map_ = pymc.MAP(self.mcmc_model)
        map_.fit(method=MAP)
        print('\nmaximum a posteriori (MAP) using %s' % (MAP,))
        for v in cvars.keys():
            print('%s=%s' % (v, var[v].value))
        print('')

    # NOT calibration variables
    for v in nvars.keys():
        data = nvars[v][:, 0]
        # NOTE(review): err is a view, so the clamp below writes into the
        # caller's nvars array - confirm that is intended.
        err = nvars[v][:, 1]
        if np.all(err <= 0.0):
            var[v] = data
        else:
            err[err == 0] = 1e-100
            var[v] = pymc.Normal(v + '_err', mu=data, tau=1.0 / err**2)

    # NOTE(review): mcmc_model_out still has the first 'results' node as its
    # mean; this rebuilt node is not referenced by the final model - confirm
    # whether the likelihood should be rebuilt on top of it.
    results = pymc.Deterministic(eval=self.model,
                                 name='results',
                                 parents=var,
                                 doc='',
                                 trace=True,
                                 verbose=0,
                                 dtype=float,
                                 plot=False,
                                 cache_depth=2)
    self.mcmc_model = pymc.Model(list(var.values()) + list(means.values()) +
                                 list(devs.values()) + [mcmc_model_out])

    self.cvars = cvars
    self.var = var
    self.dlen = dlen
    self.means = means
    self.devs = devs
else: return -numpy.log(t_h - t_l + 1) def s_rand(t_l, t_h): return numpy.round((t_l - t_h) * random()) + t_l s = pm.Stochastic(logp=s_logp, doc='The switchpoint for the rate of disaster occurrence.', name='s', parents={ 't_l': 1851, 't_h': 1962 }, random=s_rand, trace=True, value=1900, dtype=int, rseed=1., observed=False, cache_depth=2, plot=True, verbose=0) x = pm.Binomial('x', value=7, n=10, p=.8, observed=True) x = pm.MvNormalCov('x', numpy.ones(3), numpy.eye(3)) y = pm.MvNormalCov('y', numpy.ones(3), numpy.eye(3)) print x + y #<pymc.PyMCObjects.Deterministic '(x_add_y)' at 0x105c3bd10>
return -np.inf
# (the line above is the tail of a definition that begins outside the
# visible source; it is kept verbatim)


def triangular_random(a, b, c):
    # numpy's argument order is (left, mode, right), hence a, c, b.
    return np.random.triangular(a, c, b)


# Stochastic node Z with fixed parents a=-3, b=8, c=0.
Z = pm.Stochastic(logp=triangular_logp, doc='Triangular Distribution', name='Z', parents={ 'a': -3, 'b': 8, 'c': 0 }, random=triangular_random, trace=True, value=0, dtype=float, rseed=1., observed=False, cache_depth=2, plot=True, verbose=0)
model = pm.Model([Z])
mcmc = pm.MCMC(model)
# presumably (iter, burn, thin) - confirm against the pymc sample() signature
mcmc.sample(40000, 10000, 1)
# Full trace of Z as collected by the sampler.
Z_samples = mcmc.trace('Z')[:]
def mcmc_fit(self):
    """Run an MCMC fit of the observed spectrum and keep the sampler.

    Uniform priors ``PFIT_<i>`` are created within ``self.fit_bounds``.
    Rank 0 starts from ``self.fit_params_init``; every other rank starts
    from a random point around the centre of each bound interval.  Samples
    are written to ``<self.dir_mcmc>/thread_<MPIrank>`` (an existing folder
    is removed first).  The sampler is stored in ``self.MCMC_R`` and
    ``self.MCMC`` is set to True.
    """
    logging.info('Start MCMC fit')

    # Start from the downhill result when there is one.
    self.fit_params_init = (self.DOWN_fit_output if self.DOWN
                            else self.fit_params)

    data = self.data.obs_spectrum[:, 1]  # observed data
    datastd = self.data.obs_spectrum[:, 2]  # data error

    # Priors: the master thread (MPIrank == 0) starts from the initial
    # solution, slave threads start from randomised points.
    is_master = self.MPIrank == 0
    priors = empty(size(self.fit_params_init), dtype=object)
    for i in range(self.fit_nparams):
        if is_master:
            start = self.fit_params_init[i]
        else:
            # perturb within a fifth of the bound width around the centre
            half_width = (self.fit_bounds[i][1] - self.fit_bounds[i][0]) / 5.0
            centre = np.mean(self.fit_bounds[i])
            start = random.uniform(low=centre - half_width,
                                   high=centre + half_width)
        priors[i] = pymc.Uniform('PFIT_%i' % (i),
                                 self.fit_bounds[i][0],
                                 self.fit_bounds[i][1],
                                 value=start)  # uniform prior

    # Uniform node for the data standard deviation; it is observed (fixed)
    # unless mcmc_update_std is set.  It is not referenced again below.
    std_options = dict(value=datastd, size=len(datastd))
    if not self.params.mcmc_update_std:
        std_options['observed'] = True
    std_dev = pymc.Uniform('datastd', 0.0, 2.0 * max(datastd), **std_options)

    def mcmc_loglikelihood(value, fit_params, datastd, data):
        # Defined as a plain function because CYTHON does not like PYMC
        # decorators.  The object array of parameters is copied into a
        # float array before the chi-square call.
        n_params = self.fit_nparams
        fit_params_container = zeros((n_params))
        for idx in range(n_params):
            fit_params_container[idx] = fit_params[idx]
        chi_t = self.chisq_trans(fit_params_container, data, datastd)
        norm_term = (-len(data) / 2.0) * log(pi)
        return norm_term - log(mean(datastd)) - 0.5 * chi_t

    mcmc_logp = pymc.Stochastic(
        logp=mcmc_loglikelihood,
        doc='The switchpoint for mcmc loglikelihood.',
        name='switchpoint',
        parents={'fit_params': priors, 'datastd': datastd, 'data': data},
        trace=True,
        value=self.fit_params_init,
        dtype=int,
        rseed=1.,
        observed=True,
        cache_depth=2,
        plot=False,
        verbose=0)

    # Output folder for this thread; old results are discarded.
    dir_mcmc_thread = os.path.join(self.dir_mcmc,
                                   'thread_%i' % self.MPIrank)
    if os.path.isdir(dir_mcmc_thread):
        logging.debug('Remove folder %s' % dir_mcmc_thread)
        shutil.rmtree(dir_mcmc_thread)

    verbose = 1 if self.params.mcmc_verbose else 0

    # Build the model, then populate and run it.
    R = pymc.MCMC((priors, mcmc_logp), verbose=verbose, db='txt',
                  dbname=dir_mcmc_thread)
    R.sample(iter=self.params.mcmc_iter,
             burn=self.params.mcmc_burn,
             thin=self.params.mcmc_thin,
             progress_bar=self.params.mcmc_progressbar,
             verbose=verbose)

    self.MCMC_R = R
    self.MCMC = True
def builder(data, distributions, xprior, initial_params=None):
    """Return a MCMC model selection sampler.

    Parameters
    ----------
    data : array
      Data set used to select the distribution.
    distributions : sequence
      Candidate distributions.  Objects from ``pymc.distributions`` are
      resolved by name to ``pymc.r<name>`` and ``pymc.<name>_like``; any
      other object must provide ``random``, ``logp`` and ``defaults``
      attributes.
    xprior : function(x)
      Function returning the log probability density of x. This is a prior
      estimate of the shape of the distribution.
    initial_params : dict, optional
      Initial parameter values keyed by lower-case distribution name; they
      override the guessed or default values.

    Returns
    -------
    pymc.MCMC
      Sampler over one ``<name>_params`` stochastic and one ``<name>_logp``
      deterministic per distribution.

    Raises
    ------
    ValueError
      For Scipy distributions, for objects lacking the required attributes,
      and when no initial values can be determined.
    """
    # 1. Extract the relevant information about each distribution:
    # name, random generating function, log probability and default parameters.
    names = []
    random_f = {}
    logp_f = {}
    init_val = {}
    for d in distributions:
        name = d.__name__.lower()
        if d.__module__ == 'pymc.distributions':
            random = getattr(pymc, 'r%s' % name)
            logp = getattr(pymc, name + '_like')
            initial_values = guess_params_from_sample(data, name)
        elif d.__module__ == 'pymc.ScipyDistributions':
            raise ValueError('Scipy distributions not yet supported.')
        else:
            try:
                random = d.random
                logp = d.logp
                initial_values = d.defaults
            except AttributeError:
                raise ValueError(
                    'Unrecognized distribution %s' % d.__str__())
        if initial_values is None:
            raise ValueError(
                'Distribution %s not supported. Skipping.' % name)
        names.append(name)
        random_f[name] = random
        logp_f[name] = logp
        init_val[name] = initial_values
    if initial_params:
        init_val.update(initial_params)
    print("%s %s %s" % (random_f, logp_f, init_val))

    # 2. Define the various latent variables and their priors
    nr = 10

    def make_prior(draw):
        # Factory so each prior keeps its own generator; a function created
        # directly in the loop would look up the loop variable when called
        # and every node would use the last distribution.
        def prior_distribution(value):
            """Prior density for the parameters.

            This function draws random values from the distribution
            parameterized with values. The probability of these random
            values is then computed using xprior."""
            return xprior(draw(size=nr, *value))
        return prior_distribution

    latent = {}
    for name in names:
        latent['%s_params' % name] = pymc.Stochastic(
            logp=make_prior(random_f[name]),
            doc='Prior for the parameters of the %s distribution' % name,
            name='%s_params' % name,
            parents={},
            value=np.atleast_1d(init_val[name]),
        )

    # 3. Compute the probability for each model
    def make_likelihood(loglike):
        # Same factory pattern as above, for the same late-binding reason.
        def logp(params):
            return loglike(data, *params)
        return logp

    lprob = {}
    for name in names:
        lprob['%s_logp' % name] = pymc.Deterministic(
            eval=make_likelihood(logp_f[name]),
            doc='Likelihood of the dataset given the distribution and the parameters.',
            name='%s_logp' % name,
            parents={'params': latent['%s_params' % name]})

    model_input = dict(latent)
    model_input.update(lprob)
    model_input['names'] = names
    M = pymc.MCMC(input=model_input)
    return M
def umbrella_logp(value, rain):
    """Log-probability of the umbrella node given the rain state.

    P(umbrella=True | rain=True)  = 0.9
    P(umbrella=True | rain=False) = 0.2
    """
    # Pick the Bernoulli parameter that matches the rain state.
    p_umbrella = 0.9 if rain else 0.2
    return pymc.bernoulli_like(value, p_umbrella)


# The umbrella is observed, with value True.
umbrella = pymc.Stochastic(logp=umbrella_logp,
                           doc="umbrella var",
                           name="umbrella",
                           parents={"rain": rain},
                           value=True,
                           observed=True)

rain_hmm = pymc.Model([rain, umbrella])
m = pymc.MCMC(rain_hmm)
m.sample(iter=5000)

# Report the mean of the sampled rain trace.
print("\n")
print("rain: ")
print(np.mean(m.trace("rain")[:]))
def wiener_model(RT, Cond, N, C, inits=None, WIENER_ERR=0.0001):
    """
    easy wiener process model

    RT         -- signed response times, one per trial; used as the observed
                  values of the Y_<i> nodes
    Cond       -- per-trial index selecting which delta_<c> node is the
                  parent of that trial
    N          -- number of trials
    C          -- number of delta nodes
    inits      -- takes initial values for alpha, tau and delta
    WIENER_ERR -- error threshold used to choose the number of series terms

    Returns ``locals()``, so every local name defined here is part of the
    result handed to the caller.
    """
    if inits is None:
        inits = (1,0.001,[0 for i in range(0,C)])

    alpha = pymc.Uniform('alpha', lower=0.0001,upper=5, value=inits[0])
    tau = pymc.Uniform('tau', lower=0.0001,upper=1, value=inits[1])

    @pymc.deterministic
    def beta():
        return 0.5

    delta = np.empty(C, dtype=object)
    for i in range(0,C):
        delta[i] = pymc.Normal('delta_%i' % i, mu=0, tau=1, value=inits[2][i])

    def Y_logp(value, alpha, tau, beta, delta):
        # Log density of one signed response time.  A series expansion is
        # evaluated in normalized time, using whichever of the small-time
        # and large-time forms needs fewer terms for the requested error.

        # make sure all variables are float:
        alpha = float(alpha)
        tau = float(tau)
        beta = float(beta)
        delta = float(delta)

        # extract RT: a negative value is taken by magnitude, a
        # non-negative one flips beta and delta
        if (value < 0):
            value = abs(value)
        else:
            beta = 1-beta
            delta = -delta

        value = value-tau  # remove non-decision time from value
        value = value / (pow(alpha,2))  # convert t to normalized time tt

        # No usable density at or before the non-decision time.  tt == 0 is
        # included so the logarithms and divisions below never see zero.
        if (value <= 0):
            return -float_info.max

        # calculate number of terms needed for large t
        if (pi*value*WIENER_ERR<1):  # if error threshold is set low enough
            kl = sqrt(-2*log(pi*value*WIENER_ERR)/(pow(pi,2)*value))  # bound
            if not(kl>1/(pi*sqrt(value))):  # ensure boundary conditions met
                kl = 1/(pi*sqrt(value))
        else:  # if error threshold set too high
            kl = 1/(pi*sqrt(value))  # set to boundary condition

        # calculate number of terms needed for small t
        if ((2*sqrt(2*pi*value)*WIENER_ERR)<1):  # if error threshold is set low enough
            ks = 2+sqrt(-2*value*log(2*sqrt(2*pi*value)*WIENER_ERR))  # bound
            if not(ks>sqrt(value)+1):  # ensure boundary conditions are met
                ks = sqrt(value)+1
        else:  # if error threshold was set too high
            ks = 2  # minimal kappa for that case

        # compute density: f(tt|0,1,beta)
        ans = 0  # initialize density
        if (ks<kl):  # if small t is better (i.e., lambda<0)
            K = ceil(ks)  # round to smallest integer meeting error
            for k in range(int(-floor((K-1)/2)), int(ceil((K-1)/2)+1)):  # loop over k
                ans = ans+(beta+2*k)*exp(-(pow((beta+2*k),2))/2/value)  # increment sum
            ans = log(pseudo_zero(ans))-0.5*log(2)-log(sqrt(pi))-1.5*log(value)  # add constant term
        else:  # if large t is better...
            K = ceil(kl)  # round to smallest integer meeting error
            # Sum k = 1 .. K inclusive.  The k = 0 term is identically zero,
            # so starting at 0 and stopping before K evaluated only K - 1 of
            # the K terms the bound asks for.
            for k in range(1, int(K) + 1):
                ans = ans+k*exp(-(pow(k,2))*(pow(pi,2))*value/2)*sin(k*pi*beta)  # increment sum
            ans = log(pseudo_zero(ans))+2*log(sqrt(pi))  # add constant term

        # convert to f(t|delta,a,beta) and return result
        return ans+((-delta*alpha*beta -(pow(delta,2))*(value*pow(alpha,2))/2)-log(pow(alpha,2)))

    def Y_rand(alpha, tau, beta, delta):
        """
        this function is not needed in this model
        """
        pass

    Y = np.empty(N, dtype=object)
    for i in range(0,N):
        Y[i] = pymc.Stochastic(logp=Y_logp,
                               doc = 'Wiener Model',
                               name = 'Y_%i' % i,
                               parents = {'alpha': alpha,
                                          'tau': tau,
                                          'beta': beta,
                                          'delta': delta[Cond[i]]},
                               random = Y_rand,
                               trace = True,
                               value = RT[i],
                               dtype= None,
                               rseed = 1.,
                               observed = True,
                               cache_depth = 2,
                               plot = False,
                               verbose = 0)
    return locals()