def three_model_comparison(p_df):
    a_n = len(p_df)
    t_lam = pm.Uniform('t_lam', 0, 1)
    #t_lam = 1.0 / np.mean(p_df)
    t_lambda_1 = pm.Exponential("t_lambda_1", t_lam)
    #t_lambda_1 = pm.Uniform("t_lambda_1", min(p_df), max(p_df))
    t_lambda_2 = pm.Exponential("t_lambda_2", t_lam)
    #t_lambda_2 = pm.Uniform("t_lambda_2", min(p_df), max(p_df))
    t_lambda_3 = pm.Exponential("t_lambda_3", t_lam)
    #t_lambda_3 = pm.Uniform("t_lambda_3", min(p_df), max(p_df))

    # The switchpoints index into the data, so their support is [0, a_n],
    # not the range of the observed count values.
    t_tau_1 = pm.DiscreteUniform("t_tau_1", lower=0, upper=a_n - 1)
    t_tau_2 = pm.DiscreteUniform("t_tau_2", lower=t_tau_1, upper=a_n)

    @pm.deterministic
    def lambda_(tau_1=t_tau_1, tau_2=t_tau_2, lambda_1=t_lambda_1,
                lambda_2=t_lambda_2, lambda_3=t_lambda_3):
        out = np.zeros(a_n)
        out[:tau_1] = lambda_1       # rate before tau_1 is lambda_1
        out[tau_1:tau_2] = lambda_2  # rate between tau_1 and tau_2 is lambda_2
        out[tau_2:] = lambda_3       # rate after (and including) tau_2 is lambda_3
        return out

    t_obs = pm.Poisson('t_observed', mu=lambda_, value=p_df, observed=True)
    t_model = pm.Model(
        [t_obs, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2])
    return t_model, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2
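# A minimal sampling sketch for the two-changepoint model above, assuming
# pymc 2.x as `pm`, numpy as `np`, and a hypothetical 1-D count array
# `counts`; iteration counts are illustrative.
t_model = three_model_comparison(counts)[0]
mcmc = pm.MCMC(t_model)
mcmc.sample(40000, 10000)
tau_1_samples = mcmc.trace('t_tau_1')[:]
tau_2_samples = mcmc.trace('t_tau_2')[:]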
def all_matches(matches, match_evaluator):
    match_vars = []
    for i in range(len(matches)):
        match = matches[i]
        match_name = 'match_%i' % i
        if match.order == "unordered":
            order = pm.DiscreteUniform('match_%i_order' % i, lower=0,
                                       upper=len(match.players) * 2 - 1)
        else:
            observed = match.order == "total"
            order = pm.DiscreteUniform('match_%i_order' % i, value=0, lower=0,
                                       observed=observed,
                                       upper=len(match.players) - 1)
        eval_func = match_evaluator.eval_with_order
        parents = {'players': match.players,
                   'winning_team': match.winning_team,
                   'order': order,
                   'foul_end': match.foul_end}
        match_var = pm.Deterministic(eval=eval_func, doc=match_name,
                                     name=match_name, parents=parents,
                                     plot=False, dtype=float)
        match_vars.append(match_var)
    return match_vars
def two_model_comparison(p_df):
    a_n = len(p_df)
    d_lam = pm.Uniform('d_lam', 0, 1)
    #d_lam = 1.0 / np.mean(p_df)
    lambda_1 = pm.Exponential("lambda_1", d_lam)
    #lambda_1 = pm.Uniform("lambda_1", min(p_df), max(p_df))
    lambda_2 = pm.Exponential("lambda_2", d_lam)
    #lambda_2 = pm.Uniform("lambda_2", min(p_df), max(p_df))

    # The switchpoint indexes into the data, so its support is [0, a_n].
    tau = pm.DiscreteUniform("tau", lower=0, upper=a_n)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(a_n)
        out[:tau] = lambda_1  # rate before tau is lambda_1
        out[tau:] = lambda_2  # rate after (and including) tau is lambda_2
        return out

    d_obs = pm.Poisson('d_observed', mu=lambda_, value=p_df, observed=True)
    d_model = pm.Model([d_obs, d_lam, lambda_1, lambda_2, tau])
    #d_model = pm.Model([d_obs, lambda_1, lambda_2, tau])
    return d_model, d_obs, d_lam, lambda_1, lambda_2, tau
def compute_n_sat_prior(informative=False, poisson_mu=None,
                        uniform_lower=None, uniform_upper=None):
    """
    Compute n_sat prior.

    Note: There are two options for modelling n_sat:
    - uninformative: discrete uniform distribution
    - informative: Poisson distribution

    Parameters
    ----------
    informative : bool, optional (default: False)
        If True, n_sat is modelled by a Poisson distribution.
        Else, n_sat is modelled by a discrete uniform distribution.
    poisson_mu : int, optional (default: None)
        Parameter mu (i.e. mean) of the Poisson distribution used to model
        n_sat. Must be specified if `informative` is True.
    uniform_lower : int, optional (default: None)
        Lower bound of the discrete uniform distribution used to model
        n_sat. Must be specified if `informative` is False.
    uniform_upper : int, optional (default: None)
        Upper bound of the discrete uniform distribution used to model
        n_sat. Must be specified if `informative` is False.

    Returns
    -------
    pymc distribution
        Prior distribution for n_sat.
    """
    if informative:
        if poisson_mu is None:
            error_msg = ("If you want to use a Poisson prior for n_sat, "
                         "please specify the parameter `poisson_mu`.")
            sys.exit(error_msg)
        return pymc.Poisson("n_sat", mu=poisson_mu)
    if uniform_lower is None or uniform_upper is None:
        error_msg = ("If you want to use a uniform prior for n_sat, "
                     "please specify the parameters `uniform_lower` "
                     "and `uniform_upper`.")
        sys.exit(error_msg)
    return pymc.DiscreteUniform("n_sat", lower=uniform_lower,
                                upper=uniform_upper)
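# A minimal usage sketch with hypothetical values; assumes `import pymc`
# and `import sys` are in scope, as the function above requires.
n_sat_flat = compute_n_sat_prior(uniform_lower=0, uniform_upper=100)
n_sat_info = compute_n_sat_prior(informative=True, poisson_mu=20)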
def priors_sample(self, n):
    """Generates n samples of all priors."""
    self.prior_samples = {}
    for key in self.priors:
        prior = self.priors[str(key)]
        if str(prior["type"]) == "Normal":
            self.prior_samples[str(key)] = [
                pymc.Normal(str(key), prior["mean"],
                            1. / np.square(prior["stdev"])).random()
                for i in range(n)]
        elif str(prior["type"]) == "DiscreteUniform":
            self.prior_samples[str(key)] = [
                pymc.DiscreteUniform(str(key), prior["lower"],
                                     prior["upper"]).random()
                for i in range(n)]
        else:
            print("Distribution type not supported.")
            break
def make_model(data):
    switchpoint = pm.DiscreteUniform('switchpoint', 0, len(data))
    early_rate = pm.Beta('early_rate', 0.5, 0.5)
    late_rate = pm.Beta('late_rate', 0.5, 0.5)

    @pm.deterministic(plot=False)
    def rate(s=switchpoint, early=early_rate, late=late_rate):
        out = np.empty(len(data))
        out[:s] = early
        out[s:] = late
        return out

    phredscore = pm.Bernoulli('phredscore', p=rate, value=data, observed=True)
    return locals()
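# A minimal sampling sketch for the model above, assuming pymc 2.x as `pm`
# and a hypothetical binary `data` array; pm.MCMC accepts the dict
# returned by locals().
mcmc = pm.MCMC(make_model(data))
mcmc.sample(20000, 5000)
switch_samples = mcmc.trace('switchpoint')[:]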
def makeSampledPrior(self, manager, parts):
    parts.shape_sample_index = [pymc.DiscreteUniform('shape_index_%d' % i,
                                                     0, len(x) - 1)
                                for i, x in enumerate(self.shapedistro_params)]

    parts.shape_params = np.empty(self.nshapebins, dtype=object)
    for i, index, samples in zip(range(self.nshapebins),
                                 parts.shape_sample_index,
                                 self.shapedistro_params):
        # Default arguments bind the loop variables at definition time, so
        # each deterministic closes over its own index and sample array.
        @pymc.deterministic(name='shape_params_%d' % i)
        def shape_param_func(index=index, samples=samples):
            return np.ascontiguousarray(samples[:, index])
        parts.shape_params[i] = shape_param_func
def setup_inference_mixture(self):
    # depending on the number of wavelengths
    #self.wavelengths = [self.wavelengths[len(self.wavelengths)-1]]
    wavelength_number = len(self.wavelengths)
    t = 1. / 2.5**2

    C_sigs = pymc.Container([pymc.HalfCauchy("c_sigs_%i_%i" % (i, x),
                                             beta=10, alpha=1)
                             for i in range(1 + 2 * self.N)
                             for x in range(wavelength_number)])
    C = pymc.Container([pymc.Normal("c_%i_%i" % (i, x), mu=0,
                                    tau=1. / C_sigs[i * wavelength_number + x]**2)
                        for i in range(1 + 2 * self.N)
                        for x in range(wavelength_number)])
    i_ = pymc.Container([pymc.DiscreteUniform('i_%i' % i, lower=0, upper=1)
                         for i in range(len(self.xdata))])

    @pymc.stochastic(observed=False)
    def sigma(value=1):
        return -np.log(abs(value))

    @pymc.stochastic(observed=False)
    def sigma3(value=1):
        return -np.log(abs(value))

    qw_sigs = pymc.Container([pymc.HalfCauchy("qw_sigs_%i" % x,
                                              beta=10, alpha=1)
                              for x in range(wavelength_number)])
    if self.wavelength_sd_defined:
        qw = pymc.Container([pymc.distributions.Lognormal(
            'qw_%i' % x, mu=self.wavelengths[x],
            tau=1. / self.wavelength_sd[x] ** 2)
            for x in range(wavelength_number)])
    else:
        qw = pymc.Container([pymc.distributions.Uniform(
            'qw_%i' % x, lower=0., upper=self.wavelengths[x] * 2)
            for x in range(wavelength_number)])

    def fourier_series(C, N, QW, x, wavelength_number, i_):
        v = np.zeros(len(x), dtype=float)
        for ii in range(len(x)):
            for w in range(wavelength_number):
                # constant term, added per sample (the original added it to
                # the whole array on every iteration, inflating the offset)
                v[ii] += C[w]
                for i in range(1, N + 1):
                    v[ii] += (C[(2 * i - 1) * wavelength_number + w]
                              * np.cos(2 * np.pi / QW[w] * i * x[ii])
                              + C[(2 * i) * wavelength_number + w]
                              * np.sin(2 * np.pi / QW[w] * i * x[ii]))
                #if i_[ii] == 0:
                #    v[ii] = -v[ii]
        return v  #np.sum(v)

    self.vector_fourier_series = np.vectorize(fourier_series)

    # Define the form of the model and likelihood
    @pymc.deterministic
    def y_model(C=C, x=self.xdata, qw=qw, nn=self.N,
                wavelength_number=wavelength_number, i_=i_):
        return fourier_series(C, nn, qw, x, wavelength_number, i_)

    y = pymc.Normal('y', mu=y_model, tau=1. / sigma ** 2,
                    observed=True, value=self.ydata)

    # package the full model in a dictionary
    self.model1 = dict(C=C, qw=qw, sigma=sigma, qw_sigs=qw_sigs,
                       y_model=y_model, y=y, x_values=self.xdata,
                       y_values=self.ydata, i_=i_)
    self.setup = True
    self.mcmc_uptodate = False
    return self.model1
def main():
    lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
    lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
    tau = pm.DiscreteUniform("tau", lower=0, upper=10)  # prior on behaviour change

    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value

    print lambda_1.random(), lambda_2.random(), tau.random()

    print "After calling random() on the variables..."
    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value

    samples = [lambda_1.random() for i in range(20000)]
    plt.hist(samples, bins=70, normed=True, histtype="stepfilled")
    plt.title("Prior distribution for $\lambda_1$")
    plt.xlim(0, 8)
    plt.show()

    data = np.array([10, 5])
    fixed_variable = pm.Poisson("fxd", 1, value=data, observed=True)
    print "value: ", fixed_variable.value
    print "calling .random()"
    fixed_variable.random()
    print "value: ", fixed_variable.value

    n_data_points = 5  # in CH1 we had ~70 data points

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_data_points)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after tau is lambda_2
        return out

    data = np.array([10, 25, 15, 20, 35])
    obs = pm.Poisson("obs", lambda_, value=data, observed=True)
    model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
def ML_NFW_Model(r_mpc, ghats, betas, pdz, concentration, zcluster,
                 likelihood_func, shapedistro_samples):
    # r_mpc, ghats, pdz, and shapedistro_samples may be either arrays or
    # lists of arrays. If lists of arrays, then each entry is associated
    # across the lists and will be passed to likelihood_func separately,
    # with the logProbs summed together.

    #######
    # Data Prep
    #######

    if not isinstance(r_mpc, list):
        r_mpc = [r_mpc]
        ghats = [ghats]
        pdz = [pdz]
        shapedistro_samples = [shapedistro_samples]

    r_mpc = [np.ascontiguousarray(x) for x in r_mpc]
    pdz = [np.ascontiguousarray(x) for x in pdz]
    shapedistro_samples = [np.ascontiguousarray(x)
                           for x in shapedistro_samples]

    D_lens = sp.angulardist(zcluster)
    nzbins = len(betas)
    ghats = [np.ascontiguousarray(x) for x in ghats]
    betas = np.ascontiguousarray(betas)
    nshapebins = len(shapedistro_samples)

    #######
    # Model
    #######

    ## shape parameter priors
    # DiscreteUniform's upper bound is inclusive, so use len(x) - 1 to keep
    # the sampled index in range.
    shape_sample_index = [pymc.DiscreteUniform('shape_index_%d' % i,
                                               0, len(x) - 1)
                          for i, x in enumerate(shapedistro_samples)]

    shape_params = np.empty(nshapebins, dtype=object)
    for i, index, samples in zip(range(len(shapedistro_samples)),
                                 shape_sample_index, shapedistro_samples):
        @pymc.deterministic(name='shape_params_%d' % i)
        def shape_param_func(index=index, samples=samples):
            return samples[:, index]
        shape_params[i] = shape_param_func

    ## r_scale is log-uniform
    log_r_scale = pymc.Uniform('log_r_scale', np.log(.01), np.log(1.))

    @pymc.stochastic(observed=True)
    def data(value=ghats, log_r_scale=log_r_scale, shape_params=shape_params):
        logprobs = np.array([likelihood_func(log_r_scale, cur_r_mpc,
                                             cur_ghats, betas, cur_pdz,
                                             cur_shapedistro_samples,
                                             concentration, zcluster, D_lens)
                             for (cur_r_mpc, cur_ghats, cur_pdz,
                                  cur_shapedistro_samples)
                             in zip(r_mpc, ghats, pdz, shape_params)])
        return np.sum(logprobs)

    ########

    return locals()
def generate_pymc_(self, params, q0=None):
    '''
    Creates PyMC objects for each param in dictionary.

    NOTE: the second argument for normal distributions is VARIANCE.

    Prior option: An arbitrary prior distribution derived from a set of
    samples (e.g., a previous mcmc run) can be passed with the following
    syntax:

        params = {<name> : ['KDE', <pymc_database>, <param_names>]}

    where <name> is the name of the distribution (e.g., 'prior' or
    'joint_dist'), <pymc_database> is the pymc database containing the
    samples from which the prior distribution will be estimated, and
    <param_names> are the children parameter names corresponding to the
    dimension of the desired sample array. This method will use all
    samples of the Markov chain contained in <pymc_database> for all
    traces named in <param_names>. Gaussian kernel-density estimation is
    used to derive the joint parameter distribution, which is then treated
    as a prior in subsequent mcmc analyses using the current class
    instance. The parameters named in <param_names> will be traced, as
    will the multivariate distribution named <name>.
    '''
    pymc_mod = []
    pymc_mod_order = []
    parents = dict()

    # Iterate through params, assign prior distributions
    for key, args in self.params.iteritems():
        # Distribution name should be first entry in params[key]
        dist = args[0].lower()
        if dist == 'normal':
            if q0 is None:
                RV = [pymc.Normal(key, mu=args[1], tau=1. / args[2])]
            else:
                RV = [pymc.Normal(key, mu=args[1], tau=1. / args[2],
                                  value=q0[key])]
        elif dist == 'uniform':
            if q0 is None:
                RV = [pymc.Uniform(key, lower=args[1], upper=args[2])]
            else:
                RV = [pymc.Uniform(key, lower=args[1], upper=args[2],
                                   value=q0[key])]
        elif dist == 'discreteuniform':
            if q0 is None:
                RV = [pymc.DiscreteUniform(key, lower=args[1],
                                           upper=args[2])]
            else:
                RV = [pymc.DiscreteUniform(key, lower=args[1],
                                           upper=args[2], value=q0[key])]
        elif dist == 'truncatednormal':
            if q0 is None:
                RV = [pymc.TruncatedNormal(key, mu=args[1], tau=1. / args[2],
                                           a=args[3], b=args[4])]
            else:
                RV = [pymc.TruncatedNormal(key, mu=args[1], tau=1. / args[2],
                                           a=args[3], b=args[4],
                                           value=q0[key])]
        elif dist == 'kde':
            kde = multivariate_kde_from_samples(args[1], args[2])
            kde_rv, rvs = self._create_kde_stochastic(kde, key, args[2])
            if q0 is not None:
                kde_rv.value = q0
            RV = [kde_rv]
            for rv_key, rv_value in rvs.iteritems():
                parents[rv_key] = rv_value
                RV.append(rv_value)
        else:
            raise KeyError('The distribution "' + dist +
                           '" is not supported.')

        parents[key] = RV[0]
        pymc_mod_order.append(key)
        pymc_mod += RV

    return parents, pymc_mod, pymc_mod_order
#     x_n_s = x_n  #np.sort(x_n)
#     y_n_s = y_n  #np.sort(y_n)
#     x = (x_n_s[d] - x_n_s[o]) * f + x_n_s[o]
#     y = (y_n_s[d] - y_n_s[o]) * f + y_n_s[o]
#     out = np.column_stack([x, y])
#     return out

Prows = np.empty(Nnodes, dtype=object)
for i in range(Nnodes):
    t = np.ones(Nnodes) * 10
    t[i] = 0.5
    Prows[i] = pymc.Dirichlet('Dir_%i' % i, theta=t)

# Cardinality / sparsity of the transition matrices
Vk = pymc.DiscreteUniform('Sparsity', lower=1, upper=min(4, Nnodes),
                          size=Nnodes)
#Vk = pymc.Geometric('Sparsity', p=0.7, size=Nnodes)

Nsamples_multi = Nsamples / Nnodes
# s_tp1 = np.array([pymc.Multinomial('multi_%i' % i, p=P_s_tp1[i],
#                                    n=Nsamples_multi, plot=False)
#                   for i in range(Nnodes)])

#frac = np.linspace(0, 1, Nsamples_multi)
frac = np.random.rand(Nsamples_multi)

@pymc.deterministic
def adjMatrix(Prows=Prows, Vk=Vk):
    P = np.empty((Nnodes, Nnodes))
    for (row, s_o) in enumerate(np.arange(Nnodes)):
# for python 2.7
# xLabel = "Year".decode('utf8')
# yLabel = "Number of storms".decode('utf8')
# for python 3.5
xLabel = "Year"
yLabel = "Number of storms"
plt.xlabel(xLabel)
plt.ylabel(yLabel)
general.set_grid_to_plot()
plt.savefig(general.folderPath1 + "exp1_storms.png")
plt.clf()

switchpoint = pymc.DiscreteUniform('switchpoint', lower=0, upper=len(arr))
early_mean = pymc.Exponential('early_mean', beta=1)
late_mean = pymc.Exponential('late_mean', beta=1)

@pymc.deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
    out = np.empty(len(arr))
    out[:s] = e
    out[s:] = l
    return out

storms = pymc.Poisson('storms', mu=rate, value=arr, observed=True)

model = pymc.Model([switchpoint, early_mean, late_mean, rate, storms])
mcmc = pymc.MCMC(model)
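# A natural next step, shown as a hedged sketch; the sampler settings are
# illustrative, not from the original source.
mcmc.sample(20000, 5000)
switch_samples = mcmc.trace('switchpoint')[:]
print(switch_samples.mean())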
import pymc as pm

car_door = pm.DiscreteUniform("car_door", lower=1, upper=3)
picked_door = pm.DiscreteUniform("picked_door", lower=1, upper=3)
preference = pm.DiscreteUniform("preference", lower=0, upper=1)

@pm.deterministic
def host_choice(car_door=car_door, picked_door=picked_door,
                preference=preference):
    if car_door != picked_door:
        # Only one door is neither picked nor hiding the car.
        return 6 - car_door - picked_door
    # The contestant picked the car door: the host chooses between the two
    # remaining doors according to his preference.
    if car_door == 1:
        left = 2
        right = 3
    else:
        left = 1
        if car_door == 2:
            right = 3
        else:
            right = 2
    out = right if preference else left
    return out

@pm.deterministic
def changed_door(picked_door=picked_door, host_choice=host_choice):
    return 6 - host_choice - picked_door
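# A hedged usage sketch: estimate the win rate of always switching by
# sampling the forward model above; numpy is assumed as np, and the
# iteration count is illustrative.
import numpy as np

win_by_switching = pm.Lambda("win_by_switching",
                             lambda c=car_door, s=changed_door: int(c == s))
model = pm.Model([car_door, picked_door, preference,
                  host_choice, changed_door, win_by_switching])
mcmc = pm.MCMC(model)
mcmc.sample(20000)
print(np.mean(mcmc.trace("win_by_switching")[:]))  # should approach 2/3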
def compare_groups(list1, list2):
    data = list1 + list2
    count_data = np.array(data)
    n_count_data = len(count_data)
    plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
    plt.xlabel("Time (days)")
    plt.ylabel("count of text-msgs received")
    plt.title("Did the viewers' ad viewing increase with the number "
              "of ads shown?")
    plt.xlim(0, n_count_data)
    #plt.show()

    alpha = 1.0 / count_data.mean()  # recall count_data is the
                                     # variable that holds our txt counts
    print alpha
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_count_data)
        out[:tau] = lambda_1  # lambda before tau is lambda_1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda_2
        return out

    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)
    model = pm.Model([observation, lambda_1, lambda_2, tau])

    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)

    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]
    print tau_samples

    # histogram of the samples:
    ax = plt.subplot(311)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label="posterior of $\lambda_1$", color="#A60628", normed=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1,\;\lambda_2,\;\tau$""")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel("$\lambda_1$ value")

    ax = plt.subplot(312)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
             label="posterior of $\lambda_2$", color="#7A68A6", normed=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel("$\lambda_2$ value")

    plt.subplot(313)
    w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
    plt.hist(tau_samples, bins=n_count_data, alpha=1,
             label=r"posterior of $\tau$", color="#467821",
             weights=w, rwidth=2.)
    plt.xticks(np.arange(n_count_data))
    plt.legend(loc="upper left")
    plt.ylim([0, .75])
    plt.xlim([0, len(count_data)])
    plt.xlabel(r"$\tau$ (in days)")
    plt.ylabel("probability")
    plt.show()
data_generator.parents
data_generator.children

# 'value' attribute
parameter.value
data_generator.value
data_plus_one.value

# 'stochastic' vars - still random even if parents are known
# 'deterministic' vars - not random if parents are known

# Initializing variables
# * name argument - retrieves posterior dist
# * class specific arguments
# * size - array of independent stochastic vars
some_var = pm.DiscreteUniform("discrete_uni_var", 0, 4)

betas = pm.Uniform("betas", 0, 1, size=10)
betas.value

# var.random() - generates new value
# var.value - returns current value
lambda_1 = pm.Exponential("lambda_1", 1)
lambda_2 = pm.Exponential("lambda_2", 2)
tau = pm.DiscreteUniform("tau", lower=0, upper=10)

lambda_1.value
lambda_2.value
tau.value

lambda_1.random()
def make_normal_baseline_hmm(y_data, X_data, baseline_end, initial_params):
    """
    Construct a PyMC2 scalar normal-emissions HMM with a stochastic
    reporting period start time parameter and baseline, reporting
    parameters for all other stochastics/estimated terms in the model.
    The reporting period start time parameter is given a discrete uniform
    distribution starting from the first observation after the baseline
    to the end of the series.

    Parameters
    ==========
    y_data: pandas.DataFrame
        Usage/response observations.
    X_data: list of pandas.DataFrame
        List of design matrices for each state. Each must span the entire
        length of observations (i.e. `y_data`).
    baseline_end: pandas.tslib.Timestamp
        End of baseline period (inclusive), beginning of reporting period.
    initial_params: NormalHMMInitialParams
        An object containing the following fields/members:

    Returns
    =======
    A pymc.Model object used for sampling.
    """
    N_states = len(X_data)
    N_obs = X_data[0].shape[0]
    alpha_trans = initial_params.alpha_trans

    # TODO: If we wanted a distribution over the time
    # when a renovation becomes effective...
    baseline_idx = X_data[0].index.get_loc(baseline_end)
    reporting_start = pymc.DiscreteUniform("reporting_start",
                                           baseline_idx + 1, N_obs,
                                           value=baseline_idx + 1)

    trans_mat_baseline = TransProbMatrix("trans_mat_baseline", alpha_trans,
                                         value=initial_params.trans_mat)
    trans_mat_reporting = TransProbMatrix("trans_mat_reporting", alpha_trans,
                                          value=initial_params.trans_mat)

    @pymc.deterministic(trace=True, plot=False)
    def N_baseline(rs_=reporting_start):
        return rs_ - 1

    states_baseline_0 = initial_params.states[slice(0, baseline_idx)]
    states_baseline = HMMStateSeq("states_baseline", trans_mat_baseline,
                                  N_baseline, p0=initial_params.p0,
                                  value=states_baseline_0)

    @pymc.deterministic(trace=True, plot=False)
    def N_reporting(rs_=reporting_start):
        return N_obs - rs_

    states_reporting_0 = initial_params.states[slice(baseline_idx, N_obs)]
    # TODO, FIXME: p0 should depend on states_baseline and
    # trans_mat_baseline, no?
    states_reporting = HMMStateSeq("states_reporting", trans_mat_reporting,
                                   N_reporting, p0=initial_params.p0,
                                   value=states_reporting_0)

    @pymc.deterministic(trace=True, plot=False)
    def states(sb_=states_baseline, sr_=states_reporting):
        return np.concatenate([sb_, sr_])

    Ws = initial_params.Ws
    betas = [[], []]
    for s in range(N_states):
        size_s = len(initial_params.betas[s])
        baseline_beta_s = pymc.Cauchy('base-beta-{}'.format(s),
                                      initial_params.betas[s], Ws[s],
                                      value=initial_params.betas[s],
                                      size=size_s if size_s > 1 else None)
        betas[0] += [baseline_beta_s]
        reporting_beta_s = pymc.Cauchy('rep-beta-{}'.format(s),
                                       initial_params.betas[s], Ws[s],
                                       value=initial_params.betas[s],
                                       size=size_s if size_s > 1 else None)
        betas[1] += [reporting_beta_s]

    del s, baseline_beta_s, reporting_beta_s, size_s

    Vs = initial_params.Vs
    mu = HMMLinearCombination('mu', X_data, betas, states)

    @pymc.deterministic(trace=False, plot=False)
    def V(states_=states, V_=Vs):
        return V_[states_]

    if y_data is not None:
        y_data = np.ma.masked_invalid(y_data).astype(np.object)
        y_data.set_fill_value(None)

    y_rv = pymc.Normal('y', mu, 1. / V, value=y_data,
                       observed=True if y_data is not None else False)

    del initial_params
    return pymc.Model(locals())
import pymc as pm
import numpy
from pymc.examples.DisasterModel import *

# 1) The switchpoint as a built-in distribution:
s = pm.DiscreteUniform('s', 1851, 1962, value=1900)

# 2) The same stochastic via the decorator, returning the log-probability:
@pm.stochastic(dtype=int)
def s(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""
    if value > t_h or value < t_l:
        # Invalid values
        return -numpy.inf
    else:
        # Uniform log-likelihood
        return -numpy.log(t_h - t_l + 1)

# 3) The decorator form supplying both logp and random:
@pm.stochastic(dtype=int)
def s(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""

    def logp(value, t_l, t_h):
        if value > t_h or value < t_l:
            return -numpy.inf
        else:
            return -numpy.log(t_h - t_l + 1)

    def random(t_l, t_h):
        # Draw uniformly from [t_l, t_h]; note the scale must be
        # (t_h - t_l), and numpy.random.random avoids shadowing this
        # function's own name.
        return numpy.round((t_h - t_l) * numpy.random.random()) + t_l

def s_logp(value, t_l, t_h):
import pymc as mc

count_data = np.loadtxt("../../Chapter1_Introduction/data/txtdata.csv")
n_count_data = len(count_data)

alpha = 1.0 / count_data.mean()  # recall count_data is
                                 # the variable that holds our txt counts
lambda_1 = mc.Exponential("lambda_1", alpha)
lambda_2 = mc.Exponential("lambda_2", alpha)
tau = mc.DiscreteUniform("tau", lower=0, upper=n_count_data)

@mc.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # lambda before tau is lambda_1
    out[tau:] = lambda_2  # lambda after tau is lambda_2
    return out

observation = mc.Poisson("obs", lambda_, value=count_data, observed=True)
model = mc.Model([observation, lambda_1, lambda_2, tau])

mcmc = mc.MCMC(model)
mcmc.sample(100000, 50000, 1)
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt

true_N = 500
D = pm.rdiscrete_uniform(1, true_N, size=10)

N = pm.DiscreteUniform("N", lower=D.max(), upper=10000)
observation = pm.DiscreteUniform("obs", lower=0, upper=N, value=D,
                                 observed=True)

model = pm.Model([observation, N])
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000, 1)

N_samples = mcmc.trace('N')[:]

# histogram of the samples:
plt.hist(N_samples, normed=True)
plt.show()
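# A hedged follow-up: point and interval summaries of the posterior over N
# (the taxicab/German-tank population size), using the samples above.
print(N_samples.mean())
print(np.percentile(N_samples, [2.5, 97.5]))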
@author: Usamahk
"""

# Testing the PyMC function. Learning about stochastic and deterministic
# variables. From Cameron Davidson-Pilon's book.

import pymc as pm
import numpy as np

# Determining a stochastic value - random with no influences from
# parent variables

lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
tau = pm.DiscreteUniform("tau", lower=0, upper=10)  # prior on behaviour change

print("lambda_1.value = %.3f" % lambda_1.value)
print("lambda_2.value = %.3f" % lambda_2.value)
print("tau.value = %.3f" % tau.value)

lambda_1.random(), lambda_2.random(), tau.random()

print("After calling random() on the variables...")
print("lambda_1.value = %.3f" % lambda_1.value)
print("lambda_2.value = %.3f" % lambda_2.value)
print("tau.value = %.3f" % tau.value)

# Note: don't change values in-place. It messes with PyMC's caching.

# Defining a deterministic value - values dependent on lambda_1 and lambda_2
plt.scatter(stormsYears, stormsNumbers, s=stormsNumbers)
plt.xlabel("Year")
plt.ylabel("Number of storms")
plt.savefig(general.folderPath2 + "exp2_storms1.png")
plt.clf()

plt.plot(stormsYears, stormsNumbers, '-ok')
plt.xlim(year0, year1)
plt.xlabel("Year")
plt.ylabel("Number of storms")
general.set_grid_to_plot()
plt.savefig(general.folderPath2 + "exp2_storms2.png")
plt.clf()

switchpoint = pm.DiscreteUniform('switchpoint', lower=0,
                                 upper=len(stormsNumbers) - 1,
                                 doc='Switchpoint[year]')
avg = np.mean(stormsNumbers)
early_mean = pm.Exponential('early_mean', beta=1. / avg)
late_mean = pm.Exponential('late_mean', beta=1. / avg)

@pm.deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
    # Concatenate Poisson means
    out = np.zeros(len(stormsNumbers))
    out[:s] = e
    out[s:] = l
    return out

storms = pm.Poisson('storms',
""" from __future__ import division import numpy as np import pymc as pm import matplotlib.pyplot as plt from plot_post import plot_post # THE DATA. N = 30 z = 8 y = np.repeat([1, 0], [z, N - z]) # THE MODEL. with pm.Model() as model: # Hyperprior on model index: model_index = pm.DiscreteUniform('model_index', lower=0, upper=1) # Prior nu = pm.Normal('nu', mu=0, tau=0.1) # it is posible to use tau or sd eta = pm.Gamma('eta', .1, .1) theta0 = 1 / (1 + pm.exp(-nu)) # theta from model index 0 theta1 = pm.exp(-eta) # theta from model index 1 theta = pm.switch(pm.eq(model_index, 0), theta0, theta1) # Likelihood y = pm.Bernoulli('y', p=theta, observed=y) # Sampling start = pm.find_MAP() steps = [pm.Metropolis([i]) for i in model.unobserved_RVs[1:]] steps.append(pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1])) trace = pm.sample(10000, steps, start=start, progressbar=False) # EXAMINE THE RESULTS.
    'disasters_array', 'switchpoint', 'early_mean', 'late_mean', 'disasters'
]

disasters_array = array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                         3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                         2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                         1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                         0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                         3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                         0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])

n = len(disasters_array)

# Define data and stochastics
switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=110)
early_mean = pm.Exponential('early_mean', beta=1.)
late_mean = pm.Exponential('late_mean', beta=1.)

@pm.stochastic(observed=True, dtype=int)
def disasters(value=disasters_array,
              early_mean=early_mean,
              late_mean=late_mean,
              switchpoint=switchpoint):
    """Annual occurrences of coal mining disasters."""
    return pm.poisson_like(value[:switchpoint], early_mean) + \
        pm.poisson_like(value[switchpoint:], late_mean)

@pm.deterministic
neighbors, triangles, trimap, b = spherical.triangulate_sphere(X)
# spherical.plot_triangulation(X, neighbors)

# Matrix generation
triangle_areas = [spherical.triangle_area(X, t) for t in triangles]
Ctilde = spherical.Ctilde(X, triangles, triangle_areas)
C = spherical.C(X, triangles, triangle_areas)
G = spherical.G(X, triangles, triangle_areas)

# Operator generation
Ctilde = cholmod.into_matrix_type(Ctilde)
G = cholmod.into_matrix_type(G)

M = np.zeros(n)

kappa = pm.Exponential('kappa', 1, value=3)
alpha = pm.DiscreteUniform('alpha', 1, 10, value=2)  # integer initial value
                                                     # for a discrete variable

@pm.deterministic
def Q(kappa=kappa, alpha=alpha):
    out = operators.mod_frac_laplacian_precision(Ctilde, G, kappa, alpha,
                                                 cholmod)
    return out

# Nailing this ahead of time reduces time to compute logp
# from .18 to .13s for n=25000.
pattern_products = cholmod.pattern_to_products(Q.value)
# @pm.deterministic
# def pattern_products(Q=Q):
#     return cholmod.pattern_to_products(Q)
def __init__(self, param, frags, stream=None, platform=None,
             param_to_opt=None, rj=False, sample_n5=False,
             continuous_phase=False, sample_phase=False, init_random=True):
    """
    Parameters
    ----------
    param : Parmed CharmmParameterSet
    frags : list of torsionfit.QMDataBase
    stream : str
        Path to CHARMM stream file. Default None.
    platform : openmm.Platform
        Default None.
    param_to_opt : list of tuples of torsions.
        Default None.
    rj : bool
        If True, will use reversible jump to sample Fourier terms. If
        False, will sample all Ks. Default False
    sample_n5 : bool
        If True, will also sample n=5. Default False
    sample_phase : bool
        If False, phase is not sampled; instead, Ks are allowed to take on
        negative values. Default False. If False, make sure
        continuous_phase is also False.
    continuous_phase : bool
        If True, will allow phases to take on any value between 0-180. If
        False, phase will be discrete and only sample 0 or 180.
    init_random : bool
        Randomize starting condition. Default is True. If False, will
        resort to whatever value is in the parameter set.

    Returns
    -------
    pymc model
    """
    if type(frags) != list:
        frags = [frags]
    self.pymc_parameters = dict()
    self.frags = frags
    self.platform = platform
    self.rj = rj
    self.sample_n5 = sample_n5
    self.continuous_phase = continuous_phase
    self.sample_phase = sample_phase
    if param_to_opt:
        self.parameters_to_optimize = param_to_opt
    else:
        self.parameters_to_optimize = TorsionScan.to_optimize(param, stream)

    # Check that options are reasonable
    if not sample_phase and continuous_phase:
        warnings.warn("You can't eliminate phase but have continuous "
                      "phase. Changing continuous phase to False")
        self.continuous_phase = False

    # set all phases to 0 if phase is not sampled
    if not self.sample_phase:
        par.set_phase_0(self.parameters_to_optimize, param)

    multiplicities = [1, 2, 3, 4, 6]
    if self.sample_n5:
        multiplicities = [1, 2, 3, 4, 5, 6]
    multiplicity_bitstrings = dict()

    # offset
    for frag in self.frags:
        name = '%s_offset' % frag.topology._residues[0]
        offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
        self.pymc_parameters[name] = offset

    # self.pymc_parameters['log_sigma_k'] = pymc.Uniform(
    #     'log_sigma_k', lower=-4.6052, upper=3.453, value=np.log(0.01))
    # self.pymc_parameters['sigma_k'] = pymc.Lambda(
    #     'sigma_k',
    #     lambda log_sigma_k=self.pymc_parameters['log_sigma_k']:
    #         np.exp(log_sigma_k))
    # self.pymc_parameters['precision_k'] = pymc.Lambda(
    #     'precision_k',
    #     lambda log_sigma_k=self.pymc_parameters['log_sigma_k']:
    #         np.exp(-2 * log_sigma_k))

    for p in self.parameters_to_optimize:
        torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]
        self.pymc_parameters['log_sigma_k_{}'.format(torsion_name)] = \
            pymc.Uniform('log_sigma_k_{}'.format(torsion_name),
                         lower=-4.6052, upper=3.453, value=np.log(0.01))
        self.pymc_parameters['sigma_k_{}'.format(torsion_name)] = pymc.Lambda(
            'sigma_k_{}'.format(torsion_name),
            lambda log_sigma_k=self.pymc_parameters[
                'log_sigma_k_{}'.format(torsion_name)]: np.exp(log_sigma_k))
        self.pymc_parameters['precision_k_{}'.format(torsion_name)] = \
            pymc.Lambda(
                'precision_k_{}'.format(torsion_name),
                lambda log_sigma_k=self.pymc_parameters[
                    'log_sigma_k_{}'.format(torsion_name)]:
                    np.exp(-2 * log_sigma_k))
        if torsion_name not in multiplicity_bitstrings.keys():
            multiplicity_bitstrings[torsion_name] = 0

        for m in multiplicities:
            name = (p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3] + '_'
                    + str(m) + '_K')
            # default force constant; overridden below if this
            # multiplicity already exists in the parameter set
            if not self.sample_phase:
                k = pymc.Normal(name, mu=0,
                                tau=self.pymc_parameters[
                                    'precision_k_{}'.format(torsion_name)],
                                value=0)
            else:
                k = pymc.Uniform(name, lower=0, upper=20, value=0)
            for i in range(len(param.dihedral_types[p])):
                if param.dihedral_types[p][i].per == m:
                    multiplicity_bitstrings[torsion_name] += 2 ** (m - 1)
                    if not self.sample_phase:
                        k = pymc.Normal(
                            name, mu=0,
                            tau=self.pymc_parameters[
                                'precision_k_{}'.format(torsion_name)],
                            value=param.dihedral_types[p][i].phi_k)
                    else:
                        k = pymc.Uniform(
                            name, lower=0, upper=20,
                            value=param.dihedral_types[p][i].phi_k)
                    break
            self.pymc_parameters[name] = k

            if self.sample_phase:
                name = (p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3] + '_'
                        + str(m) + '_Phase')
                for i in range(len(param.dihedral_types[p])):
                    if param.dihedral_types[p][i].per == m:
                        if self.continuous_phase:
                            phase = pymc.Uniform(
                                name, lower=0, upper=180.0,
                                value=param.dihedral_types[p][i].phase)
                        else:
                            if param.dihedral_types[p][i].phase == 0:
                                phase = pymc.DiscreteUniform(
                                    name, lower=0, upper=1, value=0)
                                break
                            if param.dihedral_types[p][i].phase == 180.0:
                                phase = pymc.DiscreteUniform(
                                    name, lower=0, upper=1, value=1)
                                break
                    else:
                        if self.continuous_phase:
                            phase = pymc.Uniform(name, lower=0, upper=180.0,
                                                 value=0)
                        else:
                            phase = pymc.DiscreteUniform(name, lower=0,
                                                         upper=1, value=0)
                self.pymc_parameters[name] = phase

    if self.rj:
        for torsion_name in multiplicity_bitstrings.keys():
            name = torsion_name + '_multiplicity_bitstring'
            bitstring = pymc.DiscreteUniform(
                name, lower=0, upper=63,
                value=multiplicity_bitstrings[torsion_name])
            self.pymc_parameters[name] = bitstring

    if init_random:
        # randomize initial value
        for parameter in self.pymc_parameters:
            if type(self.pymc_parameters[parameter]) != \
                    pymc.CommonDeterministics.Lambda:
                # and parameter[:11] != 'log_sigma_k':
                self.pymc_parameters[parameter].random()
                logger().info('initial value for {} is {}'.format(
                    parameter, self.pymc_parameters[parameter].value))

    self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma', lower=-10,
                                                     upper=3,
                                                     value=np.log(0.01))
    self.pymc_parameters['sigma'] = pymc.Lambda(
        'sigma',
        lambda log_sigma=self.pymc_parameters['log_sigma']:
            np.exp(log_sigma))
    self.pymc_parameters['precision'] = pymc.Lambda(
        'precision',
        lambda log_sigma=self.pymc_parameters['log_sigma']:
            np.exp(-2 * log_sigma))

    # add missing multiplicity terms to the parameter set so that the
    # system has the same number of parameters
    par.add_missing(self.parameters_to_optimize, param,
                    sample_n5=self.sample_n5)

    @pymc.deterministic
    def mm_energy(pymc_parameters=self.pymc_parameters, param=param):
        mm = np.ndarray(0)
        par.update_param_from_sample(self.parameters_to_optimize, param,
                                     model=self, rj=self.rj,
                                     phase=self.sample_phase,
                                     n_5=self.sample_n5,
                                     continuous=self.continuous_phase,
                                     model_type='openmm')
        for mol in self.frags:
            mol.compute_energy(param,
                               offset=self.pymc_parameters[
                                   '%s_offset' % mol.topology._residues[0]],
                               platform=self.platform)
            mm = np.append(mm, mol.mm_energy / kilojoules_per_mole)
        return mm

    size = sum([len(i.qm_energy) for i in self.frags])
    qm_energy = np.ndarray(0)
    for i in range(len(frags)):
        qm_energy = np.append(qm_energy, frags[i].qm_energy)
    #diff_energy = np.ndarray(0)
    #for i in range(len(frags)):
    #    diff_energy = np.append(diff_energy, frags[i].delta_energy)
    self.pymc_parameters['mm_energy'] = mm_energy
    self.pymc_parameters['qm_fit'] = pymc.Normal(
        'qm_fit', mu=self.pymc_parameters['mm_energy'],
        tau=self.pymc_parameters['precision'], size=size,
        observed=True, value=qm_energy)
from matplotlib import pyplot as plt

#count_data = np.loadtxt("txtdata.csv")
count_data = np.loadtxt("txtdata_sim.csv")
n_count_data = len(count_data)
print(count_data.mean())

alpha = 1.0 / count_data.mean()  # Recall count_data is the
                                 # variable that holds our txt counts
lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)

tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # lambda before tau is lambda_1
    out[tau:] = lambda_2  # lambda after (and including) tau is lambda_2
    return out

observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

model = pm.Model([observation, lambda_1, lambda_2, tau])
mcmc = pm.MCMC(model)
def mult_voigts(velocity, fluxv, fluxv_err, f, gamma, l0,
                nvoigts, RES, velo_range):
    '''
    Fitting a number of Voigt profiles to a spectrum in velocity space,
    given the restframe wavelength l0 (Angstrom), the oscillator strength
    f, damping constant gamma (km/s), and spectral resolution RES (km/s).
    '''

    #low_b = 2
    low_b = round(0.354 * RES / (2 * np.sqrt(np.log(2))), 2)

    print "\n Components with ~ b >", low_b, \
        "km/s can be resolved \n"

    tau_s = []
    for i in [0, 1]:
        tau_s.append(1 / np.array(fluxv_err[i])**2)

    #@pymc.stochastic(dtype=float)
    #def a(value=1.0, mu=1.0, sig=0.1, doc="B"):
    #    pp = 0.0
    #    #if 0.85 <= value < 1.15:
    #    pp = gauss(value, mu, sig)
    #    #else:
    #    #    pp = -np.inf
    #    return pp

    # Continuum model (up to quadratic polynomial)
    mu_bg_1 = np.nansum(fluxv[0]) / (len(fluxv[0]) - fluxv[0].count(np.nan))
    mu_bg_2 = np.nansum(fluxv[1]) / (len(fluxv[1]) - fluxv[1].count(np.nan))
    print mu_bg_1, mu_bg_2

    @pymc.stochastic(dtype=float)
    def a_1(value=mu_bg_1, mu=mu_bg_1, sig=0.5 * mu_bg_1, doc="a"):
        if mu_bg_1 / 2.0 < value < mu_bg_1 * 2.0:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a1_1(value=0.0, mu=0.0, sig=0.5, doc="a1"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a2_1(value=0.0, mu=0.0, sig=0.5, doc="a2"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a_2(value=mu_bg_2, mu=mu_bg_2, sig=0.5 * mu_bg_2, doc="a"):
        if mu_bg_2 / 2.0 < value < mu_bg_2 * 2.0:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a1_2(value=0.0, mu=0.0, sig=0.5, doc="a1"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a2_2(value=0.0, mu=0.0, sig=0.5, doc="a2"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    vars_dic = {}
    for i in range(1, nvoigts + 1):
        v0 = pymc.Uniform('v0' + str(i), lower=-velo_range,
                          upper=velo_range, doc='v0' + str(i))
        b = pymc.DiscreteUniform('b' + str(i), lower=round(low_b, 0),
                                 upper=30, value=low_b + 20,
                                 doc='b' + str(i))
        N = pymc.Uniform('N' + str(i), lower=0.0, upper=20, value=15,
                         doc='N' + str(i))
        vars_dic['v0' + str(i)] = v0
        vars_dic['b' + str(i)] = b
        vars_dic['N' + str(i)] = N

    print "\n Starting MCMC " + '(pymc version:', pymc.__version__, ")"
    print "\n This might take a while ..."

    @pymc.deterministic(plot=False)
    def multVoigt(vv=velocity, a_1=a_1, a1_1=a1_1, a2_1=a2_1, a_2=a_2,
                  a1_2=a1_2, a2_2=a2_2, f=f, gamma=gamma, l0=l0,
                  nvoigts=nvoigts, vars_dic=vars_dic):
        model_matrix = []
        for i in [0, 1]:
            conv_val = RES / (2 * np.sqrt(2 * np.log(2)) * tf[i])
            gauss_k = Gaussian1DKernel(stddev=conv_val, mode="oversample")
            if i == 0:
                flux = np.ones(len(vv[i])) * a_1
                #(a_1 + a1_1 * vv[i] + a2_1 * (power_lst(vv[i], 2)))
            if i == 1:
                flux = np.ones(len(vv[i])) * a_2
                #(a_2 + a1_2 * vv[i] + a2_2 * (power_lst(vv[i], 2)))
            for j in range(1, nvoigts + 1):
                v = vv[i] - vars_dic["v0" + str(j)]
                flux *= add_abs_velo(v, vars_dic["N" + str(j)],
                                     vars_dic["b" + str(j)],
                                     gamma[i], f[i], l0[i])
            #model_matrix.append(flux)
            model_matrix.append(np.convolve(flux, gauss_k, mode='same'))
        #print a_1, a1_1, a2_1, a_2, a1_2, a2_2
        #print vars_dic
        #print model_matrix
        return model_matrix

    y_val = pymc.Normal('y_val', mu=multVoigt, tau=tau_s, value=fluxv,
                        observed=True)

    return locals()
def __init__(self, param, frags, stream=None, param_to_opt=None, rj=False,
             init_random=True, tau='mult'):
    """
    Parameters
    ----------
    param : Parmed CharmmParameterSet
    frags : list of torsionfit.QMDataBase
    stream : str
        Path to CHARMM stream file. Default None. If None, param_to_opt
        list must be given. When a stream file is specified, param_to_opt
        is generated if the penalty of the parameters is greater than a
        threshold.
    param_to_opt : list of tuples of torsions.
        Default None.
    rj : bool
        If True, will use reversible jump to sample Fourier terms. If
        False, will sample all Ks. Default False
    init_random : bool
        Randomize starting condition. Default is True. If False, will
        resort to whatever value is in the parameter set.
    tau : string
        Options are 'mult' or 'single'. When 'mult', every element in K_m
        will have its own tau; when 'single', each K_m will have one tau.
        Default 'mult'

    Returns
    -------
    pymc model
    """
    if type(frags) != list:
        frags = [frags]
    self.pymc_parameters = dict()
    self.frags = frags
    self.rj = rj
    if param_to_opt:
        self.parameters_to_optimize = param_to_opt
    else:
        self.parameters_to_optimize = TorsionScan.to_optimize(param, stream)

    multiplicity_bitstrings = dict()

    # offset
    for frag in self.frags:
        name = '%s_offset' % frag.topology._residues[0]
        offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
        self.pymc_parameters[name] = offset

    if tau == 'mult':
        value = np.log(np.ones(6) * 0.01)
    elif tau == 'single':
        value = np.log(0.01)
    else:
        raise Exception("Only 'mult' and 'single' are allowed options "
                        "for tau")

    for p in self.parameters_to_optimize:
        torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]
        # The lower and upper bounds for this distribution are based on
        # empirical data: below this range the prior is too biased, and
        # above it the moves are usually rejected.
        self.pymc_parameters['log_sigma_k_{}'.format(torsion_name)] = \
            pymc.Uniform('log_sigma_k_{}'.format(torsion_name),
                         lower=-4.6052, upper=3.453, value=value)
        self.pymc_parameters['sigma_k_{}'.format(torsion_name)] = pymc.Lambda(
            'sigma_k_{}'.format(torsion_name),
            lambda log_sigma_k=self.pymc_parameters[
                'log_sigma_k_{}'.format(torsion_name)]: np.exp(log_sigma_k))
        self.pymc_parameters['precision_k_{}'.format(torsion_name)] = \
            pymc.Lambda(
                'precision_k_{}'.format(torsion_name),
                lambda log_sigma_k=self.pymc_parameters[
                    'log_sigma_k_{}'.format(torsion_name)]:
                    np.exp(-2 * log_sigma_k))
        self.pymc_parameters['{}_K'.format(torsion_name)] = pymc.Normal(
            '{}_K'.format(torsion_name), value=np.zeros(6), mu=0,
            tau=self.pymc_parameters['precision_k_{}'.format(torsion_name)])
        if torsion_name not in multiplicity_bitstrings.keys():
            multiplicity_bitstrings[torsion_name] = 0

    if self.rj:
        for torsion_name in multiplicity_bitstrings.keys():
            name = torsion_name + '_multiplicity_bitstring'
            bitstring = pymc.DiscreteUniform(
                name, lower=0, upper=63,
                value=multiplicity_bitstrings[torsion_name])
            self.pymc_parameters[name] = bitstring

    if init_random:
        # randomize initial value
        for parameter in self.pymc_parameters:
            if (type(self.pymc_parameters[parameter])
                    != pymc.CommonDeterministics.Lambda
                    and parameter[:11] != 'log_sigma_k'):
                self.pymc_parameters[parameter].random()
                logger().info('initial value for {} is {}'.format(
                    parameter, self.pymc_parameters[parameter].value))

    self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma', lower=-10,
                                                     upper=3,
                                                     value=np.log(0.01))
    self.pymc_parameters['sigma'] = pymc.Lambda(
        'sigma',
        lambda log_sigma=self.pymc_parameters['log_sigma']:
            np.exp(log_sigma))
    self.pymc_parameters['precision'] = pymc.Lambda(
        'precision',
        lambda log_sigma=self.pymc_parameters['log_sigma']:
            np.exp(-2 * log_sigma))

    # Precalculate phis
    n = np.array([1., 2., 3., 4., 5., 6.])
    self.models = []
    for i in itertools.product((0, 1), repeat=6):
        self.models.append(i)

    inner_sum = []
    for i, frag in enumerate(frags):
        inner_sum.append(OrderedDict())
        for t in frag.phis:
            inner_sum[i][t] = (1 + np.cos(
                frag.phis[t][:, np.newaxis] * n[:, np.newaxis])).sum(-1)
    self.inner_sum = inner_sum

    @pymc.deterministic
    def torsion_energy(pymc_parameters=self.pymc_parameters):
        mm = np.ndarray(0)
        for i, mol in enumerate(self.frags):
            Fourier_sum = np.zeros((mol.n_frames))
            for t in inner_sum[i]:
                name = t[0] + '_' + t[1] + '_' + t[2] + '_' + t[3]
                if self.rj:
                    K = (pymc_parameters['{}_K'.format(name)]
                         * self.models[pymc_parameters[
                             '{}_multiplicity_bitstring'.format(name)]])
                else:
                    K = pymc_parameters['{}_K'.format(name)]
                Fourier_sum += (K * inner_sum[i][t]).sum(1)
            Fourier_sum_rel = Fourier_sum - min(Fourier_sum)
            Fourier_sum_rel += pymc_parameters['{}_offset'.format(
                mol.topology._residues[0])]
            # use the offset-shifted energies computed above
            mm = np.append(mm, Fourier_sum_rel)
        return mm

    size = sum([len(i.qm_energy) for i in self.frags])
    residual_energy = np.ndarray(0)
    for i in range(len(frags)):
        residual_energy = np.append(residual_energy, frags[i].delta_energy)

    self.pymc_parameters['torsion_energy'] = torsion_energy
    self.pymc_parameters['qm_fit'] = pymc.Normal(
        'qm_fit', mu=self.pymc_parameters['torsion_energy'],
        tau=self.pymc_parameters['precision'], size=size,
        observed=True, value=residual_energy)
def create_multi_mk_model(tree, chars, Qtype, pi, nregime=2):
    """
    Create an mk model with multiple regimes to be sampled from with MCMC.

    Regime number is fixed and the location of the regime shift is allowed
    to change.
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    # Preparations
    nchar = len(set(chars))
    if Qtype == "ER":
        N = 1
    elif Qtype == "Sym":
        N = int(binom(nchar, 2))
    elif Qtype == "ARD":
        N = int(nchar ** 2 - nchar)
    else:
        raise ValueError("Qtype must be one of: ER, Sym, ARD")
    # This model has 2 components: Q parameters and a switchpoint.
    # They are combined in a custom likelihood function.

    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part.
    # Regime shifts will only be allowed to happen at a node.
    # Regime shift: uniform categorical distribution
    valid_switches = [i.ni for i in tree if not (i.isleaf or i.isroot)]
    # Uniform
    switch_ind = pymc.DiscreteUniform("switch_ind", lower=0,
                                      upper=len(valid_switches) - 1)

    @pymc.deterministic(dtype=int)
    def switch(name="switch", switch_ind=switch_ind):
        return valid_switches[switch_ind]

    ###########################################################################
    # Qparams:
    ###########################################################################
    # Unscaled Q param: Dirichlet distribution
    # Setting a Dirichlet prior with Jeffrey's hyperprior of 1/2
    theta = [1.0 / 2.0] * N

    # One set of Q-parameters per regime
    allQparams_init = np.empty(nregime, dtype=object)
    allQparams_init_full = np.empty(nregime, dtype=object)
    allScaling_factors = np.empty(nregime, dtype=object)
    for i in range(nregime):
        if N != 1:
            allQparams_init[i] = pymc.Dirichlet("allQparams_init" + str(i),
                                                theta)
            allQparams_init_full[i] = pymc.CompletedDirichlet(
                "allQparams_init_full" + str(i), allQparams_init[i])
        else:
            # The Dirichlet distribution cannot be created with only one
            # state, so set it to 1 by hand.
            allQparams_init_full[i] = [[1.0]]
        # Exponential scaling factor for Qparams
        allScaling_factors[i] = pymc.Exponential(
            name="allScaling_factors" + str(i), beta=1.0)

    # Scaled Qparams; we would not expect them to necessarily add to 1 as
    # would be the case in a Dirichlet distribution.
    # Regimes are grouped by rows. Each row is a regime.
    @pymc.deterministic(plot=False)
    def Qparams(q=allQparams_init_full, s=allScaling_factors):
        Qs = np.empty([nregime, N])
        for n in range(N):
            for i in range(nregime):
                Qs[i][n] = q[i][0][n] * s[i]
        return Qs

    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function

    # Pre-allocating arrays
    qarray = np.zeros([nregime, N])
    locsarray = np.empty([2], dtype=object)
    l = mk_mr.create_likelihood_function_multimk(tree=tree, chars=chars,
                                                 Qtype=Qtype, pi="Equal",
                                                 findmin=False, nregime=2)

    @pymc.potential
    def multi_mklik(q=Qparams, switch=switch, name="multi_mklik"):
        locs = mk_mr.locs_from_switchpoint(tree, tree[int(switch)],
                                           locsarray)
        np.copyto(qarray, q)
        return l(qarray, locs=locs)

    return locals()
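# A hedged usage sketch, assuming an ivy-style `tree` and a `chars` list of
# tip states as the function expects; names and sample sizes are
# illustrative. pymc 2.x MCMC accepts the dict returned by locals().
mod = create_multi_mk_model(tree, chars, Qtype="ER", pi="Equal")
mc = pymc.MCMC(mod)
mc.sample(10000, burn=1000)
switch_trace = mc.trace('switch')[:]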