def test_multivariate_observations(self):
    """Check the InferenceData groups and dims of a multivariate likelihood.

    A Multinomial variable is observed over 20 experiments with three
    categories each. The assertions require the log-likelihood to drop the
    "direction" dimension while the observed data keeps it.
    """
    dim_labels = {"direction": ["x", "y", "z"], "experiment": np.arange(20)}
    counts = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20)

    with pm.Model(coords=dim_labels):
        p = pm.Beta("p", 1, 1, size=(3,))
        pm.Multinomial(
            "y",
            20,
            p,
            dims=("experiment", "direction"),
            observed=counts,
        )
        idata = pm.sample(draws=50, chains=2, tune=100, return_inferencedata=True)

    expected_groups = {
        "posterior": ["p"],
        "sample_stats": ["lp"],
        "log_likelihood": ["y"],
        "observed_data": ["y"],
    }
    assert not check_multiple_attrs(expected_groups, idata)

    assert "direction" not in idata.log_likelihood.dims
    assert "direction" in idata.observed_data.dims
    # 2 chains, 50 draws, 20 experiments.
    assert idata.log_likelihood["y"].shape == (2, 50, 20)
def mv_simple_discrete():
    """Build a two-category Multinomial model and its analytic moments.

    Returns
    -------
    tuple
        ``(initial_point, model, (mu, C))`` where ``mu = n * p`` and ``C``
        holds ``n * p[i] * (1 - p[i])`` on the diagonal and
        ``-n * p[i] * p[j]`` off the diagonal.
    """
    d = 2
    n = 5
    p = floatX_array([0.15, 0.85])

    with pm.Model() as model:
        pm.Multinomial("x", n, at.constant(p), initval=np.array([1, 4]))

    mu = n * p

    # covariance matrix, filled entry by entry in row-major order
    cov = np.zeros((d, d))
    for row in range(d):
        for col in range(d):
            if row != col:
                cov[row, col] = -n * p[row] * p[col]
            else:
                cov[row, row] = n * p[row] * (1 - p[row])

    return model.compute_initial_point(), model, (mu, cov)
def test_multivariate2(self):
    """Check predictive sample shapes for a Dirichlet-Multinomial model.

    Added as a test for issue #3271.
    """
    mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)

    with pm.Model() as dm_model:
        probs = pm.Dirichlet("probs", a=np.ones(6))
        pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
        burned_trace = pm.sample(
            20,
            tune=10,
            cores=1,
            return_inferencedata=False,
            compute_convergence_checks=False,
        )

    sim_priors = pm.sample_prior_predictive(
        return_inferencedata=False, samples=20, model=dm_model
    )
    sim_ppc = pm.sample_posterior_predictive(
        burned_trace, return_inferencedata=False, samples=20, model=dm_model
    )

    # Every predictive draw of "obs" must have the shape of the observed data.
    obs_draws_shape = (20,) + mn_data.shape
    assert sim_priors["probs"].shape == (20, 6)
    assert sim_priors["obs"].shape == obs_draws_shape
    assert sim_ppc["obs"].shape == obs_draws_shape
def mv_simple_discrete():
    """Build a two-category Multinomial model and its analytic moments.

    Returns
    -------
    tuple
        ``(test_point, model, (mu, C))`` where ``mu = n * p`` and ``C``
        holds ``n * p[i] * (1 - p[i])`` on the diagonal and
        ``-n * p[i] * p[j]`` off the diagonal.
    """
    d = 2
    n = 5
    p = np.array([.15, .85])

    with pm.Model() as model:
        pm.Multinomial('x', n, pm.constant(p), shape=d, testval=np.array([1, 4]))

    mu = n * p

    # covariance matrix, filled entry by entry in row-major order
    cov = np.zeros((d, d))
    for row in range(d):
        for col in range(d):
            if row != col:
                cov[row, col] = -n * p[row] * p[col]
            else:
                cov[row, row] = n * p[row] * (1 - p[row])

    return model.test_point, model, (mu, cov)
def test_multivariate(self):
    """Prior-predictive draws of a 4-category Multinomial have shape (1, 10, 4)."""
    category_probs = np.array([0.25] * 4)
    with pm.Model():
        pm.Multinomial("m", n=5, p=category_probs)
        prior_idata = pm.sample_prior_predictive(10)

    assert prior_idata.prior["m"].shape == (1, 10, 4)
completed_pi_list = [ pymc.CompletedDirichlet('completed_pi_%d' % i, dist) for i, dist in enumerate(pi_list) ] # Indicator variables of whether the pth person is in a group or not # DIMENSIONS: 1 x (num_people^2) for each list, where each element is Kx1 # DOMAIN : {0,1}, only one element of vector is 1, all else 0 # DISTRIBUTION: Categorical (using Multinomial with 1 observation) z_pTq_matrix = np.empty([num_people, num_people], dtype=object) z_pFq_matrix = np.empty([num_people, num_people], dtype=object) for p_person in range(num_people): for q_person in range(num_people): z_pTq_matrix[p_person, q_person] = pymc.Multinomial( 'z_%dT%d_vector' % (p_person, q_person), n=1, p=pi_list[p_person], trace=False) z_pFq_matrix[p_person, q_person] = pymc.Multinomial( 'z_%dF%d_vector' % (p_person, q_person), n=1, p=pi_list[q_person], trace=False) #---------------------------- Data Level ---------------------------------# # Combination of Priors to build the scalar parameter for y~Bernoulli @pymc.deterministic def bernoulli_parameters(z_pTq=z_pTq_matrix, z_pFq=z_pFq_matrix, B=B_matrix): """ Takes in the two z_lists of Categorical Stochastic Objects
theta=numpy.repeat(1.0, d.shape[2]), trace=True) ddiff = pymc.Lambda( "ddiff", lambda dir_1=pymc.CompletedDirichlet( "cdir_1", dir_1, trace=False), dir_2=pymc.CompletedDirichlet( "cdir_2", dir_2, trace=False): dir_2[0] - dir_1[0], trace=True) vals_1 = ds[anno[ctrl], igene] vals_2 = ds[anno[cond], igene] mn_1 = pymc.Multinomial("mn_1", value=vals_1, n=vals_1.sum(axis=1), p=dir_1, observed=True, trace=False) mn_2 = pymc.Multinomial("mn_2", value=vals_2, n=vals_2.sum(axis=1), p=dir_2, observed=True, trace=False) #mp = pymc.MAP([dir_1, dir_2, mn_1, mn_2]) #mp.fit() dir_1.value = array([0.25] * 3) dir_2.value = array([0.25] * 3) mcmc = pymc.MCMC([dir_1, dir_2, mn_1, mn_2, ddiff])
NUM_DRAWS = 100
NUM_SAMPLES = 50
# Category probabilities must sum to 1. numpy.random.multinomial ignores the
# last entry and assigns it whatever mass is left after the others, so the
# last proportion is written out as the 0.1 that is actually simulated.
TRUE_PROPS = [0.6, 0.3, 0.1]


def generate_data():
    """Return NUM_SAMPLES multinomial count vectors of NUM_DRAWS draws each.

    Each vector is drawn with category probabilities TRUE_PROPS.
    """
    return [
        numpy.random.multinomial(NUM_DRAWS, TRUE_PROPS)
        for _ in range(NUM_SAMPLES)
    ]


##############################################################################
# model

# Dirichlet prior on the category proportions, with all parameters set to 1.
props = pymc.Dirichlet(
    name="props",
    theta=[1.0, 1.0, 1.0],
)
# Observed counts: one Multinomial row per simulated sample.
draws = pymc.Multinomial(name="draws",
                         value=generate_data(),
                         n=NUM_DRAWS,
                         p=props,
                         observed=True)

mcmc = pymc.MCMC([props, draws])
mcmc.sample(iter=100000, burn=10000, thin=100)
# mcmc.sample(iter=1000, burn=100, thin=1)
summarize(mcmc, "props")
def create_model(data_matrix, num_people, num_groups, alpha, B):
    """
    Build the PyMC variables of a group-membership model for pairwise links.

    Every name bound in this function is returned through ``locals()``, so the
    resulting dictionary contains the arguments, the intermediate arrays and
    all random variables created below. Heavily commented for readability.
    """
    #---------------------------- Data Transform -----------------------------#
    # Flatten the num_people x num_people response matrix into one vector.
    data_vector = data_matrix.reshape(num_people * num_people).T

    #---------------------------- Hyperparameters ----------------------------#
    # Average probability distribution of being in groups
    # DIMENSIONS: 1 x num_groups
    # SUPPORT: (0,inf)
    # DISTRIBUTION: None
    alpha_vector = alpha

    # Matrix of inter-group correlations
    # DIMENSIONS: num_groups x num_groups
    # SUPPORT: [0,1]
    # DISTRIBUTION: None
    B_matrix = B

    #---------------------------- Prior Parameters ---------------------------#
    # Group membership probabilities, one Dirichlet variable per person
    # DIMENSIONS: 1 x (num_people * num_groups)
    # SUPPORT: (0,1], elements of each person's vector should sum to 1
    # DISTRIBUTION: Dirichlet(alpha)
    pi_list = np.empty(num_people, dtype=object)
    for person in range(num_people):
        person_pi = pymc.Dirichlet('pi_%i' % person, theta=alpha_vector)
        pi_list[person] = person_pi

    # Group indicators for every ordered pair (p, q): the "T" matrix draws
    # from person p's membership vector, the "F" matrix from person q's.
    # DIMENSIONS: 1 x (num_people^2) for each list, where each element is Kx1
    # DOMAIN : {0,1}, only one element of vector is 1, all else 0
    # DISTRIBUTION: Categorical (using Multinomial with 1 observation)
    z_pTq_matrix = np.empty([num_people, num_people], dtype=object)
    z_pFq_matrix = np.empty([num_people, num_people], dtype=object)
    for p_person in range(num_people):
        for q_person in range(num_people):
            z_pTq_matrix[p_person, q_person] = pymc.Multinomial(
                'z_%dT%d_vector' % (p_person, q_person),
                n=1,
                p=pi_list[p_person])
            z_pFq_matrix[p_person, q_person] = pymc.Multinomial(
                'z_%dF%d_vector' % (p_person, q_person),
                n=1,
                p=pi_list[q_person])

    #---------------------------- Data Level ---------------------------------#
    # Combination of priors to build the scalar parameter for y~Bernoulli
    @pymc.deterministic
    def bernoulli_parameters(z_pTq=z_pTq_matrix,
                             z_pFq=z_pFq_matrix,
                             B=B_matrix):
        """
        Compute z_pTq . B . z_pFq for every ordered pair of people and
        return the results as a 1 x (num_people * num_people) array.
        """
        pair_params = np.empty([num_people, num_people], dtype=object)
        for row in range(num_people):
            for col in range(num_people):
                sender = z_pTq[row, col]
                receiver = z_pFq[row, col]
                pair_params[row, col] = np.dot(np.dot(sender, B), receiver)
        return pair_params.reshape(1, num_people * num_people)

    # Observed response when person p is asked whether q is "connected".
    # Reshaped such that each person is asked sequentially about all others,
    # then the next person's vector, etc.
    #
    # Includes information about both p and q's group membership
    # y = Bern(z_p2p * B * z_q2p)
    # y in {0,1}
    # DIMENSIONS: 1 x (num_people * num_people)
    y_vector = pymc.Bernoulli('y_vector',
                              p=bernoulli_parameters,
                              value=data_vector,
                              observed=True)

    #---------------------------- Return all MCMC Objects --------------------#
    return locals()
#multi = pm.Multinomial('multi', n=poisson, p=dirich, value=grid1d.values(), observed=True) #want to do this
#multi = pm.Multinomial('multi', n=poisson, p=dirich) #works
#multi = pm.Multinomial('multi', n=poisson, p=dirich, value=[0,0,0,0,0,0,1,1,1], observed=True) #works when n == sum(value)
"""
multi = pm.Multinomial('multi', p=dirich, observed=True,
    n = [ np.sum(grid1d.values()[i]) for i in range( 0, len(grid1d.values()) ) ] ,
    value = [ grid1d.values()[i] for i in range( 0, len(grid1d.values()) ) ] )
model = pm.Model([multi, dirich], name = 'model')
"""

# Similar to the poisson's 'value =' except each grid gets its own daily
# count, instead of adding all of the grid-squares together (no np.sum() here).
# list(...) materialises the values once; indexing the result of .values()
# position by position is not supported by Python 3 dict views.
multi = pm.Multinomial(
    'multi',
    p=dirich,
    observed=True,
    n=poisson,
    value=list(grid1d.values()))
model = pm.Model([multi, dirich, poisson, expon], name='model')

# <codecell>

mcmc = pm.MCMC(model)
mcmc.sample(200, 100, 1)

# <codecell>

dirich_samples = mcmc.trace('dirich')[:]
# NOTE(review): the name below is misspelled ("sapmples"); it is kept as-is
# because code outside this chunk may refer to it.
expon_sapmples = mcmc.trace('expon')[:]
#no samples from these last two b/c they're observed
#poisson_samples = mcmc.trace('poisson1')[:]
S = []
S_steps = []
data_list = []

# One Dirichlet variable and one observed Multinomial per dataset.
for name in datasets.iterkeys():
    this_dataset = datasets[name]

    @pm.deterministic
    def this_asc_slice(asc=asc, slices=age_slices[name], dataset=this_dataset):
        """Sum asc over each of this dataset's slices, one total per entry."""
        return array([sum(asc[slices[k]]) for k in xrange(len(dataset))])

    # Dots in the dataset name are replaced with underscores in the node name.
    S_now = pm.Dirichlet(('S_%s'%name).replace('.','_'), this_asc_slice)
    S.append(S_now)

    data_list.append(
        pm.Multinomial(
            'data',
            n=sum(this_dataset.N),
            p=S_now,
            value=this_dataset.N,
            isdata=True))

S_pred = pm.Dirichlet('S_pred',asc)

M = pm.MCMC(
    {
        'variables': [sc, scg, alph, asc, S, data_list, S_pred],
        '__name__': 'age_dist_model',
        'step_methods': S_steps,
    },
    db='hdf5',
    comp_level=5)

M.use_step_method(pm.Metropolis, alph, sig=.05)

for i in xrange(len(datasets)):
    M.use_step_method(pm.DirichletMultinomial, S[i])
p = pm.Beta("p",alpha=1,beta=1) n = pm.Binomial("Bino",n=19,p=p,value=5,observed=True) mcmc = pm.MCMC([n,p]) mcmc.sample(25000) %matplotlib inline from pymc.Matplot import plot as mcplot mcplot(mcmc.trace("p"),common_scale=False) # a simple demo for Dirichlet-Multinomal Conjugate N = 5 # dimension beta = np.ones(N) mu=pm.Dirichlet("mu", theta=beta) cmu = pm.CompletedDirichlet("cmu", D=mu) n = pm.Multinomial('n', n=D, p=cmu, value=n_class, observed=True) alpha = np.ones(N) theta = pm.Container([pm.Dirichlet("theta_%s" % i,theta=alpha) \ for i in range(N)]) ctheta = pm.Container([pm.CompletedDirichlet("ctheta_%s" % i, D=theta[i]) for i in range(N)]) c = pm.Container([pm.Multinomial("c_%s" % i, n=n_class[i], p=theta[i]\ ,value = data[i], observed=True)\ for i in range(N)]) @pm.deterministic def precision(mu=cmu, theta=ctheta): return np.sum([mu[0][i]*theta[i][0][i] for i in range(N)])
cur_obs = np.array([bpheb[where_bphe], bphe0[where_bphe]]).T n = np.sum(cur_obs, axis=1) # Need to have (b and not 0) on either chromosome p_bphe = pm.Lambda('p_bphe', lambda pb=pb[where_bphe], p0=p0[where_bphe], p1=p1: 1-(1-pb*(1-p0))**2, trace=False) data_bphe = pm.Binomial('data_bphe', p=p_bphe, n=n, value=bpheb[where_bphe], observed=True) where_phe = np.where(datatype=='phe') cur_obs = np.array([pheab[where_phe],phea[where_phe],pheb[where_phe],phe0[where_phe]]).T n = np.sum(cur_obs, axis=1) p_phe = pm.Lambda('p_%i'%i, lambda pb=pb[where_phe], p0=p0[where_phe], p1=p1: np.array([\ g_freqs['ab'](pb,p0,p1), g_freqs['a0'](pb,p0,p1)+g_freqs['a1'](pb,p0,p1)+g_freqs['aa'](pb,p0,p1), g_freqs['b0'](pb,p0,p1)+g_freqs['b1'](pb,p0,p1)+g_freqs['bb'](pb,p0,p1), g_freqs['00'](pb,p0,p1)+g_freqs['01'](pb,p0,p1)+g_freqs['11'](pb,p0,p1)]).T, trace=False) np.testing.assert_almost_equal(p_phe.value.sum(axis=1), 1) data_phe = pm.Multinomial('data_phe', p=p_phe, n=n, value=cur_obs, observed=True) where_gen = np.where(datatype=='gen') cur_obs = np.array([genaa[where_gen],genab[where_gen],gena0[where_gen],gena1[where_gen],genbb[where_gen],genb0[where_gen],genb1[where_gen],gen00[where_gen],gen01[where_gen],gen11[where_gen]]).T n = np.sum(cur_obs,axis=1) p_gen = pm.Lambda('p_gen', lambda pb=pb[where_gen], p0=p0[where_gen], p1=p1, g_freqs=g_freqs: \ np.array([g_freqs[key](pb,p0,p1) for key in ['aa','ab','a0','a1','bb','b0','b1','00','01','11']]).T, trace=False) np.testing.assert_almost_equal(p_gen.value.sum(axis=1), 1) data_gen = pm.Multinomial('data_gen', p=p_gen, n=n, value=cur_obs, observed=True) # Now vivax. cur_obs = np.array([vivax_pos[where_vivax], vivax_neg[where_vivax]]).T pphe0 = pm.Lambda('pphe0_%i'%i, lambda pb=pb[where_vivax], p0=p0[where_vivax], p1=p1: (g_freqs['00'](pb,p0,p1)+g_freqs['01'](pb,p0,p1)+g_freqs['11'](pb,p0,p1)), trace=False) p_vivax = pm.Lambda('p_vivax', lambda pphe0=pphe0, pv=pv: pv*(1-pphe0), trace=False) try: warnings.warn('Not using age correction')
def make_model(lon, lat, africa, n, datatype, genaa, genab, genbb, gen00,
               gena0, genb0, gena1, genb1, gen01, gen11, pheab, phea, pheb,
               phe0, prom0, promab, aphea, aphe0, bpheb, bphe0):
    """Build the per-cluster frequency model and return all local names.

    Two spatial submodels ('b' and '0') are evaluated at every data cluster,
    a nugget is added to each, and the inverse-logit of the nuggeted values
    gives the frequencies ``pb`` and ``p0``. Each cluster then gets a Binomial
    or Multinomial likelihood chosen by ``datatype[i]`` ('prom', 'aphe',
    'bphe', 'phe' or 'gen'); clusters with any other datatype get none.

    ``lon`` and ``lat`` are scaled by pi / 180 to form the mesh (presumably
    degrees converted to radians -- confirm against the caller). ``n`` is used
    only for its length, the number of clusters. The remaining array
    arguments are indexed by cluster and supply the observed counts.

    Returns ``locals()``, i.e. a dict of every name bound in this function.
    """
    logp_mesh = np.vstack((lon, lat)).T * np.pi / 180.

    # Probability of mutation in the promoter region, given that the other thing is a.
    p1 = pm.Uniform('p1', 0, .04, value=.01)

    # Spatial submodels
    spatial_b_vars = make_gp_submodel('b', logp_mesh, africa, with_africa_covariate=True)
    spatial_s_vars = make_gp_submodel('0', logp_mesh)
    sp_sub_b = spatial_b_vars['sp_sub']
    sp_sub_s = spatial_s_vars['sp_sub']

    # Loop over data clusters, adding nugget and applying link function.
    tilde_fs_d = []
    p0_d = []
    tilde_fb_d = []
    pb_d = []
    V_b = spatial_b_vars['V']
    V_s = spatial_s_vars['V']
    data_d = []

    # The bound is computed once here; ``n`` is rebound to the per-cluster
    # total inside the loop, which therefore does not affect the iteration.
    for i in xrange(len(n)):
        this_fb = sp_sub_b.f_eval[i]
        this_fs = sp_sub_s.f_eval[i]

        # Nuggeted field in this cluster
        tilde_fb_d.append(
            pm.Normal('tilde_fb_%i' % i, this_fb, 1. / V_b,
                      value=np.random.normal(), trace=False))
        tilde_fs_d.append(
            pm.Normal('tilde_fs_%i' % i, this_fs, 1. / V_s,
                      value=np.random.normal(), trace=False))

        # The frequencies. pb comes from the 'b' field and p0 from the '0'
        # field, so each Python name matches its node label and source field.
        pb = pm.Lambda('pb_%i' % i,
                       lambda lt=tilde_fb_d[-1]: pm.invlogit(lt),
                       trace=False)
        p0 = pm.Lambda('p0_%i' % i,
                       lambda lt=tilde_fs_d[-1]: pm.invlogit(lt),
                       trace=False)

        # The likelihoods
        if datatype[i] == 'prom':
            cur_obs = [prom0[i], promab[i]]
            # Need to have either b and 0 or a and 1 on both chromosomes
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: (pb * p0 + (1 - pb) * p1)**2,
                          trace=False)
            n = np.sum(cur_obs)
            data_d.append(
                pm.Binomial('data_%i' % i, p=p, n=n, value=prom0[i],
                            observed=True))

        elif datatype[i] == 'aphe':
            cur_obs = [aphea[i], aphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (a and not 1) on either chromosome, or not (not (a and not 1) on both chromosomes)
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - (1 - pb) * (1 - p1))**2,
                          trace=False)
            data_d.append(
                pm.Binomial('data_%i' % i, p=p, n=n, value=aphea[i],
                            observed=True))

        elif datatype[i] == 'bphe':
            cur_obs = [bpheb[i], bphe0[i]]
            n = np.sum(cur_obs)
            # Need to have (b and not 0) on either chromosome
            p = pm.Lambda('p_%i' % i,
                          lambda pb=pb, p0=p0, p1=p1: 1 - (1 - pb * (1 - p0))**2,
                          trace=False)
            # The observed successes are the b-phenotype counts of this
            # cluster, i.e. the first component of cur_obs.
            data_d.append(
                pm.Binomial('data_%i' % i, p=p, n=n, value=bpheb[i],
                            observed=True))

        elif datatype[i] == 'phe':
            cur_obs = np.array([pheab[i], phea[i], pheb[i], phe0[i]])
            n = np.sum(cur_obs)
            # Phenotype probabilities, in the same order as cur_obs.
            p = pm.Lambda('p_%i' % i, lambda pb=pb, p0=p0, p1=p1: np.array([
                g_freqs['ab'](pb, p0, p1),
                g_freqs['a0'](pb, p0, p1) + g_freqs['a1'](pb, p0, p1) + g_freqs['aa'](pb, p0, p1),
                g_freqs['b0'](pb, p0, p1) + g_freqs['b1'](pb, p0, p1) + g_freqs['bb'](pb, p0, p1),
                g_freqs['00'](pb, p0, p1) + g_freqs['01'](pb, p0, p1) + g_freqs['11'](pb, p0, p1)]),
                trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(
                pm.Multinomial('data_%i' % i, p=p, n=n, value=cur_obs,
                               observed=True))

        elif datatype[i] == 'gen':
            cur_obs = np.array([
                genaa[i], genab[i], gena0[i], gena1[i], genbb[i], genb0[i],
                genb1[i], gen00[i], gen01[i], gen11[i]
            ])
            n = np.sum(cur_obs)
            # Genotype probabilities, in the same order as cur_obs.
            p = pm.Lambda('p_%i' % i, lambda pb=pb, p0=p0, p1=p1, g_freqs=g_freqs:
                          np.array([g_freqs[key](pb, p0, p1) for key in
                                    ['aa', 'ab', 'a0', 'a1', 'bb', 'b0', 'b1', '00', '01', '11']]),
                          trace=False)
            np.testing.assert_almost_equal(p.value.sum(), 1)
            data_d.append(
                pm.Multinomial('data_%i' % i, p=p, n=n, value=cur_obs,
                               observed=True))

    # The fields plus the nugget, in convenient vector form
    @pm.deterministic
    def tilde_fb(tilde_fb_d=tilde_fb_d):
        """Concatenated version of tilde_fb, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fb_d)

    @pm.deterministic
    def tilde_fs(tilde_fs_d=tilde_fs_d):
        """Concatenated version of tilde_fs, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(tilde_fs_d)

    return locals()
from pylab import *
import pymc
from pymc import Matplot
import numpy as np
# factorial lives in scipy.special; the scipy.misc alias is gone from
# current SciPy releases, so importing it from there fails.
from scipy.special import factorial
import spacepy.plot as spp

# Observed counts for the three categories.
data = np.array([33, 66, 1])

rates = pymc.Uniform('rates', 0, 100, size=4, value=[0.01, 2, 10, 1])


@pymc.deterministic(plot=True)
def prob(rates=rates):
    """Return the category probabilities.

    The probabilities are fixed; ``rates`` is declared as a parent but does
    not enter the computation.
    """
    return np.array([0.33, 0.66, 0.01])


likelihood = pymc.Multinomial('likelihood', n=sum(data), p=prob, value=data,
                              observed=True)

M = pymc.MCMC(likelihood)
M.sample(100000)
Matplot.summary_plot(M)

#
# @pymc.observed
# def y(value=1):
#     pymc.categorical_like()
#
# return 10**value * np.exp(-10)/ factorial(value)
#
# M = pymc.MCMC(y)