Example #1
def main(n_chains=3):
    np.random.seed(113)
    x_train, y_train_onehot = prepare_data(train=True,
                                           onehot=True,
                                           take_first=500)
    print(f"Using {len(x_train)} train samples.")
    model = pm.Model()
    with model:
        w = pm.Bernoulli('w', p=0.5, shape=(784, 10))
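        # binary weight matrix: each of the 784 input pixels either votes for a class or not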
        logit_vec = tt.dot(x_train, w)
        proba = softmax(logit_vec)
        y_obs = pm.Multinomial('y_obs', n=1, p=proba, observed=y_train_onehot)
        trace = None
        if os.path.exists(fpath_trace):
            with open(fpath_trace, 'rb') as f:
                trace = pickle.load(f)
            if trace.nchains != n_chains:
                print(
                    f"Reset previous progress {trace} to match n_chains={n_chains}"
                )
                trace = None
        trace = pm.sample(draws=3,
                          njobs=1,
                          chains=n_chains,
                          tune=0,
                          trace=trace)
    if trace.nchains == n_chains:
        # we didn't stop the training process
        with open(fpath_trace, 'wb') as f:
            pickle.dump(trace, f)
    convergence_plot(trace=trace, train=True)
Example #2
    def test_pymc_implementation(self):
        """my analytical implementation and pyMC should yield the same results.
        Test expected value and variance for theta"""

        # need to use the Bayes-Laplace prior: pyMC cannot handle the Haldane prior
        analyser_bl = ConfusionMatrixAnalyser(self.analyser.confusion_matrix,
                                              prior=bayes_laplace_prior)

        # inference with pyMC
        with pm.Model() as multinom_test:
            a = pm.Dirichlet('a', a=bayes_laplace_prior.astype(float).values)
            data_pred = pm.Multinomial('data_pred',
                                       n=self.N,
                                       p=a,
                                       observed=self.analyser.confusion_matrix)
            trace = pm.sample(5000)

        # get pymc samples
        pymc_trace_samples = pd.DataFrame(
            trace.get_values('a'),
            columns=self.analyser.confusion_matrix.index)

        # compare expected value and variance
        for i in self.analyser.theta_samples:
            self.assertAlmostEqual(pymc_trace_samples[i].mean(),
                                   analyser_bl.theta_samples[i].mean(),
                                   delta=1e-2)
            self.assertAlmostEqual(pymc_trace_samples[i].var(),
                                   analyser_bl.theta_samples[i].var(),
                                   delta=1e-3)
Example #3
    def _sample_pymc3(cls, dist, size, seed):
        """Sample from PyMC3."""

        import pymc3
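        # map each distribution class name to a builder for the matching PyMC3 random variable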
        pymc3_rv_map = {
            'MultivariateNormalDistribution': lambda dist:
                pymc3.MvNormal('X', mu=matrix2numpy(dist.mu, float).flatten(),
                cov=matrix2numpy(dist.sigma, float), shape=(1, dist.mu.shape[0])),
            'MultivariateBetaDistribution': lambda dist:
                pymc3.Dirichlet('X', a=list2numpy(dist.alpha, float).flatten()),
            'MultinomialDistribution': lambda dist:
                pymc3.Multinomial('X', n=int(dist.n),
                p=list2numpy(dist.p, float).flatten(), shape=(1, len(dist.p)))
        }

        dist_list = pymc3_rv_map.keys()

        if dist.__class__.__name__ not in dist_list:
            return None

        with pymc3.Model():
            pymc3_rv_map[dist.__class__.__name__](dist)
            samples = pymc3.sample(size, chains=1, progressbar=False, random_seed=seed)[:]['X']
            if samples.shape[0:len(size)] != size:
                samples = samples.reshape((size[0],) + samples.shape)
            return samples
Example #4
    def test_multivariate(self):
        with pm.Model():
            m = pm.Multinomial("m", n=5, p=np.array([0.25, 0.25, 0.25, 0.25]), shape=4)
            trace = pm.sample_prior_predictive(10)

        assert m.random(size=10).shape == (10, 4)
        assert trace["m"].shape == (10, 4)
Example #5
def get_dirichlet_multinomial_dpmixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_trunc']

    with pm.Model() as model:
        # sample P ~ DP(G0)
        beta = pm.Beta('beta',
                       1.,
                       params['dp_alpha'],
                       shape=n_comp)
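        # stick-breaking construction of the truncated DP weights:
        # component k gets weight beta_k * prod_{j<k} (1 - beta_j)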
        p_comp = pm.Deterministic(
            'p_comp',
            beta * tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]]))
        pkw = pm.Dirichlet('pkw',
                           a=params['pkw_dirichlet_dist_alpha'] * np.ones(n_feat),
                           shape=(n_comp, n_feat))
        # sample X ~ P
        z = pm.Categorical('z',
                           p=p_comp,
                           shape=n_doc)
        x = pm.Multinomial('x',
                           n=X.sum(axis=1),
                           p=pkw[z],
                           observed=X)
    return model
Example #6
    def create_model(self,observations, m, n_as):

        if self.name == "max_likelihood":
            states = list(range(n_as))
            # belief about the probability of an action given a state, formed
            # by the proportion of samples
            counts = [observations.tolist().count(x) for x in set(states)]
            total_counts = sum(counts)
            agent_belief = [x / total_counts for x in counts]
            return agent_belief

        elif self.name in ("bayesian", "info_bayesian"):
            with pm.Model() as model:
                sparsity = 3  # not zero
                alpha = np.full(n_as, 1)  # input for dirichlet

                # Weakly informative priors for unknown model parameters
                theta = pm.Dirichlet('theta', alpha / sparsity)

                # Likelihood (sampling distribution) of observations
                likelihood = pm.Multinomial('likelihood', m, theta, observed=observations)

                # a starting point can be given by passing a dictionary where each key contains a probability cell.
                # This distribution can be placed a certain number of standard deviations away from the mean
                # of the underlying distribution, for experimenting with distance.
                # Uncomment to use NUTS
                #trace = pm.sample(1000, tune=500, cores=4, target_accept=0.95)
                trace = pm.sample(1000, step=pm.Metropolis(), tune=500)
            basic = pm.summary(trace).round(2)
            print(basic)

            # extract the inferred probability distribution of belief from the parameter space
            theta_infe = trace['theta'].mean(0).flatten()

            return theta_infe
Example #7
    def create_model(self, X=None, y=None):
        if X is not None:  # `if X:` is ambiguous for numpy arrays
            num_samples, self.num_pred = X.shape

        if y is not None:
            num_samples, self.num_out = y.shape

        model_input = theano.shared(np.zeros(shape=(1, self.num_pred)))
        model_output = theano.shared(np.zeros(shape=(1,self.num_out)))
        
        self.shared_vars = {
            'model_input': model_input,
            'model_output': model_output
        }
        
        with pm.Model() as model:
            # Define weights
            weights_1 = pm.Normal('w_1', mu=0, sd=1, 
                                  shape=(self.num_pred, self.n_hidden))
            weights_2 = pm.Normal('w_2', mu=0, sd=1,
                                  shape=(self.n_hidden, self.n_hidden))
            weights_out = pm.Normal('w_out', mu=0, sd=1, 
                                    shape=(self.n_hidden, self.num_out))

            # Define activations
            acts_1 = tt.tanh(tt.dot(model_input, weights_1))
            acts_2 = tt.tanh(tt.dot(acts_1, weights_2))
            acts_out = tt.nnet.softmax(tt.dot(acts_2, weights_out))  # noqa

            # Define likelihood
            out = pm.Multinomial('likelihood', n=1, p=acts_out, 
                                 observed=model_output)
            
        return model
Example #8
def dice_bias():
    y = np.asarray([20,  21, 17, 19, 17, 30])
    k = len(y)
    p = 1/k
    n = y.sum()

    with pm.Model() as dice_model:
        
        # initializes the Dirichlet distribution with a uniform prior:
        a = np.ones(k) 
        
        theta = pm.Dirichlet("theta", a=a)
        
        # Since theta[5] will hold the posterior probability 
        # of rolling a 6 we'll compare this to the 
        # reference value p = 1/6 to determine the amount of bias
        # in the die 
        six_bias = pm.Deterministic("six_bias", theta[k-1] - p)
        
        results = pm.Multinomial("results", n=n, p=theta, observed=y)
        dice_trace = pm.sample(draws=1000) 
        pm.traceplot(dice_trace, combined=True, lines={"theta": p})

    
    axes = pm.plot_posterior(dice_trace, 
                          varnames=["theta"], 
                          ref_val=np.round(p, 3))
    for i, ax in enumerate(axes):
        ax.set_title(f"{i+1}")

    six_bias = dice_trace["six_bias"]
    six_bias_perc = len(six_bias[six_bias>0])/len(six_bias)
    plt.show() 
    print(f'P(Six is biased) = {six_bias_perc:.2%}')
Example #9
def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv",
                                np.r_[0.0],
                                np.c_[1.0],
                                shape=1,
                                observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv",
                                    np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv",
                                          0.1,
                                          0.1,
                                          10,
                                          observed=5)
        categorical_rv = pm.Categorical("categorical_rv",
                                        np.r_[0.5, 0.5],
                                        observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv",
                                        5,
                                        np.r_[0.5, 0.5],
                                        observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1]
        for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    assert pymc_rv_names == new_pymc_rv_names
Example #10
 def addObservations():
     with hierarchicalModel.pymcModel:
         for i in range(hierarchicalModel.nGroups):
             y = hierarchicalModel.y[i]
             y_obs = np.zeros((y.shape[0], 3))
             y_obs[:, :2] = y[:, 1:3]
             y_obs[:, 2] = y[:, 0] + y[:, 3]
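             # keep outcome columns 1 and 2 as-is; pool columns 0 and 3 into a third category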
             n_obs = np.sum(y_obs, axis=1)
             observations.append(
                 pm.Multinomial(
                     f'y_{i}', n=n_obs, p=theta,
                     observed=y_obs))  # todo theta for several groups
Example #11
 def test_multivariate2(self):
     # Added test for issue #3271
     mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
     with pm.Model() as dm_model:
         probs = pm.Dirichlet("probs", a=np.ones(6), shape=6)
         obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
         burned_trace = pm.sample(20, tune=10, cores=1)
     sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
     sim_ppc = pm.sample_posterior_predictive(burned_trace,
                                              samples=20,
                                              model=dm_model)
     assert sim_priors["probs"].shape == (20, 6)
     assert sim_priors["obs"].shape == (20, ) + obs.distribution.shape
     assert sim_ppc["obs"].shape == (20, ) + obs.distribution.shape
Example #12
    def _sample_pymc3(cls, dist, size, seed):
        """Sample from PyMC3."""

        import pymc3
        pymc3_rv_map = {
            'MultivariateNormalDistribution':
            lambda dist: pymc3.MvNormal('X',
                                        mu=matrix2numpy(dist.mu, float).
                                        flatten(),
                                        cov=matrix2numpy(dist.sigma, float),
                                        shape=(1, dist.mu.shape[0])),
            'MultivariateBetaDistribution':
            lambda dist: pymc3.Dirichlet(
                'X', a=list2numpy(dist.alpha, float).flatten()),
            'MultinomialDistribution':
            lambda dist: pymc3.Multinomial('X',
                                           n=int(dist.n),
                                           p=list2numpy(dist.p, float).flatten(),
                                           shape=(1, len(dist.p)))
        }

        sample_shape = {
            'MultivariateNormalDistribution':
            lambda dist: matrix2numpy(dist.mu).flatten().shape,
            'MultivariateBetaDistribution':
            lambda dist: list2numpy(dist.alpha).flatten().shape,
            'MultinomialDistribution':
            lambda dist: list2numpy(dist.p).flatten().shape
        }

        dist_list = pymc3_rv_map.keys()

        if dist.__class__.__name__ not in dist_list:
            return None

        import logging
        logging.getLogger("pymc3").setLevel(logging.ERROR)
        with pymc3.Model():
            pymc3_rv_map[dist.__class__.__name__](dist)
            samples = pymc3.sample(draws=prod(size),
                                   chains=1,
                                   progressbar=False,
                                   random_seed=seed,
                                   return_inferencedata=False,
                                   compute_convergence_checks=False)[:]['X']
        return samples.reshape(size +
                               sample_shape[dist.__class__.__name__](dist))
Example #13
def mv_simple_discrete():
    d = 2
    n = 5
    p = np.array([.15, .85])
    with pm.Model() as model:
        pm.Multinomial('x', n, tt.constant(p), shape=d, testval=np.array([1, 4]))
        mu = n * p
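        # analytic moments for reference: the mean is n * p and the covariance
        # computed below is n * (diag(p) - p p^T)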
        # covariance matrix
        C = np.zeros((d, d))
        for (i, j) in product(range(d), range(d)):
            if i == j:
                C[i, i] = n * p[i] * (1 - p[i])
            else:
                C[i, j] = -n * p[i] * p[j]

    return model.test_point, model, (mu, C)
Example #14
def mv_simple_discrete():
    d = 2
    n = 5
    p = floatX_array([0.15, 0.85])
    with pm.Model() as model:
        pm.Multinomial("x", n, at.constant(p), initval=np.array([1, 4]))
        mu = n * p
        # covariance matrix
        C = np.zeros((d, d))
        for (i, j) in product(range(d), range(d)):
            if i == j:
                C[i, i] = n * p[i] * (1 - p[i])
            else:
                C[i, j] = -n * p[i] * p[j]

    return model.initial_point, model, (mu, C)
Example #15
 def test_multivariate_observations(self):
     coords = {"direction": ["x", "y", "z"], "experiment": np.arange(20)}
     data = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20)
     with pm.Model(coords=coords):
         p = pm.Beta("p", 1, 1, shape=(3,))
         pm.Multinomial("y", 20, p, dims=("experiment", "direction"), observed=data)
         idata = pm.sample(draws=50, tune=100, return_inferencedata=True)
     test_dict = {
         "posterior": ["p"],
         "sample_stats": ["lp"],
         "log_likelihood": ["y"],
         "observed_data": ["y"],
     }
     fails = check_multiple_attrs(test_dict, idata)
     assert not fails
     assert "direction" not in idata.log_likelihood.dims
     assert "direction" in idata.observed_data.dims
Example #16
def get_dirichlet_multinomial_mixture(X, params):
    n_doc, n_feat = X.shape
    n_comp = params['n_comp']

    with pm.Model() as model:
        pkw = pm.Dirichlet('pkw',
                           a=params['pkw_dirichlet_dist_alpha'] * np.ones(n_feat),
                           shape=(n_comp, n_feat))
        p_comp = pm.Dirichlet('p_comp',
                              a=params['pcomp_dirichlet_dist_alpha'] * np.ones(n_comp))
        z = pm.Categorical('z',
                           p=p_comp,
                           shape=n_doc)
        x = pm.Multinomial('x',
                           n=X.sum(axis=1),
                           p=pkw[z],
                           observed=X)
    return model
Example #17
 def test_multivariate2(self):
     # Added test for issue #3271
     mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
     with pm.Model() as dm_model:
         probs = pm.Dirichlet("probs", a=np.ones(6))
         obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
         burned_trace = pm.sample(20,
                                  tune=10,
                                  cores=1,
                                  return_inferencedata=False,
                                  compute_convergence_checks=False)
     sim_priors = pm.sample_prior_predictive(samples=20, model=dm_model)
     sim_ppc = pm.sample_posterior_predictive(burned_trace,
                                              samples=20,
                                              model=dm_model)
     assert sim_priors["probs"].shape == (20, 6)
     assert sim_priors["obs"].shape == (20, ) + mn_data.shape
     assert sim_ppc["obs"].shape == (20, ) + mn_data.shape
Example #18
def getAngelRate(data, n_sample=10000, n_chain=3, ax=None):
    # organize the data
    data_0 = data.query('campaign != 1')
    data_1 = data.query('campaign == 1')
    d = np.array([[
        sum(data_0['angel'] == 0),
        sum(data_0['angel'] == 1),
        sum(data_0['angel'] == 2)
    ],
                  [
                      sum(data_1['angel'] == 0),
                      sum(data_1['angel'] == 1),
                      sum(data_1['angel'] == 2)
                  ]])
    weight = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 2.0]])
    # parameter estimation
    with pm.Model() as model:
        alpha = [1., 1., 1.]  # hyper-parameter of DirichletDist.
        pi = pm.Dirichlet('pi', a=np.array(alpha))
        for i in np.arange(d.shape[0]):
            piw = pi * weight[i]
            m = pm.Multinomial('m_%s' % (i),
                               n=np.sum(d[i]),
                               p=piw,
                               observed=d[i])
        trace = pm.sample(n_sample, chains=n_chain)
    np.savetxt('trace_pi.csv', trace['pi'], delimiter=',')
    # Silver
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 1])
    print('Silver : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Silver ExpectedValue : {}'.format(trace['pi'][:, 1].mean()))
    # Gold
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 2])
    print('Gold : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Gold ExpectedValue : {}'.format(trace['pi'][:, 2].mean()))
    # save fig
    if ax is not None:
        pm.plot_posterior(trace['pi'][:, 0], ax=ax[0])
        pm.plot_posterior(trace['pi'][:, 1], ax=ax[1])
        pm.plot_posterior(trace['pi'][:, 2], ax=ax[2])
        ax[0].set_title('Nothing')
        ax[1].set_title('SilverAngel')
        ax[2].set_title('GoldAngel')
    return trace
Example #19
def test_save_load(tmp_path_factory, c, sig_defs):

    # make small for speed
    c = c[0:30]
    sig_defs = sig_defs[0:5]

    dataset_args = {'foo': 'bar'}
    model_args = {'bar': 'baz'}
    pymc3_args = {'baz': 'foo'}

    # train a model with 5 sigs
    with pm.Model() as model:
        data = pm.Data("data", c)
        N = data.sum(1).reshape((c.shape[0], 1))
        activities = ch_dirichlet("activities",
                                  a=np.ones(5),
                                  shape=(c.shape[0], 5))
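        # expected category probabilities per sample: signature activities mixed over signature definitions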
        B = pm.math.dot(activities, sig_defs)
        pm.Multinomial('corpus', n=N, p=B, observed=data)

        trace = pm.ADVI()
        trace.fit()

    # checkpoint
    fp = tmp_path_factory.mktemp("ckp") / "vanilla_lda.ckp"
    save_checkpoint(fp, model, trace, dataset_args, model_args, pymc3_args)

    # load model
    m2, t2, dataset_args2, model_args2, pymc3_args2 = load_checkpoint(fp)

    # all params should be identical
    # checks are weak because __eq__ methods are not provided
    #assert str(model) == str(m2), 'model load failed'
    assert np.allclose(trace.hist, t2.hist), 'trace load failed'
    assert dataset_args == dataset_args2, 'dataset_args load failed'
    assert model_args == model_args2, 'model_args load failed'
    assert pymc3_args == pymc3_args2, 'pymc3_args load failed'

    # with same seed, both models should tune with same result
    # test model tuning
    trace.refine(100)
    t2.refine(100)
    assert np.allclose(trace.hist, t2.hist), 'trace tuning failed'
Example #20
def make_nn(ann_input, ann_output, n_hidden):

    init_1 = np.random.randn(X.shape[1], n_hidden)
    init_2 = np.random.randn(n_hidden, n_hidden)
    init_out = np.random.randn(n_hidden, Y.shape[1])

    with pm.Model() as nn_model:
        # Define weights
        w_1 = pm.Normal('w_1',
                        mu=0,
                        sd=1,
                        shape=(X.shape[1], n_hidden),
                        testval=init_1)
        w_2 = pm.Normal('w_2',
                        mu=0,
                        sd=1,
                        shape=(n_hidden, n_hidden),
                        testval=init_2)
        w_out = pm.Normal('w_out',
                          mu=0,
                          sd=1,
                          shape=(n_hidden, Y.shape[1]),
                          testval=init_out)

        # Define activations
        acts_1 = pm.Deterministic('activations_1',
                                  tt.tanh(tt.dot(ann_input, w_1)))
        acts_2 = pm.Deterministic('activations_2', tt.tanh(tt.dot(acts_1,
                                                                  w_2)))
        acts_out = pm.Deterministic('activations_out',
                                    tt.nnet.softmax(tt.dot(acts_2, w_out)))

        # Define likelihood
        out = pm.Multinomial('likelihood',
                             n=1,
                             p=acts_out,
                             observed=ann_output)

    return nn_model
Example #21
    def _make_model(self):

        from pymc3.distributions.transforms import interval
        # only the 1 % of highest expressed genes
        gc = np.sum(self.nCounts, axis=1)
        zeros = np.any(np.int_(self.counts) == 0, axis=1)
        nzCounts = self.counts[~zeros]
        ind = np.logical_and(~zeros, gc > np.percentile(gc, 99))
        self.feature_selection = ind
        subCounts = self.counts[ind]
        nsubCounts = self.nCounts[gc > np.percentile(gc, 99.9)]
        print('Data shape:')
        print(subCounts.shape)

        mCounts = np.int_(subCounts)

        multiNn = np.sum(mCounts, axis=0)
        ldata = self.tau_log_E_p[:, ind]

        p_f = .95
        p_t = .95
        sparsity = 2  # ToDo: fit LKJCholeskyCov to corr distribution
        n = self.pheno['tcRes'].values[:, None]
        tc = self.pheno['tcEst'].values[:, None]
        cMean = np.mean(ldata, axis=0)
        #cMean.shape = cMean.shape + (1,)
        cSd = np.std(ldata, axis=0)
        #cSd.shape = cMean.shape
        #cSdMv = np.stack(np.diag(l) for l in cSd)
        # https://stats.stackexchange.com/questions/237847/what-are-the-properties-of-a-half-cauchy-distribution

        n_dim = mCounts.shape[0]
        n_samp = mCounts.shape[1]

        # numerical padding
        numpad = 1e-5

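        # the two helpers below convert between the observed proportion p_a and
        # the mixture weight alpha using the rates p_f and p_t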
        def pa2alpha(p_a):
            return (p_a + p_f - 1) / (p_t + p_f - 1)

        def alpha2pa(alpha):
            return (alpha * (p_t + p_f - 1)) - p_f + 1

        def mixCounts(x, alpha):
            return tt.sum(x * alpha, axis=0)

        def mixSep(x_f, x_t, alpha):
            exp_f = tt.nnet.softmax(x_f)
            exp_t = tt.nnet.softmax(x_t)
            result = ((1 - alpha) * exp_f) + (alpha * exp_t)
            return result

        with pm.Model() as model:
            # bounds with numerical padding
            p_a = pm.Beta('p_a',
                          alpha=(n * tc) + 1,
                          beta=(n * (1 - tc)) + 1,
                          transform=pm.distributions.transforms.Interval(
                              1 - (p_f + numpad), (p_t + numpad)),
                          shape=(n_samp, 1),
                          testval=alpha2pa(tc))
            alpha = pm.Deterministic('alpha', pa2alpha(p_a))

            mus_f = pm.Normal('mus_f',
                              mu=cMean,
                              sd=cSd,
                              shape=n_dim,
                              testval=cMean)
            mus_t = pm.Normal('mus_t',
                              mu=cMean,
                              sd=cSd,
                              shape=n_dim,
                              testval=cMean)
            sdd = pm.HalfNormal.dist(sd=cSd)
            packed_L_t = pm.LKJCholeskyCov('packed_L_t',
                                           n=n_dim,
                                           eta=sparsity,
                                           sd_dist=sdd)
            packed_L_f = pm.LKJCholeskyCov('packed_L_f',
                                           n=n_dim,
                                           eta=sparsity,
                                           sd_dist=sdd)
            chol_f = pm.expand_packed_triangular(n_dim, packed_L_f, lower=True)
            chol_t = pm.expand_packed_triangular(n_dim, packed_L_t, lower=True)
            x_f = pm.MvNormal('x_f',
                              mu=mus_f,
                              chol=chol_f,
                              testval=ldata,
                              shape=(n_samp, n_dim))
            x_t = pm.MvNormal('x_t',
                              mu=mus_t,
                              chol=chol_t,
                              testval=ldata,
                              shape=(n_samp, n_dim))

            x = pm.Deterministic('x', mixSep(x_f, x_t, alpha))

            obs = pm.Multinomial('obs',
                                 n=multiNn,
                                 p=x,
                                 observed=mCounts.T,
                                 dtype='int64',
                                 shape=mCounts.T.shape)

        return model
Example #22
def get_object_places(table_path, graph_path, visited_places):
    assert os.path.exists(table_path)
    assert os.path.exists(graph_path)

    df = pd.read_csv(table_path, index_col=0)

    while True:
        #speech to text part
        # obtain audio from the microphone
        r = sr.Recognizer()
        with sr.Microphone() as source:

            print("Quale oggetto stai cercando?\n")
            #audio = r.listen(source)

            # recognize speech using Google Cloud Speech
            try:
                object_to_search = "wallet"
                #object_to_search = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS).strip()

                #similar = check_similapr_objects(df, object_to_search)
                #print(similar)
                similar = None  # similarity lookup above is disabled; keep the name defined
                if len(df.loc[df["object"] == object_to_search]) > 0:
                    break
                elif similar is not None:
                    answer = input(
                        object_to_search +
                        " not found; found a match with " + similar +
                        ", use its distribution? [Y/n]\n")
                    if (answer == "Y"):
                        object_to_search = similar
                        break

            except sr.UnknownValueError:
                print("Google Cloud Speech could not understand audio")
            except sr.RequestError as e:
                print(
                    "Could not request results from Google Cloud Speech service; {0}"
                    .format(e))

    df = df.loc[df["object"] == object_to_search].drop('object', axis=1)
    places = list(df.keys())

    #dropping places already visited

    for visited_place in visited_places:
        df = df.drop(visited_place, axis=1)
        places.remove(visited_place)

    row = df
    print(row)

    for x in list(zip(row.keys(), row.values[0])):
        print(str(x[0]) + " " + str(x[1]))

    knowledge = row.values[0]

    number_of_places = len(knowledge)

    distances_dict = get_distances(graph_path,
                                   ("pose", 1.7219, 11.1261, "storage"))
    distances = [distances_dict[key] for key in places]
    max_distance = max(distances)

    inverted_distances = list(
        map(lambda x: abs(x - max_distance + 1) / 5, distances))

    prior_knowledge = np.array(inverted_distances)

    with pm.Model() as model:
        # Parameters of the Multinomial are from a Dirichlet
        parameters = pm.Dirichlet('parameters',
                                  a=prior_knowledge,
                                  shape=number_of_places)
        # Observed data is from a Multinomial distribution
        observed_data = pm.Multinomial('observed_data',
                                       n=sum(knowledge),
                                       p=parameters,
                                       shape=number_of_places,
                                       observed=knowledge)

    with model:
        # Sample from the posterior
        trace = pm.sample(draws=1000,
                          chains=2,
                          tune=500,
                          discard_tuned_samples=True)

        trace_df = pd.DataFrame(trace['parameters'], columns=places)

    # For probabilities use samples after burn in
    pvals = trace_df.iloc[:, :number_of_places].mean(axis=0)
    tag_and_dist = sorted(zip(places, pvals), key=lambda x: x[1], reverse=True)
    display_probs(dict(tag_and_dist))

    top_4_places = [x[0] for x in tag_and_dist[:4]]

    g = build_graph(graph_path)

    topn_nodes = []

    for label in top_4_places:
        for node in g.nodes():
            _, _, _, node_label = node
            if (node_label == label):
                topn_nodes += [node]
                break

    #adding the actual position to the top4 nodes
    topn_nodes += [("pose", 7.3533, 0.5381, "corridor-1")]
    subgraph = nx.Graph()

    edges = list(itertools.combinations(g.subgraph(topn_nodes), 2))

    all_distances = dict(nx.all_pairs_shortest_path_length(g))

    edges_with_weight = [(topn_nodes.index(x[0]), topn_nodes.index(x[1]),
                          all_distances[x[0]][x[1]]) for x in edges]

    print(edges_with_weight)

    fitness_dists = mlrose.TravellingSales(distances=edges_with_weight)
    problem_fit = mlrose.TSPOpt(length=len(topn_nodes),
                                fitness_fn=fitness_dists,
                                maximize=False)
    best_state, best_fitness = mlrose.genetic_alg(problem_fit, random_state=2)

    path = [topn_nodes[x][3] for x in best_state]

    path = rotate(path, path.index('corridor-1'))

    print(path)
Example #23
    def _make_model(self):
        pca = self.pca
        mCounts = np.int_(self.counts * self.seq_depth_factor)
        n_dim = pca.n_components_
        n_modes = self.n_modes
        n_samp = mCounts.shape[1]
        n_features = mCounts.shape[0]
        if self.kmeansInit:
            sd_factor = 2 / n_modes
        else:
            sd_factor = 2

        print("Defining model constants...")
        if pca.whiten:
            rot = np.sqrt(pca.explained_variance_[:, None]) * pca.components_
            rot = theano.shared(floatX(rot))
            cSd = floatX(1)
            tcov = np.eye(n_dim)[np.tril_indices(n_dim)] * sd_factor
        else:
            rot = theano.shared(floatX(pca.components_))
            cSd = floatX(np.sqrt(pca.explained_variance_))
            tcov = (np.diag(pca.explained_variance_)[np.tril_indices(n_dim)] *
                    sd_factor)
        shift = theano.shared(floatX(pca.mean_[None, :]),
                              broadcastable=(True, False))

        multiNn = np.sum(mCounts, axis=0)
        print("Counts shape:")
        print(mCounts.shape)
        lcounts = floatX(self.pca.transform(self.tau_log_E_p))
        print("Latent counts shape:")
        print(lcounts.shape)
        high_tumor = self.pheno["tcEst"] > 0.8
        low_tumor = self.pheno["tcEst"] < 0.2
        if self.kmeansInit:
            km = KMeans(n_clusters=n_modes,
                        random_state=0,
                        tol=1e-10,
                        max_iter=100)
            mus_tumor = km.fit(lcounts[high_tumor, :]).cluster_centers_
            mus_free = km.fit(lcounts[low_tumor, :]).cluster_centers_
        else:
            mus_tumor = np.repeat(np.mean(lcounts[high_tumor, :],
                                          axis=0)[None, :],
                                  10,
                                  axis=0)
            mus_free = np.repeat(np.mean(lcounts[low_tumor, :],
                                         axis=0)[None, :],
                                 10,
                                 axis=0)
        mus_tumor = floatX(mus_tumor)
        mus_free = floatX(mus_free)
        try:
            chol_tumor = floatX(
                np.linalg.cholesky(np.cov(lcounts[high_tumor, :].T)))
            chol_tumor = chol_tumor[np.tril_indices(n_dim)] * sd_factor
        except np.linalg.LinAlgError:
            print(
                "Seems we have to few HIGH tumor content samples to infer a starting covariance."
            )
            chol_tumor = tcov
        try:
            chol_free = floatX(
                np.linalg.cholesky(np.cov(lcounts[low_tumor, :].T)))
            chol_free = chol_free[np.tril_indices(n_dim)] * sd_factor
        except np.linalg.LinAlgError:
            print(
                "Seems we have to few LOW tumor content samples to infer a starting covariance."
            )
            chol_free = tcov
        md = self.tau_log_E_p - pca.mean_[None, :]
        dev = md - np.dot(np.dot(md, pca.components_.T), pca.components_)
        dev_std = np.std(dev, axis=0)
        dev_mean = np.mean(dev, axis=0)
        if self.no_deviations is True:
            dev_f = dev_t = None
        else:
            dev_f = dev_t = theano.shared(floatX(dev))

        p_f = floatX(self.p_f)
        p_t = floatX(self.p_t)
        sparsity = floatX(1)
        n = floatX(self.pheno["tcRes"].values[:, None] * self.res_scale)
        tc = floatX(self.pheno["tcEst"].values[:, None])
        lb = floatX(1 - p_f)
        ub = floatX(p_t)
        padding = 1e-1 * (ub - lb)
        pa_start = ((n * tc) + 1) / (n + 2)
        pa_start = np.where(pa_start < lb, lb + padding, pa_start)
        pa_start = np.where(pa_start > ub, ub - padding, pa_start)
        pa_start = floatX(pa_start)

        def inverse_pca(X):
            return pm.math.dot(X, rot) + shift

        def pa2alpha(p_a):
            return (p_a + p_f - 1) / (p_t + p_f - 1)

        def alpha2pa(alpha):
            return (alpha * (p_t + p_f - 1)) - p_f + 1

        def mixSep(x_f, x_t, alpha, dev_f, dev_t):
            exp_f = inverse_pca(x_f)
            exp_t = inverse_pca(x_t)
            if dev_f is not None:
                exp_f += dev_f
            if dev_t is not None:
                exp_t += dev_t
            exp_f = tt.nnet.softmax(exp_f)
            exp_t = tt.nnet.softmax(exp_t)
            result = ((1 - alpha) * exp_f) + (alpha * exp_t)
            return result

        print("Making model...")
        with pm.Model() as model:
            # bounds with numerical padding
            p_a = pm.Beta(
                "p_a",
                alpha=floatX((n * tc) + 1),
                beta=floatX((n * (1 - tc)) + 1),
                transform=pm.distributions.transforms.Interval(lb, ub),
                shape=(n_samp, 1),
                testval=pa_start,
            )
            alpha = pm.Deterministic("alpha", pa2alpha(p_a))
            sdd = pm.HalfNormal.dist(sd=cSd * self.relax_prior)

            x_f_comps = list()
            for i in range(n_modes):
                mus_f = pm.Normal(
                    "mus_f_{}".format(i),
                    mu=0,
                    sd=cSd * self.relax_prior,
                    shape=n_dim,
                    testval=mus_free[i, :],
                )
                packed_L_f = pm.LKJCholeskyCov(
                    "packed_L_f_{}".format(i),
                    n=n_dim,
                    eta=sparsity,
                    sd_dist=sdd,
                    testval=chol_free,
                )
                chol_f = pm.expand_packed_triangular(n_dim,
                                                     packed_L_f,
                                                     lower=True)
                x_f_comps.append(
                    pm.MvNormal.dist(mu=mus_f,
                                     chol=chol_f,
                                     shape=(n_samp, n_dim)))
            if n_modes > 1:
                w_f = pm.Dirichlet("w_f",
                                   a=np.ones(n_modes) * self.dirichlet_prior)
                x_f = pm.Mixture(
                    "x_f",
                    w=w_f,
                    comp_dists=x_f_comps,
                    shape=(n_samp, n_dim),
                    testval=lcounts,
                )
            else:
                x_f = pm.MvNormal("x_f",
                                  mu=mus_f,
                                  chol=chol_f,
                                  shape=(n_samp, n_dim))

            if self.same_kernels:
                x_t_comps = x_f_comps
            else:
                x_t_comps = list()
                for i in range(n_modes):
                    mus_t = pm.Normal(
                        "mus_t_{}".format(i),
                        mu=0,
                        sd=cSd * self.relax_prior,
                        shape=n_dim,
                        testval=mus_tumor[i, :],
                    )
                    packed_L_t = pm.LKJCholeskyCov(
                        "packed_L_t_{}".format(i),
                        n=n_dim,
                        eta=sparsity,
                        sd_dist=sdd,
                        testval=chol_tumor,
                    )
                    chol_t = pm.expand_packed_triangular(n_dim,
                                                         packed_L_t,
                                                         lower=True)
                    x_t_comps.append(
                        pm.MvNormal.dist(mu=mus_t,
                                         chol=chol_t,
                                         shape=(n_samp, n_dim)))
            if n_modes > 1:
                w_t = pm.Dirichlet("w_t",
                                   a=np.ones(n_modes) * self.dirichlet_prior)
                x_t = pm.Mixture(
                    "x_t",
                    w=w_t,
                    comp_dists=x_t_comps,
                    shape=(n_samp, n_dim),
                    testval=lcounts,
                )
            else:
                x_t = pm.MvNormal("x_t",
                                  mu=mus_t,
                                  chol=chol_t,
                                  shape=(n_samp, n_dim))

            if self.sample_deviation is True:
                dev_f = pm.Normal(
                    "dev_f",
                    mu=dev_mean,
                    sigma=dev_std,
                    shape=(n_samp, n_features),
                    testval=dev,
                )
                dev_t = pm.Normal(
                    "dev_t",
                    mu=dev_mean,
                    sigma=dev_std,
                    shape=(n_samp, n_features),
                    testval=dev,
                )

            if self.hazard_model == "cox":
                b = pm.Normal("logHR", mu=0, sigma=1, shape=(2 * n_dim, 1))
                for ev in self.events:
                    ind = ev["mask"].values
                    obs = np.array(ev["index_among"])
                    expressions = tt.concatenate([x_t[ind, :], x_f[ind, :]],
                                                 axis=1)
                    hazard = tt.exp(tt.dot(expressions, b)).T
                    evp = pm.Categorical("event_{}".format(ev["sample"]),
                                         hazard,
                                         observed=obs)
            elif self.hazard_model == "mk":
                # This is not implemented; it aims to model hazard with a Gaussian mixture
                b = pm.Normal("kernel_weights", mu=0, sigma=1, shape=(10, ))
                pass

            x = pm.Deterministic("x", mixSep(x_f, x_t, alpha, dev_f, dev_t))
            if self.use_multinomial:
                obs = pm.Multinomial("obs",
                                     n=multiNn,
                                     p=x,
                                     observed=mCounts.T,
                                     dtype="int64")
            else:
                dist = pm.Dirichlet.dist(mCounts.T + 1)
                pot = pm.Potential("obs", dist.logp(x))
        return model
Example #24
        #tau = pm.Normal('tau', mu=0, tau=1e-4)
        #p1 = pm.Deterministic('P_SI', 1-pm.math.exp(-Lambda))
        #p2 = pm.Deterministic('P_IR', 1-pm.math.exp(-gamma))
        #p3 = pm.Deterministic('P_ID', 1-pm.math.exp(-tau))
        p1 = pm.Beta('P_SI', alpha=0.5, beta=0.5)
        p2 = pm.Dirichlet('P_IRD', a=np.ones(3))
        #p2 = pm.Beta('P_IR', alpha = 0.5, beta = 0.5)
        #p3 = pm.Beta('P_ID', alpha = 0.5, beta = 0.5)
        SI = pm.Binomial('SI',
                         n=delta[:seg, 3].astype(np.int32),
                         p=p1,
                         observed=delta[:seg, 0])
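        # infected individuals split three ways (stay infected, recover, die);
        # the three counts sum to n, hence a Multinomial with Dirichlet prior p2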
        IRD = pm.Multinomial(
            'IRD',
            n=delta[:seg, 4].astype(np.int32),
            p=p2,
            observed=np.stack(
                (delta[:seg, 4] - delta[:seg, 1] - delta[:seg, 2],
                 delta[:seg, 1], delta[:seg, 2]),
                axis=-1))
        #IR = pm.Binomial('IR', n = delta[:,4].astype(np.int32), p = p2, observed=delta[:,1])
        #ID = pm.Binomial('ID', n = delta[:,4].astype(np.int32), p = p3, observed=delta[:,2])

        #step = pm.NUTS()
        trace1 = pm.sample(2000,
                           pm.Metropolis(),
                           cores=1,
                           chains=2,
                           init='advi+adapt_diag',
                           tune=500)

    with basic_model2:
Example #25
            symps[i] = pm.Bernoulli(
                'symp' + str(i),
                p=meanValues[i]).random()  # values not just probabilities

        sympsTheano = theano.shared(np.array(symps).astype("float64"))
        genderType = pm.Bernoulli('gender', p=meanValues[i]).random()

        true_probs = [type1success, type2success, type3success, type4success]
        true_probs1 = pm.Normal('p1', mu=type1success, sigma=0.01)
        true_probs2 = pm.Normal('p2', mu=type2success, sigma=0.01)
        true_probs3 = pm.Normal('p3', mu=type3success, sigma=0.01)
        true_probs4 = pm.Normal('p4', mu=type4success, sigma=0.01)
        cancerTypeValues = [1, 2, 3, 4]
        cancerType = pm.Multinomial(
            'cancer type',
            n=1,
            p=[type1success, type2success, type3success, type4success],
            shape=4).random()
        indexCancer = np.where(cancerType == 1)[0]
        cancerTypeGeneratedSample = cancerTypeValues[indexCancer[0]]
        age = pm.Normal('age',
                        mu=meanValues[-1],
                        sigma=(maxAge - meanValues[-1])).random()
        cov = np.cov(DataAccessProcessedSymptopmsOnly.T)  #
        featurenumber = 41

        x = pm.math.stack(cancerTypeGeneratedSample, genderType, age)
        allMu = pm.math.concatenate([sympsTheano, x], axis=0)
        test = pm.MvNormal('out', mu=allMu, cov=cov, shape=featurenumber)
        returns = test
        step = pm.HamiltonianMC()
Example #26
import numpy as np
import pymc3 as pm
import pandas as pd

## Delay Model

DELAY_DIST = True
if DELAY_DIST:

    k = np.array(train_n_t_d).shape[1]

    with pm.Model() as multinom_test:
        a = pm.Dirichlet('a', a=np.ones(k))
        for i in range(len(train_n_t_d)):
            data_pred = pm.Multinomial('data_pred_%s' % i,
                                       n=sum(train_n_t_d[i]),
                                       p=a,
                                       observed=train_n_t_d[i])
        trace = pm.sample(50000, pm.Metropolis())
        #trace = pm.sample(1000) # also works with NUTS

    pm.traceplot(trace[500:])

state_trajectories = []
PF = False
if PF:
    N = 10000
    state_space_dimension = 1

    params = []
    means, particles, weights = run_pf(train_n_t_inf, N, state_space_dimension,
                                       D, params)
Example #27
                              tt.tanh(tt.dot(ann_input, weights_1)))

    # Layer 1 -> Layer 2
    weights_2 = pm.Normal('w_2',
                          mu=0,
                          sd=1,
                          shape=(n_hidden, n_hidden),
                          testval=init_2)
    acts_2 = pm.Deterministic('activations_2',
                              tt.tanh(tt.dot(acts_1, weights_2)))

    # Layer 2 -> Output Layer
    weights_out = pm.Normal('w_out',
                            mu=0,
                            sd=1,
                            shape=(n_hidden, ann_output.shape[1]),
                            testval=init_out)
    acts_out = pm.Deterministic('activations_out',
                                tt.nnet.softmax(tt.dot(acts_2,
                                                       weights_out)))  # noqa

    # Define likelihood
    out = pm.Multinomial('likelihood', n=1, p=acts_out, observed=ann_output)

with nn_model:
    s = theano.shared(pm.floatX(1.1))
    inference = pm.ADVI(
        cost_part_grad_scale=s)  # approximate inference done using ADVI
    approx = pm.fit(100000, method=inference)
    trace = approx.sample(5000)
Example #28
def get_posterior(data,
                  n=100,
                  draws=2000,
                  n_init=200000,
                  progressbar=True,
                  *args,
                  **kwargs):
    with pm.Model() as model:

        # Define Priors
        p_err = pm.Uniform('p_err', 0, 0.1)  # Upper limit due to normalization
        p_ent = pm.Uniform('p_ent', 0, 1 - 6 * p_err)
        p_a = pm.Uniform('p_a', 0, 1 - 6 * p_err - p_ent)
        p_e = pm.Uniform('p_e', 0, 1 - 6 * p_err - p_ent)
        p_o = pm.Uniform('p_o', 0, 1 - 6 * p_err - p_ent)
        p_i = pm.Uniform('p_i', 0, 1 - 6 * p_err - p_ent)
        nvc_a = pm.Deterministic('nvc_a', 1 - p_a - 6 * p_err - p_ent)
        nvc_i = pm.Deterministic('nvc_i', 1 - p_i - 6 * p_err - p_ent)
        nvc_e = pm.Deterministic('nvc_e', 1 - p_e - 6 * p_err - p_ent)
        nvc_o = pm.Deterministic('nvc_o', 1 - p_o - 6 * p_err - p_ent)

        # Model specification: define all possible moods
        # syll tt-syntax a     i       e      o     NVC
        aa = [p_a, p_ent, p_err, p_err, nvc_a]
        ai = [p_err, p_a, p_err, p_ent, nvc_a]
        ia = ai
        ae = [p_err, p_err, p_a, p_ent, nvc_a]
        ea = ae
        ao = [p_err, p_ent, p_err, p_a, nvc_a]
        oa = ao
        ii = [p_err, p_i, p_err, p_ent, nvc_i]
        ie = [p_err, p_err, p_i, p_ent, nvc_i]
        ei = ie
        io = [p_err, p_ent, p_err, p_i, nvc_i]
        oi = io
        ee = [p_err, p_err, p_e, p_ent, nvc_e]
        eo = [p_err, p_ent, p_err, p_e, nvc_e]
        oe = eo
        oo = [p_err, p_ent, p_err, p_o, nvc_o]

        # Define the relationship between moods and syllogisms
        moods = [
            aa, ai, ae, ao, ia, ii, ie, io, ea, ei, ee, eo, oa, oi, oe, oo
        ]
        syllogs = []
        for m in moods:
            # Figure 1
            line = m[0:4] + [p_err] * 4 + [m[-1]]
            syllogs += [line]
            # Figure 2
            line = [p_err] * 4 + m[0:4] + [m[-1]]
            syllogs += [line]

            line = []
            for para in m[0:4]:
                if para == p_err:
                    line += [p_err]
                else:
                    line += [para / 2]
            # Repeat the entries twice
            line *= 2
            # Add NVC
            line += [m[-1]]

            syllogs += [line] * 2

        model_matrix = tt.stack(syllogs)

        # Define likelihood
        pm.Multinomial(name='rates', n=n, p=model_matrix, observed=data)
        map_estimate = pm.find_MAP(model=model)

        trace = pm.sample(draws=draws,
                          njobs=1,
                          start=map_estimate,
                          n_init=n_init,
                          progressbar=progressbar)

        print('Model logp = ', model.logp(map_estimate))
        return model, trace
Example #29
def build_polyallelic_model(n, g, s, a=4):
    with pm.Model() as model:
        # Fraction
        pi = pm.Dirichlet('pi',
                          a=np.ones(s),
                          shape=(n, s),
                          transform=stick_breaking)
        pi_hyper = pm.Data('pi_hyper', value=0.0)
        pm.Potential(
            'heterogeneity_penalty',
            # NOTE: we take the mean sqrt over the first axis so that
            # it's invariant to the number of samples, but sum over
            # the second axis so that it's affected by the actual
            # number of latent strains, not the number allowed.
            -(pm.math.sqrt(pi).mean(0).sum()**2) * pi_hyper)

        rho_hyper = pm.Data('rho_hyper', value=0.0)
        pm.Potential(
            'diversity_penalty',
            # NOTE: we take the mean over the first axis *before*
            # taking the sqrt because we're interested in mean
            # abundances of each strain.  This means that it *will* be
            # affected by the choice of samples.
            # We later sum over (instead of taking the mean over) the
            # second axis so that it's affected by the actual number
            # of latent strains, not the number allowed (s).
            -(pm.math.sqrt(pi.mean(0)).sum()**2) * rho_hyper)

        # Genotype
        gamma_ = pm.Dirichlet('gamma_',
                              a=np.ones(a),
                              shape=(g * s, a),
                              transform=stick_breaking)
        gamma = pm.Deterministic('gamma', gamma_.reshape((g, s, a)))
        gamma_hyper = pm.Data('gamma_hyper', value=0.0)
        pm.Potential(
            'ambiguity_penalty',
            # NOTE: we're taking the norm over the third axis.
            # Then, after returning to the original scale, we take the
            # mean over the first axis so that it is invariant to
            # numbers of positions and finally we sum over the second
            # axis so that each strain has an equal impact, regardless
            # of the number of strains.
            -(pm.math.sqrt(gamma).sum(2)**2).mean(0).sum(0) * gamma_hyper)
        # NOTE: As a general rule, we sum over those dimensions where we want
        # to force all of the weight to one element (e.g. alleles or strains).
        # Conversely, we take the mean of dimensions that we want to be
        # invariant to decisions/facts that are independent of the goodness of
        # fit (e.g. number of positions or samples).

        # Product of fraction and genotype
        true_p = pm.Deterministic('true_p', pm.math.dot(pi, gamma))

        # Sequencing error
        epsilon_hyper = pm.Data('epsilon_hyper', value=100)
        epsilon = pm.Beta('epsilon', alpha=2, beta=epsilon_hyper, shape=n)
        epsilon_ = epsilon.reshape((n, 1, 1))
        err_base_prob = tt.ones((n, g, a)) / a
        p_with_error = (true_p * (1 - epsilon_)) + (err_base_prob * epsilon_)

        # Observation
        observed = pm.Data('observed', value=np.empty((g * n, a)))
        pm.Multinomial('data',
                       p=p_with_error.reshape((-1, a)),
                       n=observed.reshape((-1, a)).sum(1),
                       observed=observed)

    return model
Example #30
y = np.array([
    ['GD', 2012, 70, 349, 342],
    ['AH', 604, 23, 129, 594],
    ['JC', 508, 15, 181, 220],
    ['MF', 487, 34, 90, 363],
    ['AN', 473, 16, 68, 722],
    ['CC', 224, 16, 28, 94],
    ['HA', 122, 7, 19, 112],
    ['DP', 83, 6, 22, 81],
    ['PE', 82, 5, 9, 28]
])

df = pd.DataFrame(y, columns=['name', 'high', 'junk', 'low', 'medium'])
num_experiments = len(y)
qa_tiers = ['High', 'Junk', 'Low', 'Medium']

df.index = df.name
df = df.drop(['name'], axis=1).astype(int)  # y is an object array, so the counts arrive as strings

number_of_experiments = len(y)
k = 4
sample_size = -1

with pm.Model() as multinom_test:
    a = pm.Dirichlet('a', a=np.ones(k))
    # n must equal each row's total count, and shape must match the observed table
    data_pred = pm.Multinomial('data_pred', n=df.sum(axis=1), p=a, observed=df, shape=df.shape)
    trace = pm.sample(1000)
    
pm.traceplot(trace[500:])
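
For reference, a minimal self-contained Dirichlet-Multinomial sketch in the same spirit as the listings above; the data and variable names here are illustrative, not taken from any of the examples.

import numpy as np
import pymc3 as pm

# synthetic counts: 8 rows of 50 trials over 3 categories (illustrative only)
counts = np.random.multinomial(50, [0.2, 0.3, 0.5], size=8)

with pm.Model() as minimal_model:
    # flat Dirichlet prior over the category probabilities
    p = pm.Dirichlet('p', a=np.ones(3))
    # each row is one Multinomial draw whose total is that row's sum
    pm.Multinomial('obs', n=counts.sum(axis=1), p=p, observed=counts)
    trace = pm.sample(1000, tune=1000, return_inferencedata=False)

print(trace['p'].mean(axis=0))  # posterior mean should land near [0.2, 0.3, 0.5]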