Example #1
    def inference(self, iter_=5000, burn=1000):
        theta = pm.Container([
            pm.CompletedDirichlet(
                "theta_%s" % d, pm.Dirichlet("ptheta_%s" % d,
                                             theta=self.alpha))
            for d in range(self.D)
        ])
        phi = pm.Container([
            pm.CompletedDirichlet("phi_%s" % k,
                                  pm.Dirichlet("pphi_%s" % k, theta=self.beta))
            for k in range(self.K)
        ])
        z_d = pm.Container([
            pm.Categorical("z_%s" % d,
                           p=theta[d],
                           value=np.random.randint(self.K,
                                                   size=len(self.bw[d])),
                           size=len(self.bw[d])) for d in range(self.D)
        ])
        w_z = pm.Container([
            pm.Categorical("w_%s_%s" % (d, w),
                           p=phi[z_d[d][w].get_value()],
                           value=self.bw[d][w],
                           observed=True) for d in range(self.D)
            for w in range(len(self.bw[d]))
        ])

        model = pm.Model([theta, phi, z_d, w_z])
        self.mcmc = pm.MCMC(model)
        self.mcmc.sample(iter=iter_, burn=burn)
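
Once `inference` returns, the posterior can be read off the PyMC2 trace. A minimal usage sketch, assuming the enclosing class (called `LDA` here for illustration) sets `alpha`, `beta`, `D`, `K`, and the bag-of-words lists `bw` in its constructor:

    lda = LDA(docs)  # hypothetical constructor setting alpha, beta, D, K, bw
    lda.inference(iter_=2000, burn=500)
    theta0 = lda.mcmc.trace("theta_0")[:]  # topic mixture of document 0, shape (n_samples, 1, K)
    phi0 = lda.mcmc.trace("phi_0")[:]      # word distribution of topic 0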
Example #2
    def get_z_data(self, p, p_pos, q):
        K = 2  # Num topics
        M = p  # Num documents
        N = q  # Total num of unique words across all documents

        alpha = 1.0  # Concentration parameter for the per-document
        # distributions over topics (theta, one for each document)
        beta = 1.0  # Concentration parameter for the per-topic
        # distributions over words (phi, one for each topic)

        phi = pymc.Container([
            pymc.CompletedDirichlet(
                name="phi_" + str(k),
                D=pymc.Dirichlet(name="phi_temp_" + str(k),
                                 theta=beta * numpy.ones(N)),
            ) for k in range(K)
        ])

        theta = pymc.Container([
            pymc.CompletedDirichlet(
                name="theta_" + str(m),
                D=pymc.Dirichlet(name="theta_temp_" + str(m),
                                 theta=alpha * numpy.ones(K)),
            ) for m in range(M)
        ])

        z = pymc.Container([
            pymc.Categorical(name="z_" + str(m), p=theta[m], size=N)
            for m in range(M)
        ])

        w = pymc.Container([
            pymc.Categorical(
                name="w_" + str(m) + "_" + str(n),
                p=pymc.Lambda(
                    "phi_z_" + str(m) + str(n),
                    lambda z_in=z[m][n], phi_in=phi: phi_in[z_in],
                ),
            ) for m in range(M) for n in range(N)
        ])
        lda = pymc.Model([w, z, theta, phi])

        z_rvs = []
        for m in range(M):
            metadata = {"doc_idx": m, "num_unique_words": N}
            # Note: "w_0_0" is just a dummy argument that must be present
            # in the pymc.Model.
            rv = WordCountVecRV(model=lda, name="w_0_0", metadata=metadata)
            z_rvs += [rv]
        return z_rvs
Example #3
    def __init__(self, corpus, K=10, iterations=1000, burn=100):
        print("Building model ...")
        self.K = K
        self.V = corpus.wordCount + 1
        self.M = corpus.documentCount
        self.alpha = np.ones(self.K)
        self.beta = np.ones(self.V)
        self.corpus = corpus
        self.observations = np.array(corpus.observations)

        self.phi = np.empty(self.K, dtype=object)
        for i in range(self.K):
            self.phi[i] = pm.CompletedDirichlet(
                "Phi[%i]" % i, pm.Dirichlet("phi[%i]" % i, theta=self.beta))
        self.phi = pm.Container(self.phi)

        self.theta = np.empty(self.M, dtype=object)
        for i in range(self.M):
            self.theta[i] = pm.CompletedDirichlet(
                "Theta[%i]" % i, pm.Dirichlet("theta[%i]" % i,
                                              theta=self.alpha))
        self.theta = pm.Container(self.theta)

        self.z = np.empty(self.observations.shape, dtype=object)
        for i in range(self.M):
            self.z[i] = pm.Categorical("z[%i]" % i,
                                       size=len(self.observations[i]),
                                       p=self.theta[i],
                                       value=np.random.randint(
                                           self.K,
                                           size=len(self.observations[i])))
        self.z = pm.Container(self.z)

        self.w = []
        for i in range(self.M):
            self.w.append([])
            for j in range(len(self.observations[i])):
                self.w[i].append(
                    pm.Categorical(
                        "w[%i][%i]" % (i, j),
                        p=pm.Lambda(
                            "phi[z[%i][%i]]" % (i, j),
                            lambda z=self.z[i][j], phi=self.phi: phi[z]),
                        value=self.observations[i][j],
                        observed=True))
        self.w = pm.Container(self.w)

        self.mcmc = pm.MCMC(pm.Model([self.theta, self.phi, self.z, self.w]))

        print("Fitting model ...")
        self.mcmc.sample(iter=iterations, burn=burn)
Example #4
def test_multinomial_check_parameters():

    x = np.array([1, 5])
    n = x.sum()

    with pm.Model() as modelA:
        p_a = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialA("x", n, p_a, observed=x)

    with pm.Model() as modelB:
        p_b = pm.Dirichlet("p", floatX(np.ones(2)))
        MultinomialB("x", n, p_b, observed=x)

    assert np.isclose(modelA.logp({"p_simplex__": [0]}),
                      modelB.logp({"p_simplex__": [0]}))
Example #5
def run_Categorical_Normal():
    nC = 3  # number of clusters
    aD = [0, 1, 8, 9, 20, 21]  # data points
    nPts = len(aD) + 1
    # Cluster parameters
    aUh = [
        pm.Uniform('UnifH' + str(i), lower=-50, upper=50) for i in range(nC)
    ]
    aNc = [pm.Normal('NormC' + str(i), mu=aUh[i], tau=1) for i in range(nC)]
    # Dirichlet & Categorical nodes
    Dir = pm.Dirichlet('Dirichlet', theta=[1] * nC)
    aC = [pm.Categorical('Cat' + str(i), Dir) for i in range(nPts)]
    aL = [
        pm.Lambda('p_Norm' + str(i), lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
        for i in range(nPts)
    ]
    # Observed points
    aN = [
        pm.Normal('NormX' + str(i),
                  mu=aL[i],
                  tau=1,
                  observed=True,
                  value=aD[i]) for i in range(nPts - 1)
    ]
    Nz = pm.Normal('NormZ', mu=aL[-1], tau=1)  # held-out point
    return np.concatenate([[Nz, Dir], aUh, aNc, aC, aN])
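The returned node array plugs straight into a PyMC2 sampler. A minimal sketch, where `NormZ` is the single unobserved point whose cluster-conditional mean is inferred:

    nodes = run_Categorical_Normal()
    mcmc = pm.MCMC(list(nodes))
    mcmc.sample(iter=10000, burn=2000)
    z_draws = mcmc.trace('NormZ')[:]  # posterior draws for the held-out point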
Example #6
def getModel():
    nA, nK = 0.05, 4
    aDir = [nA / nK] * nK
    D = pm.Dirichlet('1-Dirichlet', theta=aDir)
    C1 = pm.Categorical('2-Cat', D)
    #     C2 = pm.Categorical('10-Cat', D)
    #     C3 = pm.Categorical('11-Cat', D)
    #     C4 = pm.Categorical('14-Cat', D)
    #     C5 = pm.Categorical('15-Cat', D)
    #     G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)
    N0_1 = pm.Normal('5-Norm0_1', mu=10, tau=1)
    N0_2 = pm.Normal('6-Norm0_2', mu=-10, tau=1)
    N0_3 = pm.Normal('7-Norm0_3', mu=30, tau=1)
    N0_4 = pm.Normal('16-Norm0_4', mu=-30, tau=1)
    aMu = [N0_1.value, N0_2.value, N0_3.value, N0_4.value]
    p_N1 = pm.Lambda('p_Norm1', lambda n=C1: aMu[n], doc='Pr[Norm|Cat]')
    #     p_N2 = pm.Lambda('p_Norm2', lambda n=C2: aMu[n], doc='Pr[Norm|Cat]')
    #     p_N3 = pm.Lambda('p_Norm3', lambda n=C3: aMu[n], doc='Pr[Norm|Cat]')
    #     p_N4 = pm.Lambda('p_Norm4', lambda n=C4: aMu[n], doc='Pr[Norm|Cat]')
    #     p_N5 = pm.Lambda('p_Norm6', lambda n=C5: aMu[n], doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)
    #     obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=40)
    #     obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=40)
    #     obsN3 = pm.Normal('12-Norm', mu=p_N4, tau=1, observed=True, value=-40)
    #     obsN4 = pm.Normal('13-Norm', mu=p_N5, tau=1, observed=True, value=-40)
    return pm.Model([D, C1, N, N0_1, N0_2, N0_3, N0_4])
Example #7
def mixture_model(random_seed=1234):
    """Sample mixture model to use in benchmarks"""
    np.random.seed(random_seed)
    size = 1000
    w_true = np.array([0.35, 0.4, 0.25])
    mu_true = np.array([0.0, 2.0, 5.0])
    sigma = np.array([0.5, 0.5, 1.0])
    component = np.random.choice(mu_true.size, size=size, p=w_true)
    x = np.random.normal(mu_true[component], sigma[component], size=size)

    with pm.Model() as model:
        w = pm.Dirichlet("w", a=np.ones_like(w_true))
        mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
        enforce_order = pm.Potential(
            "enforce_order",
            at.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) +
            at.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
        )
        tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
        pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

    # Initialization can be poorly specified, this is a hack to make it work
    start = {
        "mu": mu_true.copy(),
        "tau_log__": np.log(1.0 / sigma**2),
        "w_stickbreaking__": np.array([-0.03, 0.44]),
    }
    return model, start
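
A sketch of how the pair might be consumed, assuming the PyMC3-era `start` keyword this snippet targets:

    model, start = mixture_model(random_seed=42)
    with model:
        trace = pm.sample(1000, tune=1000, start=start, chains=2)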
Example #8
    def initialize_variables(self):
        """Initializes MCMC variables."""
        self.dirichlet = pymc.Dirichlet(
            "dirichlet", self.prior_pops
        )  # This has size (n-1), so it is missing the final component.
        self.matrix_populations = pymc.CompletedDirichlet(
            "matrix_populations", self.dirichlet
        )  # This RV fills in the missing value of the population vector, but has shape (1, n) rather than (n)
        self.populations = pymc.CommonDeterministics.Index(
            "populations", self.matrix_populations,
            0)  # Finally, we get a flat array of the populations.

        self.dirichlet.keep_trace = False

        @pymc.dtrm
        def mu(populations=self.populations):
            return populations.dot(self.predictions)

        self.mu = mu

        @pymc.potential
        def logp(populations=self.populations, mu=self.mu):
            return -0.5 * get_chi2(populations,
                                   self.predictions,
                                   self.measurements,
                                   self.uncertainties,
                                   mu=mu)

        self.logp = logp
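
The shape bookkeeping described in the comments can be checked directly; a small PyMC2 sketch:

    import numpy as np
    import pymc

    d = pymc.Dirichlet("d", theta=np.ones(4))                # value has shape (3,): last component implicit
    full = pymc.CompletedDirichlet("full", d)                # value has shape (1, 4)
    flat = pymc.CommonDeterministics.Index("flat", full, 0)  # value has shape (4,)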
Example #9
def run_HDP():
    nG, nA, nC = 2, 2, 2  # gamma, alpha & max number of clusters
    aDir = [nG / nC] * nC
    Dir0 = pm.Dirichlet('Dirichlet0', theta=aDir)
    lDir0 = pm.Lambda('p_Dir0',
                      lambda d=Dir0: np.concatenate([d, [1 - sum(d)]]) * nA)
    aNodes1 = get_DP('1', lDir0, [0, 1, 20, 21])
    aNodes2 = get_DP('2', lDir0, [50, 51, 70, 71, 72])
    return np.concatenate([[Dir0], aNodes1, aNodes2])
Example #10
    def test_sample_prior_and_posterior(self):
        def build_toy_dataset(N, K):
            pi = np.array([0.2, 0.5, 0.3])
            mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
            stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
            x = np.zeros((N, 3), dtype=np.float32)
            y = np.zeros((N, ), dtype=int)
            for n in range(N):
                k = np.argmax(np.random.multinomial(1, pi))
                x[n, :] = np.random.multivariate_normal(
                    mus[k], np.diag(stds[k]))
                y[n] = k
            return x, y

        N = 100  # number of data points
        K = 3  # number of mixture components
        D = 3  # dimensionality of the data

        X, y = build_toy_dataset(N, K)

        with pm.Model() as model:
            pi = pm.Dirichlet("pi", np.ones(K), shape=(K, ))

            comp_dist = []
            mu = []
            packed_chol = []
            chol = []
            for i in range(K):
                mu.append(pm.Normal("mu%i" % i, 0, 10, shape=D))
                packed_chol.append(
                    pm.LKJCholeskyCov("chol_cov_%i" % i,
                                      eta=2,
                                      n=D,
                                      sd_dist=pm.HalfNormal.dist(2.5)))
                chol.append(
                    pm.expand_packed_triangular(D, packed_chol[i], lower=True))
                comp_dist.append(
                    pm.MvNormal.dist(mu=mu[i], chol=chol[i], shape=D))

            pm.Mixture("x_obs", pi, comp_dist, observed=X)
        with model:
            idata = pm.sample(30, tune=10, chains=1)

        n_samples = 20
        with model:
            ppc = pm.sample_posterior_predictive(idata, n_samples)
            prior = pm.sample_prior_predictive(samples=n_samples)
        assert ppc["x_obs"].shape == (n_samples, ) + X.shape
        assert prior["x_obs"].shape == (n_samples, ) + X.shape
        assert prior["mu0"].shape == (n_samples, D)
        assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
Example #11
def cartesian_categorical_child(name,
                                parents,
                                levels,
                                value=None,
                                N=None,
                                return_coeffs=False,
                                fixed={}):
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)

    ranges = [range(get_levels_count(p)) for p in parents]
    parents2index = {}
    coeffs = []
    for i, parent_vals in enumerate(product(*ranges)):
        parents2index[parent_vals] = i
        parents_repr = ' '.join('%s=%s' % (parent, v)
                                for parent, v in zip(parents, parent_vals))
        coeff_name = COEFFS_PREFIX + 'p(%s | %s)' % (name, parents_repr)
        coeff = fixed.get(coeff_name,
                          pymc.Dirichlet(coeff_name, theta=[1] * levels))
        coeffs.append(coeff)

    intify = lambda x: tuple(map(int, x))

    @pymc.deterministic
    def child_prob(parents=parents, coeffs=coeffs):
        probs = np.array([
            coeffs[parents2index[intify(parent_vals)]]
            for parent_vals in zip(*parents)
        ])
        remainders = 1 - probs.sum(axis=1)
        remainders = remainders.reshape((len(remainders), 1))
        return np.hstack([probs, remainders])

    child_prob.__name__ = 'p(%s)' % name

    if value is None:
        child = pymc.Categorical(name, p=child_prob, value=np.zeros(N))
    else:
        child = pymc.Categorical(name,
                                 p=child_prob,
                                 value=value,
                                 observed=True)
    set_levels_count(child, levels)

    if return_coeffs:
        return child, coeffs + [child_prob]
    else:
        return child
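
A hedged usage sketch; the parents are built here with `make_categorical` (Example #18), which registers the level counts that `get_levels_count` reads. That the two helpers share a module is an assumption:

    a = make_categorical('a', levels=2, N=200)
    b = make_categorical('b', levels=3, N=200)
    c = cartesian_categorical_child('c', parents=[a, b], levels=4, N=200)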
Example #12
def run_HDP():
    nC = 3  # max number of clusters
    Gam = pm.Uniform('Gamma0', lower=0, upper=15)
    aDir = [Gam / nC] * nC
    Dir0 = pm.Dirichlet('Dirichlet0', theta=aDir)
    lDir0 = pm.Lambda('p_Dir0',
                      lambda d=Dir0: np.concatenate([d, [1 - sum(d)]]))
    aNodes1 = get_DP('1', lDir0, [0, 1, 20, 21])
    aNodes2 = get_DP('2', lDir0, [50, 51, 70, 71, 72])
    return np.concatenate([[Dir0], aNodes1, aNodes2])
Example #13
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[2, 1, 3, 1])
    C = pm.Categorical('2-Cat', D)
    #     C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3)
    p_N = pm.Lambda('p_Norm',
                    lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                                          [-5, 0, 5, 10]),
                    doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N, tau=1)
    #     N = pm.Normal('2-Norm', mu=p_N, tau=1, observed=True, value=2.5)
    return pm.Model([D, C, N])
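
Sampling the returned model is the usual PyMC2 two-step; a minimal sketch:

    mcmc = pm.MCMC(getModel())
    mcmc.sample(iter=5000, burn=500)
    draws = mcmc.trace('3-Norm')[:]  # draws from the category-dependent normal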
Example #14
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[2, 1, 2, 4])
    C = pm.Categorical('2-Cat', D)
    p_N = pm.Lambda(
        'p_Norm',
        lambda n=C: np.select([n == 0, n == 1, n == 2, n == 3],
                              [[-5, -5], [0, 0], [5, 5], [10, 10]]),
        doc='Pr[Norm|Cat]')
    N = pm.MvNormal('3-Norm_2D', mu=p_N, tau=np.eye(2, 2))
    #     N = pm.MvNormal('2-Norm_2D', mu=p_N, tau=np.eye(2, 2), observed=True, value=[2.5, 2.5])
    return pm.Model([D, C, N])
Example #15
def gmm_model(data, K, mu_0=0.0, alpha_0=0.1, beta_0=0.1, alpha=1.0):
    """
    K: number of component
    n_samples: number of n_samples
    n_features: number of features

    mu_0: prior mean of mu_k 
    alpha_0: alpha of Inverse Gamma tau_k 
    beta_0: beta of Inverse Gamma tau_k
    alpha = prior of dirichlet distribution phi_0

    latent variable:
    phi_0: shape = (K-1, ), dirichlet distribution
    phi: shape = (K, ), add K-th value back to phi_0
    z: shape = (n_samples, ), Categorical distribution, z[k] is component indicator 
    mu_k: shape = (K, n_features), normal distribution, mu_k[k] is mean of k-th component
    tau_k : shape = (K, n_features), inverse-gamma distribution, tau_k[k] is variance of k-th component
    """

    n_samples, n_features = data.shape

    # latent variables
    tau_k = pm.InverseGamma('tau_k',
                            alpha_0 * np.ones((K, n_features)),
                            beta_0 * np.ones((K, n_features)),
                            value=beta_0 * np.ones((K, n_features)))
    mu_k = pm.Normal('mu_k',
                     np.ones((K, n_features)) * mu_0,
                     tau_k,
                     value=np.ones((K, n_features)) * mu_0)
    phi_0 = pm.Dirichlet('phi_0', theta=np.ones(K) * alpha)

    @pm.deterministic(dtype=float)
    def phi(value=np.ones(K) / K, phi_0=phi_0):
        val = np.hstack((phi_0, (1 - np.sum(phi_0))))
        return val

    z = pm.Categorical('z',
                       p=phi,
                       value=pm.rcategorical(np.ones(K) / K, size=n_samples))

    # observed variables
    x = pm.Normal('x', mu=mu_k[z], tau=tau_k[z], value=data, observed=True)

    return pm.Model([mu_k, tau_k, phi_0, phi, z, x])
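
A minimal sketch of fitting the model to synthetic two-component data (hyperparameters left at their defaults):

    import numpy as np
    import pymc as pm

    data = np.vstack([np.random.normal(0, 1, (100, 2)),
                      np.random.normal(5, 1, (100, 2))])
    mcmc = pm.MCMC(gmm_model(data, K=2))
    mcmc.sample(iter=5000, burn=1000)
    z_draws = mcmc.trace('z')[:]  # component indicators, one row per kept sample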
Example #16
    def test_multivariate2(self):
        # Added test for issue #3271
        mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
        with pm.Model() as dm_model:
            probs = pm.Dirichlet("probs", a=np.ones(6))
            obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
            burned_trace = pm.sample(
                20, tune=10, cores=1, return_inferencedata=False,
                compute_convergence_checks=False
            )
        sim_priors = pm.sample_prior_predictive(
            return_inferencedata=False, samples=20, model=dm_model
        )
        sim_ppc = pm.sample_posterior_predictive(
            burned_trace, return_inferencedata=False, samples=20, model=dm_model
        )
        assert sim_priors["probs"].shape == (20, 6)
        assert sim_priors["obs"].shape == (20,) + mn_data.shape
        assert sim_ppc["obs"].shape == (20,) + mn_data.shape
Example #17
def create_mk_model(tree, chars, Qtype, pi):
    """
    Create model objects to be passed to pymc.MCMC

    Creates Qparams and likelihood function
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    nchar = len(set(chars))
    if Qtype=="ER":
        N = 1
    elif Qtype=="Sym":
        N = int(binom(nchar, 2))
    elif Qtype=="ARD":
        N = int((nchar ** 2 - nchar))
    else:
        raise ValueError("Qtype must be one of: ER, Sym, ARD")

    # Setting a Dirichlet prior with a Jeffreys hyperprior of 1/2
    if N != 1:
        theta = [1.0/2.0]*N
        Qparams_init = pymc.Dirichlet("Qparams_init", theta, value = [0.5])
        Qparams_init_full = pymc.CompletedDirichlet("Qparams_init_full", Qparams_init)
    else:
        Qparams_init_full = [[1.0]]

    # Exponential scaling factor for Qparams
    scaling_factor = pymc.Exponential(name="scaling_factor", beta=1.0, value=1.0)

    # Scaled Qparams; we would not expect them to necessarily add
    # to 1 as would be the case in a Dirichlet distribution
    @pymc.deterministic(plot=False)
    def Qparams(q=Qparams_init_full, s=scaling_factor):
        Qs = np.empty(N)
        for i in range(N):
            Qs[i] = q[0][i]*s
        return Qs

    l = mk.create_likelihood_function_mk(tree=tree, chars=chars, Qtype=Qtype,
                                  pi="Equal", findmin=False)
    @pymc.potential
    def mklik(q = Qparams, name="mklik"):
        return l(q)
    return locals()
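
Because the function returns `locals()`, the whole dictionary can be handed to `pymc.MCMC`, which picks out the stochastics, deterministics, and potentials. A hedged sketch, assuming `tree` and `chars` come from the surrounding phylogenetics package:

    mc = pymc.MCMC(create_mk_model(tree, chars, Qtype="ARD", pi="Equal"))
    mc.sample(iter=20000, burn=2000)
    q_draws = mc.trace('Qparams')[:]  # scaled rate-matrix parameters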
Example #18
def make_categorical(name,
                     levels,
                     value=None,
                     N=None,
                     return_coeffs=False,
                     fixed={}):
    """ creates a Bernoulli random variable with a Dirichlet parent

    :param name: name of the variable
    :param levels: integer - how many levels does the variable have
    :param value: optional - list of observed values of the variable. Must consist of integers
        from 0 to levels - 1. May be a masked array - if the variable has missing values
    :param N: size of the variable (number of values). Either N or value must be specified
    :param return_coeffs: if true, will return the parent Beta variable as well as the bernoulli
        child. False by defaut.
    :param fixed: optional dictionary of values of coefficients to be fixed.
    :return: Categorical pymc random variable, or (if return_coeffs == True) a tuple
        (categorical variable; a list with a single element - the Dirichlet parent)
    """
    if value is None and N is None:
        raise ValueError('either "value" or "N" must be specified')
    if value is not None:
        value = mask_missing(value)

    N = N or len(value)
    coeff_name = COEFFS_PREFIX + 'p(%s)' % name
    if coeff_name in fixed:
        probs = fixed[coeff_name]
        parent = list(probs) + [1 - sum(probs)]
    else:
        parent = pymc.Dirichlet(coeff_name, theta=[1] * levels)

    if value is None:
        child = pymc.Categorical(name, p=parent, value=np.zeros(N))
    else:
        child = pymc.Categorical(name, p=parent, observed=True, value=value)

    set_levels_count(child, levels)

    if return_coeffs:
        return child, [parent]
    else:
        return child
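
A short usage sketch covering both cases the docstring describes (that `mask_missing` accepts a plain list for `value` is an assumption):

    observed = make_categorical('colour', levels=3, value=[0, 2, 1, 2, 0])
    latent, coeffs = make_categorical('size', levels=4, N=50, return_coeffs=True)
    dirichlet_parent = coeffs[0]  # the single Dirichlet parent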
Example #19
def get_DP(sDP, lDir0, aD):
    nPts = len(aD) + 1
    nC = len(lDir0.value)
    nMinD, nMaxD = min(aD), max(aD)
    # Cluster parameters
    aUh = [
        pm.Uniform('UnifH' + str(i) + '_' + sDP,
                   lower=nMinD - 20,
                   upper=nMaxD + 20) for i in range(nC)
    ]
    aNc = [
        pm.Normal('NormC' + str(i) + '_' + sDP, mu=aUh[i], tau=1)
        for i in range(nC)
    ]
    # Dirichlet & Categorical nodes
    Gam = pm.Uniform('Gamma1_' + sDP, lower=0, upper=15)
    Dir = pm.Dirichlet('Dirichlet1_' + sDP, theta=lDir0 * Gam)
    aC = [
        pm.Categorical('Cat' + str(i) + '_' + sDP, Dir) for i in range(nPts)
    ]
    aL = [
        pm.Lambda('p_Norm' + str(i) + '_' + sDP,
                  lambda k=aC[i], aNcl=aNc: aNcl[int(k)]) for i in range(nPts)
    ]
    # Observed points
    aN = [
        pm.Normal('NormX' + str(i) + '_' + sDP,
                  mu=aL[i],
                  tau=1,
                  observed=True,
                  value=aD[i]) for i in range(nPts - 1)
    ]
    Nz = pm.Normal('NormZ_' + sDP, mu=aL[-1], tau=1)  # held-out point
    return np.concatenate([[Nz, Dir], aUh, aNc, aC, aN])
Example #20
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])
    C1 = pm.Categorical('2-Cat', D)
    C2 = pm.Categorical('10-Cat', D)
    C3 = pm.Categorical('11-Cat', D)
    W0_0 = pm.WishartCov('4-Wishart0_1', n=5, C=np.eye(2))
    N0_1 = pm.MvNormalCov('5-Norm0_1', mu=[-20, -20], C=np.eye(2))
    N0_2 = pm.MvNormalCov('6-Norm0_2', mu=[0, 0], C=np.eye(2))
    N0_3 = pm.MvNormalCov('7-Norm0_3', mu=[20, 20], C=np.eye(2))
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.MvNormalCov('3-Norm', mu=p_N1, C=W0_0)
    obsN1 = pm.MvNormalCov('8-Norm', mu=p_N2, C=W0_0, observed=True, value=[-20, -20])
    obsN2 = pm.MvNormalCov('9-Norm', mu=p_N3, C=W0_0, observed=True, value=[20, 20])
    return pm.Model([D, C1, C2, C3, N, W0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
Example #21
def get_Models():
    # Full model (Dirichlet & Categorical)
    aAlphas = [1, 2, 8, 2]
    aD = [0, 3, 1]
    Dir = pm.Dirichlet('Dir', theta=aAlphas)
    CatD = [
        pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
        for i in range(len(aD))
    ]
    CatQ = pm.Categorical('CatQ', p=Dir)
    # Collapsed model (Categorical)
    aP = []
    for i in range(len(aAlphas)):  # for each category, its posterior probability p_i
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)
    return np.concatenate([[Dir, CatQ, CatQ2], CatD])
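
As a check on the collapsed model: with aAlphas = [1, 2, 8, 2] and aD = [0, 3, 1], the denominator is sum(aAlphas) + len(aD) = 13 + 3 = 16, so aP = [(1+1)/16, (2+1)/16, (8+0)/16, (2+1)/16] = [0.125, 0.1875, 0.5, 0.1875], and long-run samples of `CatQ` and `CatQ2` should agree in distribution.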
Example #22
def sample_prior_pops(num_frames, bootstrap_index_list):
    """Sample the prior populations using a Dirchlet random variable.

    Parameters
    ----------
    num_frames : int
        Number of conformations
    bootstrap_index_list : list([ndarray])
        List of arrays of frame indices.  The indices in bootstrap_index_list[i]
        will be perturbed together

    Returns
    -------
    prior_populations : ndarray, shape = (num_frames)
        Prior populations of each conformation

    Notes
    -------
    This function allows you to perform Bayesian bootstrapping by modifying
    the prior populations attached to each frame.  Because molecular dynamics
    frames are time correlated, one must first divide the dataset into
    temporal blocks.  A dirichlet random variable is then drawn to modify the prior
    populations blockwise.

    """
    num_blocks = len(bootstrap_index_list)

    prior_dirichlet = pymc.Dirichlet("prior_dirichlet",
                                     np.ones(num_blocks))  # Draw a dirichlet

    block_pops = np.zeros(num_blocks)
    # The pymc Dirichlet does not explicitly store the final component,
    # so fill it in from normalization.
    block_pops[:-1] = prior_dirichlet.value[:]
    block_pops[-1] = 1.0 - block_pops.sum()

    prior_populations = np.ones(num_frames)
    for k, ind in enumerate(bootstrap_index_list):
        prior_populations[ind] = block_pops[k] / len(ind)

    return prior_populations
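
A minimal sketch with ten contiguous blocks of ten frames each (the block boundaries are an assumption; in practice they should match the correlation time of the trajectory):

    import numpy as np

    num_frames = 100
    blocks = [np.arange(i, i + 10) for i in range(0, num_frames, 10)]
    pops = sample_prior_pops(num_frames, blocks)
    assert np.isclose(pops.sum(), 1.0)  # populations remain normalized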
Example #23
def run_DP():
    aD = [-10, -9, 10, 11, 20, 21, 42, 43]  # data points
    nC = 5  # max number of clusters
    nPts = len(aD) + 1
    # Cluster parameters
    aUh = [
        pm.Uniform('UnifH' + str(i), lower=-50, upper=50) for i in range(nC)
    ]
    aNc = [pm.Normal('NormC' + str(i), mu=aUh[i], tau=1) for i in range(nC)]
    # Dirichlet & Categorical nodes
    Gam = pm.Uniform('UnifG', lower=0, upper=15)
    #     Gam = pm.Gamma('Gamma', alpha=2.5, beta=2)
    Dir = pm.Dirichlet('Dirichlet', theta=[Gam / nC] * nC)
    aC = [pm.Categorical('Cat' + str(i), Dir) for i in range(nPts)]
    aL = [
        pm.Lambda('p_Norm' + str(i), lambda k=aC[i], aNcl=aNc: aNcl[int(k)])
        for i in range(nPts)
    ]
    # Observed points
    aN = [
        pm.Normal('NormX' + str(i),
                  mu=aL[i],
                  tau=1,
                  observed=True,
                  value=aD[i]) for i in range(nPts - 1)
    ]
    Nz = pm.Normal('NormZ', mu=aL[-1], tau=1)  # held-out point
    return np.concatenate([[Nz, Dir, Gam], aUh, aNc, aC, aN])
Example #24
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])
    C1 = pm.Categorical('2-Cat', D)
    C2 = pm.Categorical('10-Cat', D)
    C3 = pm.Categorical('11-Cat', D)
    G0_0 = pm.Gamma('4-Gamma0_1', alpha=1, beta=1.5)
    U1 = pm.Uniform('12-Unif', lower=-100, upper=500)
    U2 = pm.Uniform('13-Unif', lower=-100, upper=500)
    U3 = pm.Uniform('14-Unif', lower=-100, upper=500)
    N0_1 = pm.Normal('5-Norm0_1', mu=U1, tau=1)
    N0_2 = pm.Normal('6-Norm0_2', mu=U2, tau=1)
    N0_3 = pm.Normal('7-Norm0_3', mu=U3, tau=1)
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.Normal('3-Norm', mu=p_N1, tau=1)
    obsN1 = pm.Normal('8-Norm', mu=p_N2, tau=1, observed=True, value=0)
    obsN2 = pm.Normal('9-Norm', mu=p_N3, tau=1, observed=True, value=150)
    return pm.Model(
        [D, C1, C2, C3, N, G0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
Example #25
def get_Models():
    # Full model (DP [Dirichlet] & Categorical)
    aD = [1, 0, 1]  # data points
    nA, nC = 0.3, 3  # alpha & max number of clusters
    aAlphas = [nA / nC] * nC
    Dir = pm.Dirichlet('Dir', theta=aAlphas)
    CatD = [
        pm.Categorical('CatD_' + str(i), p=Dir, observed=True, value=aD[i])
        for i in range(len(aD))
    ]
    CatQ = pm.Categorical('CatQ', p=Dir)
    # Collapsed model (Categorical)
    aP = []
    for i in range(len(aAlphas)):  # for each category, its posterior probability p_i
        aP.append((aAlphas[i] + aD.count(i)) / (sum(aAlphas) + len(aD)))
    CatQ2 = pm.Categorical('CatQ2', p=aP)
    return np.concatenate([[Dir, CatQ, CatQ2], CatD])
Example #26
    state = pm.Bernoulli('state', p=0.5, shape=len(day))

    # Parameters
    alpha = pm.Normal('alpha', mu=0, tau=1E-3, shape=len(mtag[0]))
    beta = pm.Normal('beta', mu=0, tau=1E-3, shape=len(mtag[0]))

    ## Softmax
    def invlogit(x):
        return T.tensor.nnet.softmax(x)

    theta = np.empty(len(day), object)
    p_vec = np.empty(len(day), object)
    track_lk = np.empty(len(day), object)

    ## empty theta
    for i, j in enumerate(day):
        theta[i] = alpha + T.dot(state[i], beta.T)
        p_vec[i] = invlogit(theta[i])

        # Data likelihood
        # Data likelihood (indexed name: PyMC3 forbids reusing a variable name)
        track_lk[i] = pm.Dirichlet('track_lk_%i' % i,
                                   a=p_vec[i],
                                   shape=len(mtag[0]),
                                   observed=mtag[i])

with per_model:
    # start = pm.find_MAP()
    step = pm.Metropolis()
    nsteps = 1000
    trace = pm.sample(nsteps, step)
Example #27
def buildGaussMixture1DModel(halos, ngauss, modeltype='ratio'):

    parts = {}

    ### PDF handling

    massnorm = 1e15

    masses = halos[0]['masses']
    nmasses = len(masses)

    nclusters = len(halos)
    delta_masses = np.zeros((nclusters, nmasses - 1))
    delta_mls = np.zeros((nclusters, nmasses))
    pdfs = np.zeros((nclusters, nmasses))

    #also need to collect some statistics, to init mixture model
    pdfmeans = np.zeros(nclusters)
    pdfwidths = np.zeros(nclusters)

    for i in range(nclusters):

        if modeltype == 'additive':
            delta_masses[i, :] = (masses[1:] - masses[:-1]) / massnorm
            delta_mls[i, :] = (masses - halos[i]['true_mass']) / massnorm
            pdfs[i, :] = halos[i][
                'pdf'] * massnorm  #preserve unitarity under integration
        elif modeltype == 'ratio':
            delta_masses[i, :] = (masses[1:] -
                                  masses[:-1]) / halos[i]['true_mass']
            delta_mls[i, :] = masses / halos[i]['true_mass']
            pdfs[i, :] = halos[i]['pdf'] * halos[i]['true_mass']

        pdfmeans[i] = scipy.integrate.trapz(delta_mls[i, :] * pdfs[i, :],
                                            delta_mls[i, :])
        pdfwidths[i] = np.sqrt(
            scipy.integrate.trapz(
                pdfs[i, :] * (delta_mls[i, :] - pdfmeans[i])**2,
                delta_mls[i, :]))

    datacenter = np.mean(pdfmeans)
    dataspread = np.std(pdfmeans)
    datatypvar = np.mean(pdfwidths)
    dataminsamp = np.min(delta_masses)

    print(datacenter, dataspread, datatypvar, dataminsamp)

    #### Mixture model priors

    piprior = pymc.Dirichlet('piprior', np.ones(ngauss))
    parts['piprior'] = piprior

    mu0 = pymc.Uninformative(
        'mu0', datacenter + np.random.uniform(-5 * dataspread, 5 * dataspread))
    parts['mu0'] = mu0

    # kelly07 xvars prior.
    #    w2 = pymc.Uniform('w2', 0.1/dataspread**2., 100*max(1./dataspread**2, 1./datatypvar**2))
    #    print w2.parents
    #    parts['w2'] = w2
    #
    #
    #    xvars = pymc.InverseGamma('xvars', 0.5, 0.5*w2, size=ngauss+1)  #dropping the 1/2 factor on w2, because I don't think it matters

    logxsigma = pymc.Uniform('logxsigma',
                             np.log(2 * dataminsamp),
                             np.log(5 * dataspread),
                             size=ngauss + 1)
    parts['logxsigma'] = logxsigma

    @pymc.deterministic(trace=False)
    def xvars(logxsigma=logxsigma):
        return np.exp(logxsigma)**2

    parts['xvars'] = xvars

    @pymc.deterministic(trace=False)
    def tauU2(xvars=xvars):
        return 1. / xvars[-1]

    parts['tauU2'] = tauU2

    xmus = pymc.Normal('xmus', mu0, tauU2, size=ngauss)
    parts['xmus'] = xmus

    @pymc.observed
    def data(value=0.,
             delta_mls=delta_mls,
             delta_masses=delta_masses,
             pdfs=pdfs,
             piprior=piprior,
             xmus=xmus,
             xvars=xvars):

        #complete pi
        pis = pymc.extend_dirichlet(piprior)

        #        print pis

        #        #enforce identiability by ranking means
        #        for i in range(xmus.shape[0]-1):
        #            if (xmus[i] >= xmus[i+1:]).any():
        #                raise pymc.ZeroProbability
        #

        return dlntools.pdfGaussMix1D(delta_mls=delta_mls,
                                      delta_masses=delta_masses,
                                      pdfs=pdfs,
                                      pis=pis,
                                      mus=xmus,
                                      tau2=xvars[:-1])

    parts['data'] = data

    return parts
Example #28
    beta = var_params[i * 2 + 1]
    var_obs.append(
        pymc.Gamma("var_obs{}".format(i),
                   alpha=alpha,
                   beta=beta,
                   value=means[i],
                   observed=True))
    var_pred.append(pymc.Gamma("var_pred{}".format(i), alpha=alpha, beta=beta))

    probs = [trans_params[i * n_states + j] for j in range(n_states)]

    @pymc.deterministic
    def params(probs=probs):
        return np.array(probs)

    trans_obs.append(
        pymc.Dirichlet("trans_obs{}".format(i),
                       params,
                       value=transitions[i],
                       observed=True))
    trans_pred.append(pymc.Dirichlet("trans_pred{}".format(i), params))

pred = mean_pred[:]
pred.extend(var_pred)
pred.extend(trans_pred)
model = pymc.Model(pred)

M = pymc.MCMC(model)
M.sample(1000, 200, 10)
M.db.close()
Example #29
    def _create_parameter_model(self, database, initial_parameters):
        """
        Creates set of stochastics representing the set of all parameters for all models

        Arguments
        ---------
        database : dict
            FreeSolv database
        initial_parameters : dict
            The set of initial values of the parameters

        Returns
        -------
        parameters : dict
            PyMC dictionary containing the parameters to sample.
        """
        parameters = dict()  # just the parameters
        parameters['gbmodel_dir'] = pymc.Dirichlet('gbmodel_dir',
                                                   np.ones([self.ngbmodels]))
        parameters['gbmodel_prior'] = pymc.CompletedDirichlet(
            'gbmodel_prior', parameters['gbmodel_dir'])
        if self.ngbmodels == 5:
            parameters['gbmodel'] = pymc.Categorical(
                'gbmodel', value=4, p=parameters['gbmodel_prior'])
        else:
            parameters['gbmodel'] = pymc.Categorical(
                'gbmodel', p=parameters['gbmodel_prior'])
        uninformative_tau = 0.0001
        joint_proposal_sets = {}
        for (key, value) in initial_parameters.items():
            (atomtype, parameter_name) = key.split('_')
            if parameter_name == 'scalingFactor':
                stochastic = pymc.Uniform(key,
                                          value=value,
                                          lower=-0.8,
                                          upper=+1.5)
            elif parameter_name == 'radius':
                stochastic = pymc.Uniform(key,
                                          value=value,
                                          lower=0.5,
                                          upper=2.5)
            elif parameter_name == 'alpha':
                stochastic = pymc.Normal(key,
                                         value=value,
                                         mu=value,
                                         tau=uninformative_tau)
            elif parameter_name == 'beta':
                stochastic = pymc.Normal(key,
                                         value=value,
                                         mu=value,
                                         tau=uninformative_tau)
            elif parameter_name == 'gamma':
                stochastic = pymc.Normal(key,
                                         value=value,
                                         mu=value,
                                         tau=uninformative_tau)
            else:
                raise Exception("Unrecognized parameter name: %s" %
                                parameter_name)
            parameters[key] = stochastic
            self.stochastics_joint_proposal.append(stochastic)
        return parameters
Example #30
alpha_vector = alpha

# Matrix of inter-group correlations
# DIMENSIONS: num_groups x num_groups
# SUPPORT: [0,1]
# DISTRIBUTION: None
B_matrix = B

#---------------------------- Prior Parameters ---------------------------#
# Actual group membership probabilities for each person
# DIMENSIONS: 1 x (num_people * num_groups)
# SUPPORT: (0,1], Elements of each vector should sum to 1 for each person
# DISTRIBUTION: Dirichlet(alpha)
pi_list = np.empty(num_people, dtype=object)
for person in range(num_people):
    person_pi = pymc.Dirichlet('pi_%i' % person, theta=alpha_vector)
    pi_list[person] = person_pi

completed_pi_list = [
    pymc.CompletedDirichlet('completed_pi_%d' % i, dist)
    for i, dist in enumerate(pi_list)
]

# Indicator variables of whether the pth person is in a group or not
# DIMENSIONS: 1 x (num_people^2) for each list, where each element is Kx1
# DOMAIN : {0,1}, only one element of vector is 1, all else 0
# DISTRIBUTION: Categorical (using Multinomial with 1 observation)
z_pTq_matrix = np.empty([num_people, num_people], dtype=object)
z_pFq_matrix = np.empty([num_people, num_people], dtype=object)
for p_person in range(num_people):
    for q_person in range(num_people):