Example #1
    def test_mixture_of_mvn(self):
        mu1 = np.asarray([0.0, 1.0])
        cov1 = np.diag([1.5, 2.5])
        mu2 = np.asarray([1.0, 0.0])
        cov2 = np.diag([2.5, 3.5])
        obs = np.asarray([[0.5, 0.5], mu1, mu2])
        with Model() as model:
            w = Dirichlet("w", floatX(np.ones(2)), transform=None, shape=(2, ))
            mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
            mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
            y = Mixture("x_obs", w, [mvncomp1, mvncomp2], observed=obs)

        # check logp of each component
        complogp_st = np.vstack((
            st.multivariate_normal.logpdf(obs, mu1, cov1),
            st.multivariate_normal.logpdf(obs, mu2, cov2),
        )).T
        complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
        assert_allclose(complogp, complogp_st)

        # check logp of mixture
        testpoint = model.test_point
        mixlogp_st = logsumexp(np.log(testpoint["w"]) + complogp_st,
                               axis=-1,
                               keepdims=False)
        assert_allclose(y.logp_elemwise(testpoint), mixlogp_st)

        # check logp of model
        priorlogp = st.dirichlet.logpdf(
            x=testpoint["w"],
            alpha=np.ones(2),
        )
        assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp)
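A minimal standalone sketch, assuming only NumPy and SciPy, of the identity the test above checks: the mixture log-density is a logsumexp of the component log-densities offset by log w, which is exactly how mixlogp_st is built (the values below are illustrative, not the test fixtures).

# Sketch of the mixture log-density identity used by the test above:
#   log p(x) = logsumexp_k( log w_k + log p_k(x) )
import numpy as np
from scipy import stats as st
from scipy.special import logsumexp

w = np.array([0.4, 0.6])
mu1, cov1 = np.array([0.0, 1.0]), np.diag([1.5, 2.5])
mu2, cov2 = np.array([1.0, 0.0]), np.diag([2.5, 3.5])
x = np.array([0.5, 0.5])

comp_logp = np.array([
    st.multivariate_normal.logpdf(x, mu1, cov1),
    st.multivariate_normal.logpdf(x, mu2, cov2),
])
mix_logp = logsumexp(np.log(w) + comp_logp)

# The same quantity computed naively in probability space (numerically less stable).
naive = np.log(w @ np.exp(comp_logp))
assert np.isclose(mix_logp, naive)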
Example #2
    def test_mixture_list_of_normals(self):
        with Model() as model:
            w = Dirichlet("w",
                          floatX(np.ones_like(self.norm_w)),
                          shape=self.norm_w.size)
            mu = Normal("mu", 0.0, 10.0, shape=self.norm_w.size)
            tau = Gamma("tau", 1.0, 1.0, shape=self.norm_w.size)
            Mixture(
                "x_obs",
                w,
                [
                    Normal.dist(mu[0], tau=tau[0]),
                    Normal.dist(mu[1], tau=tau[1])
                ],
                observed=self.norm_x,
            )
            step = Metropolis()
            trace = sample(5000,
                           step,
                           random_seed=self.random_seed,
                           progressbar=False,
                           chains=1)

        assert_allclose(np.sort(trace["w"].mean(axis=0)),
                        np.sort(self.norm_w),
                        rtol=0.1,
                        atol=0.1)
        assert_allclose(np.sort(trace["mu"].mean(axis=0)),
                        np.sort(self.norm_mu),
                        rtol=0.1,
                        atol=0.1)
Example #3
    def test_normal_mixture(self):
        with Model() as model:
            w = Dirichlet('w', np.ones_like(self.norm_w))

            mu = Normal('mu', 0., 10., shape=self.norm_w.size)
            tau = Gamma('tau', 1., 1., shape=self.norm_w.size)

            x_obs = NormalMixture('x_obs',
                                  w,
                                  mu,
                                  tau=tau,
                                  observed=self.norm_x)

            step = Metropolis()
            trace = sample(5000,
                           step,
                           random_seed=self.random_seed,
                           progressbar=False)

        assert_allclose(np.sort(trace['w'].mean(axis=0)),
                        np.sort(self.norm_w),
                        rtol=0.1,
                        atol=0.1)
        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
                        np.sort(self.norm_mu),
                        rtol=0.1,
                        atol=0.1)
Example #4
    def test_mixture_list_of_normals(self):
        with Model() as model:
            w = Dirichlet('w', floatX(np.ones_like(self.norm_w)))
            mu = Normal('mu', 0., 10., shape=self.norm_w.size)
            tau = Gamma('tau', 1., 1., shape=self.norm_w.size)
            Mixture('x_obs',
                    w, [
                        Normal.dist(mu[0], tau=tau[0]),
                        Normal.dist(mu[1], tau=tau[1])
                    ],
                    observed=self.norm_x)
            step = Metropolis()
            trace = sample(5000,
                           step,
                           random_seed=self.random_seed,
                           progressbar=False,
                           chains=1)

        assert_allclose([
            np.sort(trace['w'].mean(axis=0)),
            np.sort(trace['mu'].mean(axis=0))
        ], [np.sort(self.norm_w), np.sort(self.norm_mu)],
                        rtol=0.1,
                        atol=0.1)
        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
                        np.sort(self.norm_mu),
                        rtol=0.1,
                        atol=0.1)
Example #5
    def test_mixture_list_of_poissons(self):
        with Model() as model:
            w = Dirichlet('w',
                          floatX(np.ones_like(self.pois_w)),
                          shape=self.pois_w.shape)
            mu = Gamma('mu', 1., 1., shape=self.pois_w.size)
            Mixture(
                'x_obs',
                w,
                [Poisson.dist(mu[0]), Poisson.dist(mu[1])],
                observed=self.pois_x)
            step = Metropolis()
            trace = sample(5000,
                           step,
                           random_seed=self.random_seed,
                           progressbar=False,
                           chains=1)

        assert_allclose(np.sort(trace['w'].mean(axis=0)),
                        np.sort(self.pois_w),
                        rtol=0.1,
                        atol=0.1)
        assert_allclose(np.sort(trace['mu'].mean(axis=0)),
                        np.sort(self.pois_mu),
                        rtol=0.1,
                        atol=0.1)
Example #6
    def test_poisson_mixture(self):
        with Model() as model:
            w = Dirichlet("w", floatX(np.ones_like(self.pois_w)), shape=self.pois_w.shape)
            mu = Gamma("mu", 1.0, 1.0, shape=self.pois_w.size)
            Mixture("x_obs", w, Poisson.dist(mu), observed=self.pois_x)
            step = Metropolis()
            trace = sample(5000, step, random_seed=self.random_seed, progressbar=False, chains=1)

        assert_allclose(np.sort(trace["w"].mean(axis=0)), np.sort(self.pois_w), rtol=0.1, atol=0.1)
        assert_allclose(
            np.sort(trace["mu"].mean(axis=0)), np.sort(self.pois_mu), rtol=0.1, atol=0.1
        )
Example #7
    def test_normal_mixture_nd(self):
        nd, ncomp = 3, 5

        with Model() as model0:
            mus = Normal('mus', shape=(nd, ncomp))
            taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp))
            ws = Dirichlet('ws', np.ones(ncomp))
            mixture0 = NormalMixture('m', w=ws, mu=mus, tau=taus, shape=nd)

        with Model() as model1:
            mus = Normal('mus', shape=(nd, ncomp))
            taus = Gamma('taus', alpha=1, beta=1, shape=(nd, ncomp))
            ws = Dirichlet('ws', np.ones(ncomp))
            comp_dist = [
                Normal.dist(mu=mus[:, i], tau=taus[:, i]) for i in range(ncomp)
            ]
            mixture1 = Mixture('m', w=ws, comp_dists=comp_dist, shape=nd)

        testpoint = model0.test_point
        testpoint['mus'] = np.random.randn(nd, ncomp)
        assert_allclose(model0.logp(testpoint), model1.logp(testpoint))
        assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint))
Example #8
def run_mv_model(data, K=3, n_feats=2, mus=None, mc_samples=10000, jobs=1):
    with pm.Model() as model:
        n_samples = len(data)
        tau = pm.Deterministic('tau', pm.floatX(tt.eye(n_feats) * 10))
        mus = 0. if mus is None else mus
        mus = MvNormal('mus', mu=mus, tau=tau, shape=(K, n_feats))
        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x',
                         mu=mus[category],
                         tau=tt.eye(n_feats),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'tau'])
    plt.title('mv model')
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])

    return model, mod, trace
Example #9
def run_normal_mv_model(data, K=3, mus=None, mc_samples=10000, jobs=1):

    with pm.Model() as model:
        n_samples, n_feats = data.shape
        #print n_samples,n_feats
        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=n_feats,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        #mus = pm.Normal('mus', mu = [[10,10], [55,55], [105,105], [155,155], [205,205]], sd = 10, shape=(K,n_feats))
        mus = pm.Normal('mus',
                        mu=mus,
                        sd=10.,
                        shape=(K, n_feats),
                        testval=data.mean(axis=0))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        #TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)
        xs = pm.MvNormal('x', mu=mus[category], chol=L, observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        trace = sample(mc_samples, step2, n_jobs=jobs)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model')

    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    #if chains > 1:
    #   print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
Example #10
def f(mask, aoi_id, plot=False):
    '''
    * run parallel process
    '''

    # turn off pymc3 logging
    getLogger("pymc3").setLevel(ERROR)

    # get transform object for the dataset (nw corner & resolution)
    transform = from_origin(mask['bounds'][0], mask['bounds'][3],
                            mask['resolution'], mask['resolution'])

    # check that output directory is there
    if not exists("./out/"):
        makedirs("./out/")

    # seed data and uncertainty arrays for the study area and build dictionary to control outputs
    c_data = zeros(
        (ceil((mask['bounds'][3] - mask['bounds'][1]) / mask['resolution']),
         ceil((mask['bounds'][2] - mask['bounds'][0]) / mask['resolution'])))
    outputs = {
        'catholic': {
            'path': f'./out/{aoi_id}_catholic.tif',
            'mean': c_data,
            'low': c_data.copy(),
            'high': c_data.copy()
        },
        'protestant': {
            'path': f'./out/{aoi_id}_protestant.tif',
            'mean': c_data.copy(),
            'low': c_data.copy(),
            'high': c_data.copy()
        },
        'mixed': {
            'path': f'./out/{aoi_id}_mixed.tif',
            'mean': c_data.copy(),
            'low': c_data.copy(),
            'high': c_data.copy()
        }
    }

    # extract list of group names
    groups = array(list(outputs.keys()))

    # use try-finally so if it fails we can see where it got up to
    # try:

    print(f"AOI Dimensions: {c_data.shape[1]}x{c_data.shape[0]}px")

    # loop through rows and columns in the dataset
    for row in range(c_data.shape[0]):
        for col in range(c_data.shape[1]):

            print(
                f"\t...{row * c_data.shape[1] + col} of {c_data.shape[0] * c_data.shape[1]} ({(row * c_data.shape[1] + col)/(c_data.shape[0] * c_data.shape[1])*100:.2f}%)"
            )

            # get coordinates for the point
            point = Point(array2Coords(transform, row, col))
            ''' calculate hyperparameters (priors) '''

            # get the census data for the census Small Area that contains the point
            possible_matches = mask['census'].iloc[list(
                mask['census'].sindex.intersection(point.bounds))]
            district = possible_matches.loc[possible_matches.contains(point)][[
                'pcCatholic', 'pcProtesta', 'pc_Other', 'pc_None'
            ]]

            # make sure that there was a match at all!
            if len(district.index) > 0:

                # compute proportions for the three groups
                # replace zeros with 1s, as 0's are not allowed in the hyperparameters (they give a Bad initial energy error)
                alphas = maximum(
                    ones(3),
                    array([
                        int(round(district['pcCatholic'].iloc[0])),
                        int(round(district['pcProtesta'].iloc[0])),
                        int(
                            round(district['pc_Other'].iloc[0] +
                                  district['pc_None'].iloc[0]))
                    ]))

            else:
                # if no matches, have equal belief for each group
                alphas = array([1, 1, 1])
            ''' calculate observations '''

            # init lists for observations
            c = []
            n = []

            # construct the radius for analysis
            polygon = point.buffer(mask['radius'])

            # loop through each dataset
            for i, gdf in mask['datasets'].items():

                # check that there is data available (this is if no data has been
                #  passed in the mask as the clip polygon does not intersect any)
                if len(gdf.index) > 0:

                    # get data points within and get IDW2 multiplier
                    possible_matches = gdf.iloc[list(
                        gdf.sindex.intersection(polygon.bounds))]
                    observations = possible_matches.loc[
                        possible_matches.within(polygon)]

                    observations['idw2'] = (
                        1 - observations.geometry.distance(point) /
                        mask['radius'])**2

                    # check that there is data available (this is if data has been
                    #  passed but the buffer polygon does not intersect it)
                    if len(observations) > 0:

                        # get weighted group counts for the current dataset
                        if i == 'mapme':
                            catholics, protestants, mixed = getMapmeGroups(
                                observations)
                        elif i == 'gps':
                            catholics, protestants, mixed = getGpsGroups(
                                observations)
                        elif i == 'survey':
                            catholics, protestants, mixed = getSurveyGroups(
                                observations)

                        # index and int the scores for each dataset
                        sums = [catholics, protestants, mixed]

                        # catch error caused by no probabilities
                        if sum(sums) > 0:

                            print(sums)

                            # process into correct format
                            sums = [
                                int(round(i / sum(sums) * 100)) for i in sums
                            ]

                            # append to observations list
                            c.append(sums)
                            n.append(sum(sums))

                            # TODO: DO I WANT ALL THESE 0'S OR ARE THEY GOING TO CAUSE PROBLEMS?

                        else:
                            # if no matches, just append some empty data
                            c.append([0, 0, 0])
                            n.append(0)
                    else:
                        # if no matches, just append some empty data
                        c.append([0, 0, 0])
                        n.append(0)
                else:
                    # if no matches, just append some empty data
                    c.append([0, 0, 0])
                    n.append(0)

            # convert observations np array
            c = array(c)
            n = array(n)

            # print(alphas, c, n)
            # print()
            ''' run model '''

            # start making MCC model
            with Model() as model:

                # TODO: LOOK INTO TESTVALS FOR PARAMETERS
                # https://nbviewer.jupyter.org/github/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter3_MCMC/Ch3_IntroMCMC_PyMC3.ipynb#Intelligent-starting-values

                # parameters of the Multinomial are from a Dirichlet
                parameters = Dirichlet('parameters', a=alphas, shape=3)

                # observed data is from a Multinomial distribution
                observed_data = Multinomial('observed_data',
                                            n=n,
                                            p=parameters,
                                            shape=3,
                                            observed=c)

                with model:

                    # estimate the Maximum a Posterior
                    # start = find_MAP()	#don't use this - it prevents convergence!

                    # sample from the posterior (NUTS is default so is not explicitly stated)
                    trace = sample(
                        # start=start,                # start at the MAP to increase chance of convergence -- DON'T DO THIS!
                        draws=1000,  # number of sample draws
                        chains=4,  # number of chains in which the above are drawn (match cores)
                        cores=1,  # max permitted by library
                        tune=500,  # how many will be discarded (>=50% of draws)
                        discard_tuned_samples=True,  # discard the tuning samples
                        progressbar=False,  # avoid unnecessarily filling up the output file
                        target_accept=0.9  # up from 0.8 to avoid false positives: https://eigenfoo.xyz/bayesian-modelling-cookbook/#fixing-divergences
                    )

                    if plot:
                        plot_trace(trace, show=True)

                    # retrieve summary data
                    results = summary(trace)
                    results.index = groups

            # output the result to the datasets
            for k, v in outputs.items():
                v['mean'][row, col] = results.loc[k, 'mean']
                v['low'][row, col] = results.loc[k, 'hpd_3%']
                v['high'][row, col] = results.loc[k, 'hpd_97%']

    # if we get an error - print some debugging info
    # except Exception as e:
    #     print("\n--- EXCEPTION ---")
    #     print(e)
    #     print(row, col, point)
    #     if (sums):
    #         print(sums)
    #     else:
    #         print("sums not defined yet")
    #     print(c, n)
    #
    # # whatever happens, output the results to files
    # finally:

    # loop through outputs
    for g in outputs.values():

        # output dataset to raster (hardcoded crs as was causing error)
        with rio_open(g['path'],
                      'w',
                      driver='GTiff',
                      height=g['mean'].shape[0],
                      width=g['mean'].shape[1],
                      count=3,
                      dtype='float64',
                      crs="EPSG:29902",
                      transform=transform) as out:

            # add data and uncertainties as raster bands
            out.write(g['mean'], 1)
            out.write(g['low'], 2)
            out.write(g['high'], 3)
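Because the per-pixel model above is a Dirichlet prior over group proportions with Multinomial counts, the posterior is also available in closed form by conjugacy. A hedged sketch of that closed-form posterior mean, with placeholder alphas and counts, which can serve as a rough sanity check on the sampled 'mean' column:

# Closed-form sanity check for the Dirichlet-Multinomial model above:
# with prior Dirichlet(alpha) and multinomial counts c (summed over datasets),
# the posterior is Dirichlet(alpha + sum(c)), so its mean needs no sampling.
# The alphas and counts below are illustrative placeholders.
import numpy as np

alphas = np.array([1.0, 1.0, 1.0])  # prior pseudo-counts per group
counts = np.array([[60, 30, 10],    # weighted counts from dataset 1
                   [20, 70, 10]])   # weighted counts from dataset 2
posterior_alpha = alphas + counts.sum(axis=0)
posterior_mean = posterior_alpha / posterior_alpha.sum()
print(dict(zip(['catholic', 'protestant', 'mixed'], posterior_mean.round(3))))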
Example #11
def run_normal_mv_model_mixture(data,
                                K=3,
                                mus=None,
                                mc_samples=10000,
                                jobs=1,
                                n_cols=10,
                                n_rows=100,
                                neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples),
                          n_cols=n_cols,
                          n_rows=n_rows,
                          n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)

    with pm.Model() as model:

        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=n_feats,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        sds = pm.HalfNormal('sds', sd=tt.ones(shp) * 100, shape=shp)

        mus = pm.Normal('mus',
                        mu=tt.as_tensor_variable(mus_start),
                        sd=sds,
                        shape=shp)

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        #TODO one pi per voxel
        #category = pm.Categorical('category', p=pi, shape = n_samples )
        mvs = [pm.MvNormal.dist(mu=mus[i], chol=L) for i in range(K)]

        #
        #aux2 = tt.set_subtensor(aux[inds],category[to_fill])
        #prior = pm.Deterministic('prior',(tt.sum(tt.eq( aux2.reshape( (n_samples,max_neigs ) ),
        #                                               category.reshape( (n_samples,1)) ), axis = 1 )+1)/1.0 )

        pesos = pm.Dirichlet('pesos', a=np.ones((K, )))
        #obs = pm.Mixture('obs',w = pesos, comp_dists = mvs, observed = data)
        obs = my_mixture('obs', w=pesos, comp_dists=mvs, observed=data)

        with model:
            #step2 = pm.CategoricalGibbsMetropolis(vars=[category] )
            trace = sample(mc_samples, n_jobs=jobs, tune=500)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma', 'mvs', 'pesos'])
    plt.title('normal mv model 40 cols')
    logp_simple(mus, category, aux3)
    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    #if chains > 1:
    #   print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
Example #12
def run_One_d_Model(data,
                    K=3,
                    mus=None,
                    mc_samples=10000,
                    jobs=1,
                    n_cols=10,
                    n_rows=100,
                    neigs=1):
    def logp_simple(mus, category, aux3):
        def logp_(value):
            spatial_factor = 2
            aux = tt.ones((n_samples, ))
            logps = tt.zeros((n_samples))
            sumlogps = tt.zeros((K, n_samples))
            pi = tt.sum(tt.eq(aux3, (aux * category).reshape((n_samples, 1))),
                        axis=1) / 8.0
            # TODO: are logps and sumlogps always overwritten for all values?
            for i, label in enumerate(range(K)):
                pi_l = tt.sum(tt.eq(aux3, (aux * label).reshape(
                    (n_samples, 1))),
                              axis=1) / 8.0
                sumlogps = tt.set_subtensor(sumlogps[i, :],
                                            (mus[label].logp(value)) +
                                            (pi_l - 1) * spatial_factor)
            sumlogps = tt.sum(sumlogps, axis=0)

            for label in range(K):
                indx = tt.eq(category, tt.as_tensor_variable(label)).nonzero()
                logps = tt.set_subtensor(
                    logps[indx], (mus[label].logp(value)[indx]) +
                    (pi[indx] - 1) * spatial_factor - sumlogps[indx])
            return logps

        return logp_

    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples),
                          n_cols=n_cols,
                          n_rows=n_rows,
                          n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)
    alpha = 0.1 * np.ones((n_samples, K))

    with pm.Model() as model:

        mu = pm.Normal('mus',
                       100,
                       mus_start,
                       shape=K,
                       testval=mus_start,
                       transform=Ordered())
        sd = pm.Uniform('sds', lower=0., upper=150., shape=K)

        #pi = Dirichlet('pi', a = alpha, shape= (n_samples, K) )
        pi = Dirichlet('pi', a=alpha, shape=K)

        category = pm.Categorical('category', p=pi, shape=n_samples)
        shit_max = pm.Deterministic('shit_max', tt.max(category))
        shit_min = pm.Deterministic('shit_min', tt.min(category))
        x = pm.NormalMixture()
Example #13
def run_normal_mv_model_mixture_DIY(data,
                                    K=3,
                                    mus=None,
                                    mc_samples=10000,
                                    jobs=1,
                                    n_cols=10,
                                    n_rows=100,
                                    neigs=1):
    def logp_simple(mus, category, aux3):
        def logp_(value):
            spatial_factor = 0.00
            aux = tt.ones((n_samples, ))
            logps = tt.zeros((n_samples))
            sumlogps = tt.zeros((K, n_samples))
            pi = tt.sum(tt.eq(aux3, (aux * category).reshape((n_samples, 1))),
                        axis=1) / 8.0
            # TODO: are logps and sumlogps always overwritten for all values?
            for i, label in enumerate(range(K)):
                pi_l = tt.sum(tt.eq(aux3, (aux * label).reshape(
                    (n_samples, 1))),
                              axis=1) / 8.0
                sumlogps = tt.set_subtensor(sumlogps[i, :],
                                            (mus[label].logp(value)) +
                                            (pi_l - 1) * spatial_factor)
            sumlogps = tt.sum(sumlogps, axis=0)

            for label in range(K):
                indx = tt.eq(category, tt.as_tensor_variable(label)).nonzero()
                logps = tt.set_subtensor(
                    logps[indx], (mus[label].logp(value)[indx]) +
                    (pi[indx] - 1) * spatial_factor - sumlogps[indx])

            return logps

        return logp_

    #K = 3
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples),
                          n_cols=n_cols,
                          n_rows=n_rows,
                          n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69
    shp = (K, n_feats)
    mus_start = np.percentile(data, np.linspace(1, 100, K), axis=0)
    alpha = 0.1 * np.ones((n_samples, K))

    with pm.Model() as model:

        packed_L = [
            pm.LKJCholeskyCov('packed_L_%d' % i,
                              n=n_feats,
                              eta=2.,
                              sd_dist=pm.HalfCauchy.dist(2.5))
            for i in range(K)
        ]
        L = [
            pm.expand_packed_triangular(n_feats, packed_L[i]) for i in range(K)
        ]
        #sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        #sds = pm.Uniform('sds',lower=0., upper=150., shape = shp )
        mus = pm.Normal('mus', mu=100., sd=1, shape=shp)

        pi = Dirichlet('pi', a=alpha, shape=(n_samples, K))

        category = pm.Categorical('category', p=pi, shape=n_samples)
        shit_max = pm.Deterministic('shit_max', tt.max(category))
        shit_min = pm.Deterministic('shit_min', tt.min(category))

        #mvs = [MvNormal('mu_%d' % i, mu=mus[i],tau=pm.floatX(1. * np.eye(n_feats)),shape=(n_feats,)) for i in range(K)]
        mvs = [pm.MvNormal.dist(mu=mus[i], chol=L[i]) for i in range(K)]

        aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        xs = DensityDist('x',
                         logp_simple(mvs, category,
                                     aux2.reshape((n_samples, max_neigs))),
                         observed=data)

        with model:
            step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
            trace = sample(mc_samples, step=step2, tune=1000, chains=4)

    pm.traceplot(trace, varnames=['mus', 'sds'])
    plt.title('logp_sum_mo_alpha_700_tunes_spatial_2')

    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    return model, mod, trace
Example #14
obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])
# X is now (nObs,K)
X_start = np.concatenate([X_start[:, 0:T[i], i].T for i in range(N)])
# O is now (nObs, Dd)
# TODO: implement this with sparse matrices
O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])

#import pdb; pdb.set_trace()

model = Model()
with model:
    #Fails: #pi = Dirichlet('pi', a = as_tensor_variable([0.147026,0.102571,0.239819,0.188710,0.267137,0.054738]), shape=M, testval = np.ones(M)/float(M))
    pi = Dirichlet('pi',
                   a=as_tensor_variable([
                       0.147026, 0.102571, 0.239819, 0.188710, 0.267137,
                       0.054738
                   ]),
                   shape=M)
    pi_min_potential = Potential('pi_min_potential',
                                 TT.switch(TT.min(pi) < .001, -np.inf, 0))

    Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0, shape=(M, M))

    #S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs, observed_jumps=obs_jumps, T=T, shape=(nObs), testval=np.ones(nObs,dtype='int32'))
    S = DiscreteObsMJP('S',
                       pi=pi,
                       Q=Q,
                       M=M,
                       nObs=nObs,
                       observed_jumps=obs_jumps,
                       T=T,
Example #15
    def train_pymc3(docs_te, docs_tr, n_samples_te, n_samples_tr, n_words,
                    n_topics, n_tokens):
        """
        Return: 
            Pymc3 LDA results
        
        Parameters:
            docs_tr: training documents (processed)
            docs_te: testing documents (processed)
            n_samples_te: number of testing docs
            n_samples_tr: number of training docs
            n_words: size of vocabulary
            n_topics: number of topics to learn
            n_tokens: number of non-zero datapoints in processed training tf matrix
            
        """

        # Log-likelihood of documents for LDA
        def logp_lda_doc(beta, theta):
            """
            Returns the log-likelihood function for given documents.

            K : number of topics in the model
            V : number of words (size of vocabulary)
            D : number of documents (in a mini-batch)

            Parameters
            ----------
            beta : tensor (K x V)
              Word distribution.
            theta : tensor (D x K)
              Topic distributions for the documents.
            """
            def ll_docs_f(docs):
                dixs, vixs = docs.nonzero()
                vfreqs = docs[dixs, vixs]
                ll_docs = vfreqs * pmmath.logsumexp(
                    tt.log(theta[dixs]) + tt.log(beta.T[vixs]),
                    axis=1).ravel()

                # Per-word log-likelihood times no. of tokens in the whole dataset
                return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9) * n_tokens

            return ll_docs_f

        # fit the pymc3 LDA

        # we have a sparse dataset; it's better to have a dense batch so that all words occur there
        minibatch_size = 128

        # defining minibatch
        doc_t_minibatch = pm.Minibatch(docs_tr.toarray(), minibatch_size)
        doc_t = shared(docs_tr.toarray()[:minibatch_size])

        with pm.Model() as model:
            theta = Dirichlet(
                'theta',
                a=pm.floatX((1.0 / n_topics) * np.ones(
                    (minibatch_size, n_topics))),
                shape=(minibatch_size, n_topics),
                transform=t_stick_breaking(1e-9),
                # do not forget scaling
                total_size=n_samples_tr)
            beta = Dirichlet('beta',
                             a=pm.floatX((1.0 / n_topics) * np.ones(
                                 (n_topics, n_words))),
                             shape=(n_topics, n_words),
                             transform=t_stick_breaking(1e-9))
            # Note that we defined the likelihood with scaling, so no additional `total_size` kwarg is needed here
            doc = pm.DensityDist('doc',
                                 logp_lda_doc(beta, theta),
                                 observed=doc_t)

        # Encoder
        class LDAEncoder:
            """Encode (term-frequency) document vectors to variational means and (log-transformed) stds.
            """
            def __init__(self,
                         n_words,
                         n_hidden,
                         n_topics,
                         p_corruption=0,
                         random_seed=1):
                rng = np.random.RandomState(random_seed)
                self.n_words = n_words
                self.n_hidden = n_hidden
                self.n_topics = n_topics
                self.w0 = shared(0.01 * rng.randn(n_words, n_hidden).ravel(),
                                 name='w0')
                self.b0 = shared(0.01 * rng.randn(n_hidden), name='b0')
                self.w1 = shared(0.01 * rng.randn(n_hidden, 2 *
                                                  (n_topics - 1)).ravel(),
                                 name='w1')
                self.b1 = shared(0.01 * rng.randn(2 * (n_topics - 1)),
                                 name='b1')
                self.rng = MRG_RandomStreams(seed=random_seed)
                self.p_corruption = p_corruption

            def encode(self, xs):
                if 0 < self.p_corruption:
                    dixs, vixs = xs.nonzero()
                    mask = tt.set_subtensor(
                        tt.zeros_like(xs)[dixs, vixs],
                        self.rng.binomial(size=dixs.shape,
                                          n=1,
                                          p=1 - self.p_corruption))
                    xs_ = xs * mask
                else:
                    xs_ = xs

                w0 = self.w0.reshape((self.n_words, self.n_hidden))
                w1 = self.w1.reshape((self.n_hidden, 2 * (self.n_topics - 1)))
                hs = tt.tanh(xs_.dot(w0) + self.b0)
                zs = hs.dot(w1) + self.b1
                zs_mean = zs[:, :(self.n_topics - 1)]
                zs_rho = zs[:, (self.n_topics - 1):]
                return {'mu': zs_mean, 'rho': zs_rho}

            def get_params(self):
                return [self.w0, self.b0, self.w1, self.b1]

            # call Encoder

        encoder = LDAEncoder(n_words=n_words,
                             n_hidden=100,
                             n_topics=n_topics,
                             p_corruption=0.0)
        local_RVs = OrderedDict([(theta, encoder.encode(doc_t))])

        # get parameters
        encoder_params = encoder.get_params()

        # Train pymc3 Model
        η = .1
        s = shared(η)

        def reduce_rate(a, h, i):
            s.set_value(η / ((i / minibatch_size) + 1)**.7)

        with model:
            approx = pm.MeanField(local_rv=local_RVs)
            approx.scale_cost_to_minibatch = False
            inference = pm.KLqp(approx)
        inference.fit(10000,
                      callbacks=[reduce_rate],
                      obj_optimizer=pm.sgd(learning_rate=s),
                      more_obj_params=encoder_params,
                      total_grad_norm_constraint=200,
                      more_replacements={doc_t: doc_t_minibatch})

        # Extracting characteristic words
        doc_t.set_value(docs_tr.toarray())
        samples = pm.sample_approx(approx, draws=100)
        beta_pymc3 = samples['beta'].mean(axis=0)

        # Predictive distribution
        def calc_pp(ws, thetas, beta, wix):
            """
            Parameters
            ----------
            ws: ndarray (N,)
                Number of times the held-out word appeared in N documents.
            thetas: ndarray, shape=(N, K)
                Topic distributions for N documents.
            beta: ndarray, shape=(K, V)
                Word distributions for K topics.
            wix: int
                Index of the held-out word

            Return
            ------
            Log probability of held-out words.
            """
            return ws * np.log(thetas.dot(beta[:, wix]))

        def eval_lda(transform, beta, docs_te, wixs):
            """Evaluate LDA model by log predictive probability.

            Parameters
            ----------
            transform: Python function
                Transform document vectors to posterior mean of topic proportions.
            wixs: iterable of int
                Word indices to be held-out.
            """
            lpss = []
            docs_ = deepcopy(docs_te)
            thetass = []
            wss = []
            total_words = 0
            for wix in wixs:
                ws = docs_te[:, wix].ravel()
                if 0 < ws.sum():
                    # Hold-out
                    docs_[:, wix] = 0

                    # Topic distributions
                    thetas = transform(docs_)

                    # Predictive log probability
                    lpss.append(calc_pp(ws, thetas, beta, wix))

                    docs_[:, wix] = ws
                    thetass.append(thetas)
                    wss.append(ws)
                    total_words += ws.sum()
                else:
                    thetass.append(None)
                    wss.append(None)

            # Log-probability
            lp = np.sum(np.hstack(lpss)) / total_words

            return {'lp': lp, 'thetass': thetass, 'beta': beta, 'wss': wss}

        inp = tt.matrix(dtype='int64')
        sample_vi_theta = theano.function([inp],
                                          approx.sample_node(
                                              approx.model.theta,
                                              100,
                                              more_replacements={
                                                  doc_t: inp
                                              }).mean(0))

        def transform_pymc3(docs):
            return sample_vi_theta(docs)

        result_pymc3 = eval_lda(transform_pymc3, beta_pymc3, docs_te.toarray(),
                                np.arange(100))
        print('Predictive log prob (pm3) = {}'.format(result_pymc3['lp']))

        return result_pymc3
Example #16
    def test_mixture_of_mixture(self):
        nbr = 4
        with Model() as model:
            # mixtures components
            g_comp = Normal.dist(mu=Exponential('mu_g',
                                                lam=1.0,
                                                shape=nbr,
                                                transform=None),
                                 sigma=1,
                                 shape=nbr)
            l_comp = Lognormal.dist(mu=Exponential('mu_l',
                                                   lam=1.0,
                                                   shape=nbr,
                                                   transform=None),
                                    sigma=1,
                                    shape=nbr)
            # weight vector for the mixtures
            g_w = Dirichlet('g_w',
                            a=floatX(np.ones(nbr) * 0.0000001),
                            transform=None)
            l_w = Dirichlet('l_w',
                            a=floatX(np.ones(nbr) * 0.0000001),
                            transform=None)
            # mixture components
            g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
            l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
            # mixture of mixtures
            mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None)
            mix = Mixture('mix',
                          w=mix_w,
                          comp_dists=[g_mix, l_mix],
                          observed=np.exp(self.norm_x))

        test_point = model.test_point

        def mixmixlogp(value, point):
            priorlogp = st.dirichlet.logpdf(x=point['g_w'],
                                            alpha=np.ones(nbr)*0.0000001,
                                            ) + \
                        st.expon.logpdf(x=point['mu_g']).sum() + \
                        st.dirichlet.logpdf(x=point['l_w'],
                                            alpha=np.ones(nbr)*0.0000001,
                                            ) + \
                        st.expon.logpdf(x=point['mu_l']).sum() + \
                        st.dirichlet.logpdf(x=point['mix_w'],
                                            alpha=np.ones(2),
                                            )
            complogp1 = st.norm.logpdf(x=value, loc=point['mu_g'])
            mixlogp1 = logsumexp(np.log(point['g_w']) + complogp1,
                                 axis=-1,
                                 keepdims=True)
            complogp2 = st.lognorm.logpdf(value, 1., 0., np.exp(point['mu_l']))
            mixlogp2 = logsumexp(np.log(point['l_w']) + complogp2,
                                 axis=-1,
                                 keepdims=True)
            complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
            mixmixlogpg = logsumexp(np.log(point['mix_w']) + complogp_mix,
                                    axis=-1,
                                    keepdims=True)
            return priorlogp, mixmixlogpg

        value = np.exp(self.norm_x)[:, None]
        priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

        # check logp of mixture
        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))

        # check model logp
        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point))

        # check input and check logp again
        test_point['g_w'] = np.asarray([.1, .1, .2, .6])
        test_point['mu_g'] = np.exp(np.random.randn(nbr))
        priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point))
Example #17
    def setUp(self):
        #test Claims
        N = 100  # Number of patients
        M = 6  # Number of hidden states
        K = 10  # Number of comorbidities
        D = 721  # Number of claims
        Dd = 80  # Maximum number of claims that can occur at once
        min_obs = 10  # Minimum number of observed claims per patient
        max_obs = 30  # Maximum number of observed claims per patient
        self.M = M
        self.N = N
        self.K = K
        # Load pre-generated data
        from pickle import load

        T = load(open('../../data/X_layer_100_patients_old/T.pkl', 'rb'))
        self.T = T
        obs_jumps = load(
            open('../../data/X_layer_100_patients_old/obs_jumps.pkl', 'rb'))
        S_start = load(open('../../data/X_layer_100_patients_old/S.pkl', 'rb'))
        X_start = load(open('../../data/X_layer_100_patients_old/X.pkl', 'rb'))
        Z_start = load(open('../../data/X_layer_100_patients_old/Z.pkl', 'rb'))
        L_start = load(open('../../data/X_layer_100_patients_old/L.pkl', 'rb'))
        O = load(open('../../data/X_layer_100_patients_old/O_input.pkl', 'rb'))

        self.nObs = nObs = T.sum()
        self.zeroIndices = np.roll(self.T.cumsum(), 1)
        self.zeroIndices[0] = 0
        obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
        obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])
        O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])
        S_start = np.concatenate([S_start[i, 0:T[i]] for i in range(N)])
        X_start = np.concatenate([X_start[:, 0:T[i], i].T for i in range(N)])
        anchors = []
        self.Z_original
        mask = np.ones((K, D))
        for anchor in anchors:
            for hold in anchor[1]:
                mask[:, hold] = 0
                mask[anchor[0], hold] = 1
        Z_start = Z_start[mask.nonzero()]

        with Model() as self.model:
            self.pi = Dirichlet('pi',
                                a=as_tensor_variable(
                                    [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]),
                                shape=M)
            pi_min_potential = Potential(
                'pi_min_potential',
                TT.switch(TT.min(self.pi) < .1, -np.inf, 0))
            self.Q = DiscreteObsMJP_unif_prior('Q',
                                               M=M,
                                               lower=0.0,
                                               upper=1.0,
                                               shape=(M, M))
            self.S = DiscreteObsMJP('S',
                                    pi=self.pi,
                                    Q=self.Q,
                                    M=M,
                                    nObs=nObs,
                                    observed_jumps=obs_jumps,
                                    T=T,
                                    shape=(nObs))
            self.B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
            self.B = Beta('B', alpha=1., beta=1., shape=(K, M))
            self.X = Comorbidities('X',
                                   S=self.S,
                                   B0=self.B0,
                                   B=self.B,
                                   T=T,
                                   shape=(nObs, K))
            #self.Z = Beta('Z', alpha = 0.1, beta = 1., shape=(K,D))
            self.Z = Beta_with_anchors('Z',
                                       anchors=anchors,
                                       K=K,
                                       D=D,
                                       alpha=0.1,
                                       beta=1.,
                                       shape=(K, D))
            self.L = Beta('L', alpha=1., beta=1., shape=D)
            self.testClaims = Claims('O_obs',
                                     X=self.X,
                                     Z=self.Z,
                                     L=self.L,
                                     T=T,
                                     D=D,
                                     O_input=O,
                                     shape=(nObs, Dd),
                                     observed=O)

            self.forS = ForwardS(vars=[self.S],
                                 N=N,
                                 T=T,
                                 nObs=nObs,
                                 observed_jumps=obs_jumps)
            self.forX = ForwardX(vars=[self.X],
                                 N=N,
                                 T=T,
                                 K=K,
                                 D=D,
                                 Dd=Dd,
                                 O=O,
                                 nObs=nObs)

        from scipy.special import logit

        self.Q_raw_log = logit(
            np.array([0.631921, 0.229485, 0.450538, 0.206042, 0.609582]))

        B_lo = logit(
            np.array(
                [[0.000001, 0.760000, 0.720000, 0.570000, 0.700000, 0.610000],
                 [0.000001, 0.460000, 0.390000, 0.220000, 0.200000, 0.140000],
                 [0.000001, 0.620000, 0.620000, 0.440000, 0.390000, 0.240000],
                 [0.000001, 0.270000, 0.210000, 0.170000, 0.190000, 0.070000],
                 [0.000001, 0.490000, 0.340000, 0.220000, 0.160000, 0.090000],
                 [0.000001, 0.620000, 0.340000, 0.320000, 0.240000, 0.120000],
                 [0.000001, 0.550000, 0.390000, 0.320000, 0.290000, 0.150000],
                 [0.000001, 0.420000, 0.240000, 0.170000, 0.170000, 0.110000],
                 [0.000001, 0.310000, 0.300000, 0.230000, 0.190000, 0.110000],
                 [0.000001, 0.470000, 0.340000, 0.190000, 0.190000,
                  0.110000]]))

        B0_lo = logit(
            np.array(
                [[0.410412, 0.410412, 0.418293, 0.418293, 0.429890, 0.429890],
                 [0.240983, 0.240983, 0.240983, 0.240983, 0.240983, 0.240983],
                 [0.339714, 0.339714, 0.339714, 0.339714, 0.339714, 0.339714],
                 [0.130415, 0.130415, 0.130415, 0.130415, 0.130415, 0.130415],
                 [0.143260, 0.143260, 0.143260, 0.143260, 0.143260, 0.143260],
                 [0.211465, 0.211465, 0.211465, 0.211465, 0.211465, 0.211465],
                 [0.194187, 0.194187, 0.194187, 0.194187, 0.194187, 0.194187],
                 [0.185422, 0.185422, 0.185422, 0.185422, 0.185422, 0.185422],
                 [0.171973, 0.171973, 0.171973, 0.171973, 0.171973, 0.171973],
                 [0.152277, 0.152277, 0.152277, 0.152277, 0.152277,
                  0.152277]]))

        Z_lo = logit(Z_start)
        L_lo = logit(L_start)
        #import pdb; pdb.set_trace()
        self.myTestPoint = {
            'Q_ratematrixoneway': self.Q_raw_log,
            'B_logodds': B_lo,
            'B0_logodds': B0_lo,
            'S': S_start,
            'X': X_start,
            'Z_anchoredbeta': Z_lo,
            'L_logodds': L_lo,
            'pi_stickbreaking': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
        }
Example #18
    def test_normal_mixture_nd(self, nd, ncomp):
        nd = to_tuple(nd)
        ncomp = int(ncomp)
        comp_shape = nd + (ncomp, )
        test_mus = np.random.randn(*comp_shape)
        test_taus = np.random.gamma(1, 1, size=comp_shape)
        observed = generate_normal_mixture_data(w=np.ones(ncomp) / ncomp,
                                                mu=test_mus,
                                                sd=1 / np.sqrt(test_taus),
                                                size=10)

        with Model() as model0:
            mus = Normal("mus", shape=comp_shape)
            taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
            ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp, ))
            mixture0 = NormalMixture("m",
                                     w=ws,
                                     mu=mus,
                                     tau=taus,
                                     shape=nd,
                                     comp_shape=comp_shape)
            obs0 = NormalMixture("obs",
                                 w=ws,
                                 mu=mus,
                                 tau=taus,
                                 shape=nd,
                                 comp_shape=comp_shape,
                                 observed=observed)

        with Model() as model1:
            mus = Normal("mus", shape=comp_shape)
            taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
            ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp, ))
            comp_dist = [
                Normal.dist(mu=mus[..., i], tau=taus[..., i], shape=nd)
                for i in range(ncomp)
            ]
            mixture1 = Mixture("m", w=ws, comp_dists=comp_dist, shape=nd)
            obs1 = Mixture("obs",
                           w=ws,
                           comp_dists=comp_dist,
                           shape=nd,
                           observed=observed)

        with Model() as model2:
            # Expected to fail if comp_shape is not provided,
            # nd is multidim and it does not broadcast with ncomp. If by chance
            # it does broadcast, an error is raised if the mixture is given
            # observed data.
            # Furthermore, the Mixture will also raise errors when the observed
            # data is multidimensional but it does not broadcast well with
            # comp_dists.
            mus = Normal("mus", shape=comp_shape)
            taus = Gamma("taus", alpha=1, beta=1, shape=comp_shape)
            ws = Dirichlet("ws", np.ones(ncomp), shape=(ncomp, ))
            if len(nd) > 1:
                if nd[-1] != ncomp:
                    with pytest.raises(ValueError):
                        NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
                    mixture2 = None
                else:
                    mixture2 = NormalMixture("m",
                                             w=ws,
                                             mu=mus,
                                             tau=taus,
                                             shape=nd)
            else:
                mixture2 = NormalMixture("m", w=ws, mu=mus, tau=taus, shape=nd)
            observed_fails = False
            if len(nd) >= 1 and nd != (1, ):
                try:
                    np.broadcast(np.empty(comp_shape), observed)
                except Exception:
                    observed_fails = True
            if observed_fails:
                with pytest.raises(ValueError):
                    NormalMixture("obs",
                                  w=ws,
                                  mu=mus,
                                  tau=taus,
                                  shape=nd,
                                  observed=observed)
                obs2 = None
            else:
                obs2 = NormalMixture("obs",
                                     w=ws,
                                     mu=mus,
                                     tau=taus,
                                     shape=nd,
                                     observed=observed)

        testpoint = model0.test_point
        testpoint["mus"] = test_mus
        testpoint["taus"] = test_taus
        assert_allclose(model0.logp(testpoint), model1.logp(testpoint))
        assert_allclose(mixture0.logp(testpoint), mixture1.logp(testpoint))
        assert_allclose(obs0.logp(testpoint), obs1.logp(testpoint))
        if mixture2 is not None and obs2 is not None:
            assert_allclose(model0.logp(testpoint), model2.logp(testpoint))
        if mixture2 is not None:
            assert_allclose(mixture0.logp(testpoint), mixture2.logp(testpoint))
        if obs2 is not None:
            assert_allclose(obs0.logp(testpoint), obs2.logp(testpoint))
Example #19
def run_normal_mv_model_prior(data,
                              K=3,
                              mus=None,
                              mc_samples=10000,
                              jobs=1,
                              n_cols=10,
                              n_rows=100,
                              neigs=1):
    n_samples, n_feats = data.shape
    n_samples = n_cols * n_rows
    max_neigs = 4 * neigs * (neigs + 1)
    #print max_neigs
    to_fill = indxs_neigs(range(n_samples),
                          n_cols=n_cols,
                          n_rows=n_rows,
                          n=neigs)
    inds = np.where(to_fill != -1)[0]
    to_fill = to_fill[to_fill != -1]
    aux = tt.ones(n_samples * max_neigs) * -69

    with pm.Model() as model:

        packed_L = pm.LKJCholeskyCov('packed_L',
                                     n=n_feats,
                                     eta=2.,
                                     sd_dist=pm.HalfCauchy.dist(2.5))
        L = pm.expand_packed_triangular(n_feats, packed_L)
        sigma = pm.Deterministic('Sigma', L.dot(L.T))

        mus = 0. if mus is None else mus

        mus = pm.Normal('mus',
                        mu=[[10, 10], [55, 55], [105, 105], [155, 155],
                            [205, 205]],
                        sd=10,
                        shape=(K, n_feats))
        #sds = pm.HalfNormal('sds',sd = 50, shape = (K,n_feats) )
        #mus = pm.Normal('mus', mu = [10,55,105,155,205], sd = sds , shape=(K,n_feats) )
        #nu = pm.Exponential('nu', 1./10, shape=(K,n_feats), testval=tt.ones((K,n_feats)) )
        #mus = pm.StudentT('mus',nu=nu, mu = [[10],[55],[105],[155],[205]], sd = 100., shape=(K,n_feats))

        pi = Dirichlet('pi', a=pm.floatX([1. for _ in range(K)]), shape=K)
        #TODO one pi per voxel
        category = pm.Categorical('category', p=pi, shape=n_samples)
        #pm.Deterministic('pri', tt.as_tensor_variable(get_prior2(category)))

        #prior = pm.Deterministic('prior',tt.stack( [tt.sum(tt.eq(category[i], category[indxs_neig(i, n_rows=73, n_cols=74)]))/8.0 for i in range(73*74) ] ))

        #prior = pm.Deterministic('prior',tt.sum(tt.eq(category  , category[[j for j in range(8)]].reshape( (8,1) ) )))

        # Count, for each voxel, the neighbour slots that carry the voxel's own
        # category (empty slots keep the sentinel and never match), scaled by the
        # 8-neighbourhood size; see the standalone sketch after this function.
        aux2 = tt.set_subtensor(aux[inds], category[to_fill])
        neigh_cats = aux2.reshape((n_samples, max_neigs))
        prior = pm.Deterministic(
            'prior',
            (tt.sum(tt.eq(neigh_cats, category.reshape((n_samples, 1))), axis=1) + 0.0) / 8.0)
        #prior2 = pm.Normal('prior2', mu = prior, sd = 0.5, shape= n_samples)

        # aux3 = tt.as_tensor_variable(pm.floatX([1,1,2,2,2,2,2,2,2,2]*100 ))
        #        aux3 = tt.set_subtensor( aux3[(tt.eq(category,1)).nonzero()], 2  )
        # prior2 = pm.Deterministic('prior2', aux3 )
        #
        xs = DensityDist('x',
                         logp_gmix(mus[category], L, prior, category),
                         observed=data)

    with model:
        step2 = pm.ElemwiseCategorical(vars=[category], values=range(K))
        #step = pm.CategoricalGibbsMetropolis(vars = [prior] )
        trace = sample(mc_samples, step=[step2], n_jobs=jobs, tune=600)

    pm.traceplot(trace, varnames=['mus', 'pi', 'Sigma'])
    plt.title('normal mv model 40 cols')

    mod = stats.mode(trace['category'][int(mc_samples * 0.75):])
    #if chains > 1:
    #   print (max(np.max(gr_stats) for gr_stats in pm.gelman_rubin(trace).values()))
    return model, mod, trace
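# A standalone NumPy sketch (toy data, not from the original) of the neighbour-
# agreement prior built above: fill a fixed number of neighbour slots per voxel,
# mark missing neighbours with a sentinel, and count how many filled slots share
# the voxel's own category.
import numpy as np

category = np.array([0, 0, 1, 1])                      # one label per voxel
max_neigs = 2                                          # neighbour slots per voxel
neigh = np.full(category.size * max_neigs, -69)        # sentinel: no neighbour
# voxels 0 and 1 are neighbours of each other, as are voxels 2 and 3
neigh[[0, 2, 4, 6]] = category[[1, 0, 3, 2]]
agreement = (neigh.reshape(category.size, max_neigs) ==
             category.reshape(category.size, 1)).sum(axis=1)
print(agreement)                                       # [1 1 1 1]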
示例#20
0
with pm.Model() as model:
    # Prior for covariance matrix
    
    # packed_L = [pm.LKJCholeskyCov('packedL_%d' % i, n=dimensions, eta=1., sd_dist=pm.Gamma.dist(mu = 2, sigma = 1)) for i in range(n_comp)]
    # L = [pm.expand_packed_triangular(dimensions, packed_L[i]) for i in range(n_comp)]
    # Σ = [pm.Deterministic('Σ_%d' % i, L[i].dot(L[i].T)) for i in range(n_comp)]
   
    packed_L = pm.LKJCholeskyCov('packedL', n=dimensions, eta=1., sd_dist=pm.Gamma.dist(mu = 2, sigma = 1))
    L = pm.expand_packed_triangular(dimensions, packed_L)
    Σ = pm.Deterministic('Σ', L.dot(L.T))
    
    # Prior for mean:
    # NOTE: the prior precision below hard-codes a 2x2 identity, so it assumes dimensions == 2.
    mus = [MvNormal('mu_%d' % i, mu=pm.floatX(np.zeros(dimensions)), tau=pm.floatX(0.1 * np.eye(2)), shape=(dimensions,)) for i in range(n_comp)]
    # Prior for weights:
    pi = Dirichlet('pi', a=pm.floatX(concentration * np.ones(n_comp)), shape=(n_comp,))   
    prior = sample_prior()
    x = pm.DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)
    
# Plot prior for some parameters:
# print(prior.keys())
# plt.hist(prior['Σ'][:,0,1])

with model:
    %time hmc_trace = pm.sample(draws=250, tune=100, cores=4)

with model:
    %time fit_advi = pm.fit(n=50000, obj_optimizer=pm.adagrad(learning_rate=1e-1), method = 'advi')

advi_elbo = pd.DataFrame(
    {'log-ELBO': -np.log(fit_advi.hist),
示例#21
0
def run_lda(args):
    tf_vectorizer, docs_tr, docs_te = prepare_sparse_matrix_nonlabel(args.n_tr, args.n_te, args.n_word)
    feature_names = tf_vectorizer.get_feature_names()
    doc_tr_minibatch = pm.Minibatch(docs_tr.toarray(), args.bsz)
    doc_tr = shared(docs_tr.toarray()[:args.bsz])

    def log_prob(beta, theta):
        """Returns the log-likelihood function for given documents.

        K : number of topics in the model
        V : number of words (size of vocabulary)
        D : number of documents (in a mini-batch)

        Parameters
        ----------
        beta : tensor (K x V)
            Word distributions.
        theta : tensor (D x K)
            Topic distributions for documents.
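
        A standalone NumPy sketch of this computation appears after ``run_lda``.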
        """

        def ll_docs_f(docs):
            dixs, vixs = docs.nonzero()
            vfreqs = docs[dixs, vixs]
            ll_docs = (vfreqs * pmmath.logsumexp(tt.log(theta[dixs]) + tt.log(beta.T[vixs]),
                                                 axis=1).ravel())

            return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9)

        return ll_docs_f

    with pm.Model() as model:
        beta = Dirichlet("beta",
                         a=pm.floatX((1. / args.n_topic) * np.ones((args.n_topic, args.n_word))),
                         shape=(args.n_topic, args.n_word), )

        theta = Dirichlet("theta",
                          a=pm.floatX((10. / args.n_topic) * np.ones((args.bsz, args.n_topic))),
                          shape=(args.bsz, args.n_topic), total_size=args.n_tr, )

        doc = pm.DensityDist("doc", log_prob(beta, theta), observed=doc_tr)

    encoder = ThetaEncoder(n_words=args.n_word, n_hidden=100, n_topics=args.n_topic)
    local_RVs = OrderedDict([(theta, encoder.encode(doc_tr))])
    encoder_params = encoder.get_params()

    s = shared(args.lr)

    def reduce_rate(a, h, i):
        s.set_value(args.lr / ((i / args.bsz) + 1) ** 0.7)

    with model:
        approx = pm.MeanField(local_rv=local_RVs)
        approx.scale_cost_to_minibatch = False
        inference = pm.KLqp(approx)

    inference.fit(args.n_iter,
                  callbacks=[reduce_rate, pm.callbacks.CheckParametersConvergence(diff="absolute")],
                  obj_optimizer=pm.adam(learning_rate=s),
                  more_obj_params=encoder_params,
                  total_grad_norm_constraint=200,
                  more_replacements={ doc_tr: doc_tr_minibatch }, )

    doc_tr.set_value(docs_tr.toarray())
    inp = tt.matrix(dtype="int64")
    sample_vi_theta = theano.function([inp],
        approx.sample_node(approx.model.theta, args.n_sample, more_replacements={doc_tr: inp}), )

    test = docs_te.toarray()
    test_n = test.sum(1)

    beta_pymc3 = pm.sample_approx(approx, draws=args.n_sample)['beta']
    theta_pymc3 = sample_vi_theta(test)

    assert beta_pymc3.shape == (args.n_sample, args.n_topic, args.n_word)
    assert theta_pymc3.shape == (args.n_sample, args.n_te, args.n_topic)

    beta_mean = beta_pymc3.mean(0)
    theta_mean = theta_pymc3.mean(0)

    pred_rate = theta_mean.dot(beta_mean)
    pp_test = (test * np.log(pred_rate)).sum(1) / test_n

    posteriors = { 'theta': theta_pymc3, 'beta': beta_pymc3,}

    log_top_words(beta_pymc3.mean(0), feature_names, n_top_words=args.n_top_word)
    save_elbo(approx.hist)
    save_pp(pp_test)
    save_draws(posteriors)
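# A standalone NumPy sketch (toy counts, not from the original) of what ll_docs_f
# computes: every word token contributes log(sum_k theta[d, k] * beta[k, v]),
# weighted by its count, and the total is normalised by the token count.
import numpy as np
from scipy.special import logsumexp as np_logsumexp

docs_toy = np.array([[2, 0, 1],                  # D x V term counts (2 docs, 3 words)
                     [0, 3, 0]])
theta_toy = np.array([[0.7, 0.3],                # D x K topic weights per document
                      [0.2, 0.8]])
beta_toy = np.array([[0.5, 0.2, 0.3],            # K x V word distributions per topic
                     [0.1, 0.6, 0.3]])
dixs, vixs = docs_toy.nonzero()
vfreqs = docs_toy[dixs, vixs]
ll = (vfreqs * np_logsumexp(np.log(theta_toy[dixs]) + np.log(beta_toy.T[vixs]),
                            axis=1)).sum()
print(ll / (vfreqs.sum() + 1e-9))                # average per-token log-likelihood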
示例#22
0
df = pd.read_csv("../data/data-lda.txt")

n_person = len(df["PersonID"].unique())
n_item = 120
K = 6
IDs = df.values[:, 0].astype(np.int32) - 1
Items = df.values[:, 1].astype(np.int32) - 1

# Below, random variables whose shape is the number of classes are indexed with a
# data-length vector of class assignments; for a detailed explanation of this
# operation see (https://pymc-devs.github.io/pymc3/notebooks/GLM-hierarchical.html).
# A small NumPy sketch of the indexing follows.
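# A small NumPy sketch (toy shapes, not from the original) of that indexing step:
# selecting one row of an (n_person, K) matrix per observation with a data-length
# index vector gives an (n_data, K) matrix of per-observation probability vectors.
import numpy as np

theta_toy = np.array([[0.9, 0.1],     # person 0
                      [0.2, 0.8]])    # person 1
ids_toy = np.array([0, 0, 1])         # person index for each of 3 observations
print(theta_toy[ids_toy, :].shape)    # (3, 2)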

basic_model = Model()
with basic_model:
    # prior distribution, shape [50, 6]
    theta = Dirichlet('p_theta', a=(1.0 / K) * np.ones(K), shape=(n_person, K))
    # prior distribution, shape [6, 112]
    phi = Dirichlet('p_phi',
                    a=(1.0 / n_item) * np.ones(n_item),
                    shape=(K, n_item))

    # likelihood
    # number of observations x per-observation category-probability vector, shape [1117, 6]
    theta = theta[IDs, :]
    # number of observations x item-probability vector for each ID, shape [1117, 112]
    person_to_item = tt.dot(theta, phi)

    H = Categorical("tes", p=person_to_item, shape=(1117), observed=Items)

    ## sampling
    # There are many parameters and this is heavy to run locally, so the number of draws is kept quite small.
示例#23
0
    X.append(np.random.normal(U[i], S[i] * np.eye(ndims), Y[i]))
X_obs = np.concatenate((X[0], X[1], X[2]), 0)

#print X_obs
print(C)
print(U)
plt.plot(X_obs[:], np.ones(X_obs.shape), 'o', markersize=8)
plt.show()

# Infer class labels
from pymc3 import Dirichlet, Normal, MvNormal, HalfNormal, Categorical
import theano.tensor

with Model() as gmm:
    C = Dirichlet('mixture_coeff',
                  dirichlet_scale * dirichlet_shape,
                  shape=nclusters)
    S = HalfNormal('S', sd=sd_halfnormal, shape=nclusters)
    U = Normal('mu', mu=mean_prior_mean, sd=mean_prior_sd, shape=nclusters)
    Y = Categorical('labels', p=C, shape=nsamples)
    X = Normal('X', mu=U[Y], sd=S[Y], observed=X_obs)

from pymc3 import find_MAP
map_estimate = find_MAP(model=gmm)
print(map_estimate)

from pymc3 import NUTS, sample, Slice, Metropolis, ElemwiseCategorical, HamiltonianMC

modified_map_estimate = copy.deepcopy(map_estimate)
modified_map_estimate['mu'] = [
    1 if x < 0.001 else x for x in modified_map_estimate['mu']
示例#24
0
# define model mixture log-likelihood 
def logp_mix(mf):
    def logp_(value):
        logps = tt.log(mf) + value

        return tt.sum(logsumexp(logps, axis=1))
        
    return logp_
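# A standalone NumPy sketch (toy numbers, not from the original) of the mixture
# log-likelihood defined above: each row of the observed matrix holds per-model
# log evidences for one subject, and the logp is sum_i logsumexp_m(log mf[m] + LME[i, m]).
import numpy as np
from scipy.special import logsumexp as np_logsumexp

mf_toy = np.array([0.6, 0.4])                 # mixture frequencies over M = 2 models
lme_toy = np.array([[-10.0, -12.0],           # log model evidence, subject 1
                    [-11.0, -9.5]])           # log model evidence, subject 2
print(np_logsumexp(np.log(mf_toy) + lme_toy, axis=1).sum())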


# define and fit the probabilistic model
with Model() as model:
    tau = HalfCauchy('tau', beta = 1.)
    
    mf = Dirichlet('mf', a = tt.ones(M)/tau, shape=(M,))
    xs = DensityDist('logml', logp_mix(mf), observed=LME)

with model:    
    approx = fit(method='advi', n = 10000)
    
trace = approx.sample(nsample)    
traceplot(trace);

#compute exceedance probability
ep, _ = np.histogram(trace['mf'].argmax(axis = 1), bins = M)
ep = pd.DataFrame({'ep':ep/nsample, 'models': cols})


fig = plt.figure(figsize = (10,5))
ax1 = plt.subplot(121)
示例#25
0
    def test_mixture_of_mixture(self):
        if theano.config.floatX == "float32":
            rtol = 1e-4
        else:
            rtol = 1e-7
        nbr = 4
        with Model() as model:
            # mixtures components
            g_comp = Normal.dist(mu=Exponential("mu_g",
                                                lam=1.0,
                                                shape=nbr,
                                                transform=None),
                                 sigma=1,
                                 shape=nbr)
            l_comp = Lognormal.dist(mu=Exponential("mu_l",
                                                   lam=1.0,
                                                   shape=nbr,
                                                   transform=None),
                                    sigma=1,
                                    shape=nbr)
            # weight vector for the mixtures
            g_w = Dirichlet("g_w",
                            a=floatX(np.ones(nbr) * 0.0000001),
                            transform=None,
                            shape=(nbr, ))
            l_w = Dirichlet("l_w",
                            a=floatX(np.ones(nbr) * 0.0000001),
                            transform=None,
                            shape=(nbr, ))
            # mixture components
            g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
            l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
            # mixture of mixtures
            mix_w = Dirichlet("mix_w",
                              a=floatX(np.ones(2)),
                              transform=None,
                              shape=(2, ))
            mix = Mixture("mix",
                          w=mix_w,
                          comp_dists=[g_mix, l_mix],
                          observed=np.exp(self.norm_x))

        test_point = model.test_point

        def mixmixlogp(value, point):
            floatX = theano.config.floatX
            priorlogp = (st.dirichlet.logpdf(
                x=point["g_w"],
                alpha=np.ones(nbr) * 0.0000001,
            ).astype(floatX) +
                         st.expon.logpdf(x=point["mu_g"]).sum(dtype=floatX) +
                         st.dirichlet.logpdf(
                             x=point["l_w"],
                             alpha=np.ones(nbr) * 0.0000001,
                         ).astype(floatX) +
                         st.expon.logpdf(x=point["mu_l"]).sum(dtype=floatX) +
                         st.dirichlet.logpdf(
                             x=point["mix_w"],
                             alpha=np.ones(2),
                         ).astype(floatX))
            complogp1 = st.norm.logpdf(x=value,
                                       loc=point["mu_g"]).astype(floatX)
            mixlogp1 = logsumexp(np.log(point["g_w"]).astype(floatX) +
                                 complogp1,
                                 axis=-1,
                                 keepdims=True)
            complogp2 = st.lognorm.logpdf(value, 1.0, 0.0,
                                          np.exp(point["mu_l"])).astype(floatX)
            mixlogp2 = logsumexp(np.log(point["l_w"]).astype(floatX) +
                                 complogp2,
                                 axis=-1,
                                 keepdims=True)
            complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
            mixmixlogpg = logsumexp(np.log(point["mix_w"]).astype(floatX) +
                                    complogp_mix,
                                    axis=-1,
                                    keepdims=False)
            return priorlogp, mixmixlogpg

        value = np.exp(self.norm_x)[:, None]
        priorlogp, mixmixlogpg = mixmixlogp(value, test_point)

        # check logp of mixture
        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)

        # check model logp
        assert_allclose(priorlogp + mixmixlogpg.sum(),
                        model.logp(test_point),
                        rtol=rtol)

        # check input and check logp again
        test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6])
        test_point["mu_g"] = np.exp(np.random.randn(nbr))
        priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
        assert_allclose(priorlogp + mixmixlogpg.sum(),
                        model.logp(test_point),
                        rtol=rtol)
示例#26
0
# Log likelihood of Gaussian mixture distribution
def logp_gmix(mus, pi, taus, n_components):
    def logp_(value):        
        logps = [tt.log(pi[i]) + logp_normal(mus[i,:], taus[i], value) for i in range(n_components)]
        return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))
    return logp_
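# A standalone NumPy/SciPy sketch (toy data, not from the original) of the mixture
# density above: logp = sum over points of logsumexp_k( log pi[k] + log N(x | mu[k], tau[k]^-1) ).
import numpy as np
from scipy.stats import multivariate_normal
from scipy.special import logsumexp as np_logsumexp

x_toy = np.array([[0.0, 0.0], [1.0, 1.0]])            # two 2-D observations
mus_toy = np.array([[0.0, 0.0], [3.0, 3.0]])          # component means
pis_toy = np.array([0.5, 0.5])                        # mixture weights
taus_toy = [np.eye(2), np.eye(2)]                     # component precision matrices
component_logps = np.column_stack([
    np.log(pis_toy[k]) +
    multivariate_normal(mus_toy[k], np.linalg.inv(taus_toy[k])).logpdf(x_toy)
    for k in range(2)
])
print(np_logsumexp(component_logps, axis=1).sum())    # total mixture log-likelihood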

## Prior for model:

componentMean = ms + np.random.uniform(0,5,n_dimensions)
componentTau = np.random.uniform(0,2,n_dimensions) * np.eye(n_dimensions)

with pm.Model() as model:
    mus = MvNormal('mu', mu=pm.floatX(componentMean), tau=pm.floatX(componentTau), shape=(n_components, n_dimensions))
    pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(n_components)), shape=(n_components,))
    packed_L = [pm.LKJCholeskyCov('packed_L_%d' % i, n=n_dimensions, eta=2., sd_dist=pm.HalfCauchy.dist(2.5)) for i in range(n_components)]
    L = [pm.expand_packed_triangular(n_dimensions, packed_L[i]) for i in range(n_components)]
    sigmas = [pm.Deterministic('sigma_%d' % i, tt.dot(L[i],L[i].T)) for i in range(n_components)]
    taus = [tt.nlinalg.matrix_inverse(sigmas[i]) for i in range(n_components)]
    xs = DensityDist('x', logp_gmix(mus, pi, taus, n_components), observed=data)
    
with model:
    advi_fit = pm.fit(n=500000, obj_optimizer=pm.adagrad(learning_rate=1e-1))  
    
advi_trace = advi_fit.sample(10000)    
advi_summary = pm.summary(advi_trace)

pickle_out = open("advi_summary.pickle","wb")
pickle.dump(advi_summary, pickle_out)
pickle_out.close()
示例#27
0
X_start = X_start[0:nObs]
O = O[0:nObs]

nObs = S_start.shape[0]
N = T.shape[0]  # Number of patients
M = pi_start.shape[0]  # Number of hidden states
K = Z_start.shape[0]  # Number of comorbidities
D = Z_start.shape[1]  # Number of claims
Dd = 16  # Maximum number of claims that can occur at once

#import pdb; pdb.set_trace()

model = Model()
with model:
    #Fails: #pi = Dirichlet('pi', a = as_tensor_variable([0.147026,0.102571,0.239819,0.188710,0.267137,0.054738]), shape=M, testval = np.ones(M)/float(M))
    pi = Dirichlet('pi', a=as_tensor_variable(pi_start.copy()), shape=M)
    pi_min_potential = Potential('pi_min_potential',
                                 TT.switch(TT.min(pi) < .001, -np.inf, 0))

    Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0, shape=(M, M))

    #S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs, observed_jumps=obs_jumps, T=T, shape=(nObs), testval=np.ones(nObs,dtype='int32'))
    S = DiscreteObsMJP('S',
                       pi=pi,
                       Q=Q,
                       M=M,
                       nObs=nObs,
                       observed_jumps=obs_jumps,
                       T=T,
                       shape=(nObs))
示例#28
0
    plt.figure(figsize=(5, 5))
    plt.scatter(data[:, 0], data[:, 1], c='g', alpha=0.5)
    plt.scatter(ms[0, 0], ms[0, 1], c='r', s=100)
    plt.scatter(ms[1, 0], ms[1, 1], c='b', s=100)
    
    from pymc3.math import logsumexp


    #Model original
    with pm.Model() as model:
        mus = [MvNormal('mu_%d' % i,
                        mu=pm.floatX(np.zeros(2)),
                        tau=pm.floatX(0.1 * np.eye(2)),
                        shape=(2,))
               for i in range(2)]
        pi = Dirichlet('pi', a=pm.floatX(0.1 * np.ones(2)), shape=(2,))
        
        xs = DensityDist('x', logp_gmix(mus, pi, np.eye(2)), observed=data)
        
#   
#    #Model for GMM clustering
#    with pm.Model() as model:
#        # cluster sizes
#        p = pm.Dirichlet('p', a=np.array([1., 1.]), shape=2)
#        # ensure all clusters have some points
#        p_min_potential = pm.Potential('p_min_potential', tt.switch(tt.min(p) < .1, -np.inf, 0))
#    
#    
#        # cluster centers
#        means = [MvNormal('mu_%d' % i,mu=pm.floatX(np.zeros(2)),tau=pm.floatX(0.1 * np.eye(2)),shape=(2,))
#               for i in range(2)]
示例#29
0
        def logp_gmix(mus, pi, taus, n_components):
            def logp_(value):
                logps = [
                    tt.log(pi[i]) + logp_normal(mus[i, :], taus[i], value)
                    for i in range(n_components)
                ]
                return tt.sum(
                    logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

            return logp_

        # Sparse model with diagonal covariance:
        with pm.Model() as model:

            # Weights of each component:
            w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components, ))

            # Impose a sparse structure on the mean: all off-diagonal elements share a single background value, because the background should be the same throughout.
            mus_signal = MvNormal(
                'mus_signal',
                mu=pm.floatX(signalMean_priorMean),
                tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2),
                shape=n_dimensions)
            mus_background = MvNormal('mus_background',
                                      mu=pm.floatX(backgroundMean_priorMean),
                                      tau=pm.floatX(
                                          np.eye(n_dimensions) /
                                          backgroundMean_priorSD**2),
                                      shape=n_dimensions)
            mus = tt.fill_diagonal(
                tt.reshape(tt.tile(mus_background, n_components),
示例#30
0
    def setUp(self):
        #test Claims
        N = 5  # Number of patients
        self.N = N
        M = 3  # Number of hidden states
        self.M = M
        K = 2  # Number of comorbidities
        D = 20  # Number of claims
        Dd = 4  # Maximum number of claims that can occur at once
        min_obs = 2  # Minimum number of observed claims per patient
        max_obs = 4  # Maximum number of observed claims per patient
        #obs_jumps = np.ones((N,max_obs-1))
        obs_jumps = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1],
                              [1, 1, 1]])
        T = np.array([4, 2, 3, 4, 2])
        self.T = T
        nObs = T.sum()
        obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
        obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])

        #O(4,4,5)
        #O = np.zeros((nObs,Dd),dtype='int8')
        O = np.zeros((Dd, max_obs, N), dtype='int8')
        #import pdb; pdb.set_trace()
        O[[0, 1, 3, 2, 3, 3], [0, 1, 3, 2, 3, 3], [0, 1, 4, 3, 3, 4]] = 1
        #O[[0,5,11,12],[0,1,2,3]] = 1
        O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])

        # ≈ log(0.1) for every (comorbidity, claim) entry
        Z_lo = np.full((K, D), -2.30258509)

        anchors = []
        mask = np.ones((K, D))
        for anchor in anchors:
            for hold in anchor[1]:
                mask[:, hold] = 0
                mask[anchor[0], hold] = 1
        Z_lo = Z_lo[mask.nonzero()]

        with Model() as self.model:
            self.pi = Dirichlet('pi',
                                a=as_tensor_variable([0.5, 0.5, 0.5]),
                                shape=M)
            pi_min_potential = Potential(
                'pi_min_potential',
                TT.switch(TT.min(self.pi) < .1, -np.inf, 0))
            self.Q = DiscreteObsMJP_unif_prior('Q',
                                               M=M,
                                               lower=0.0,
                                               upper=1.0,
                                               shape=(M, M))
            self.S = DiscreteObsMJP('S',
                                    pi=self.pi,
                                    Q=self.Q,
                                    M=M,
                                    nObs=nObs,
                                    observed_jumps=obs_jumps,
                                    T=T,
                                    shape=(nObs))
            self.B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
            self.B = Beta('B', alpha=1., beta=1., shape=(K, M))
            self.X = Comorbidities('X',
                                   S=self.S,
                                   B0=self.B0,
                                   B=self.B,
                                   T=T,
                                   shape=(nObs, K))
            #self.Z = Beta('Z', alpha = 0.1, beta = 1., shape=(K,D))
            self.Z = Beta_with_anchors('Z',
                                       anchors=anchors,
                                       K=K,
                                       D=D,
                                       alpha=0.1,
                                       beta=1.,
                                       shape=(K, D))
            self.L = Beta('L', alpha=1., beta=1., shape=D)
            #L = Beta('L', alpha = 0.1, beta = 1, shape=D, transform=None)
            #L = Uniform('L', left = 0.0, right = 1.0, shape=D, transform=None)
            #L = Uniform('L', lower = 0.0, upper = 1.0, shape=D)
            self.testClaims = Claims('O_obs',
                                     X=self.X,
                                     Z=self.Z,
                                     L=self.L,
                                     T=T,
                                     D=D,
                                     O_input=O,
                                     shape=(nObs, Dd),
                                     observed=O)

            self.forS = ForwardS(vars=[self.S],
                                 N=N,
                                 T=T,
                                 nObs=nObs,
                                 observed_jumps=obs_jumps)
            self.forX = ForwardX(vars=[self.X],
                                 N=N,
                                 T=T,
                                 K=K,
                                 D=D,
                                 Dd=Dd,
                                 O=O,
                                 nObs=nObs)

        self.myTestPoint = {
            'Z_anchoredbeta':
            Z_lo,
            'Q_ratematrixoneway':
            np.array([0.1, 0.1]),
            'pi_stickbreaking':
            np.array([0.2, 0.1]),
            'S':
            np.array([[0, 0, 1, 1], [1, 1, 1, 1], [1, 1, 2, 2], [0, 2, 2, 2],
                      [0, 0, 0, 1]],
                     dtype=np.int32),
            'B0_logodds':
            np.array([[0., 1., 0.], [0., 0., 1.]]),
            'X':
            np.array([[[0, 1, 1, 1, 1], [0, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1]],
                      [[1, 1, 0, 0, 1], [1, 1, 0, 1, 1], [1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1]]],
                     dtype=np.int8),
            'L_logodds':
            np.array([
                0.1, 0.1, 0.1, 0.1, 0.01, 0.01, 0.01, 0.01, 0.0011, 0.0011,
                0.0011, 0.0011, 0.0011, 0., 0.0101, 0.0101, 0.0101, 0.01, 0.01,
                0.01
            ]),
            'B_logodds':
            np.array([[1., 0., 1.], [0., 1., 0.]])
        }
        self.myTestPoint['S'] = np.concatenate(
            [self.myTestPoint['S'][i, 0:T[i]] for i in range(N)])
        self.myTestPoint['X'] = np.concatenate(
            [self.myTestPoint['X'][:, 0:T[i], i].T for i in range(N)])
        # NOTE: this first stepX_Correct is immediately overwritten by the
        # assignment below and is kept only for reference.
        stepX_Correct = np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                                   [0, 0, 0, 0, 0], [0, 0, 0, 0, 1]],
                                  [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                                   [1, 0, 0, 0, 0], [1, 0, 0, 0, 0]]],
                                 dtype=np.int8)

        stepX_Correct = np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 1, 0],
                                   [0, 0, 0, 1, 0], [0, 0, 0, 1, 0]],
                                  [[0, 1, 0, 0, 0], [0, 1, 0, 0, 0],
                                   [0, 1, 0, 0, 0], [0, 1, 0, 0, 1]]],
                                 dtype=np.int8)