def makeShapePrior(self, data, parts):

        inputcat = data.inputcat
        psfSize = data.psfsize
        pivot, m_slope, m_b, m_cov, c = bentvoigt3PsfDependence(
            data.options.steppsf, psfSize)
        parts.step_m_prior = pymc.MvNormalCov('step_m_prior',
                                              [pivot, m_b, m_slope], m_cov)

        @pymc.deterministic(trace=False)
        def shearcal_m(size=inputcat['size'], mprior=parts.step_m_prior):

            pivot = mprior[0]
            m_b = mprior[1]
            m_slope = mprior[2]

            m = np.zeros_like(size)
            m[size >= pivot] = m_b
            m[size < pivot] = m_slope * (size[size < pivot] - pivot) + m_b

            return np.ascontiguousarray(m.astype(np.float64))

        parts.shearcal_m = shearcal_m

        parts.step_c_prior = pymc.Normal('step_c_prior', c, 1. / (0.0004**2))

        @pymc.deterministic(trace=False)
        def shearcal_c(size=inputcat['size'], cprior=parts.step_c_prior):
            c = cprior * np.ones_like(size)
            return np.ascontiguousarray(c.astype(np.float64))

        parts.shearcal_c = shearcal_c
        parts.sigma = pymc.Uniform('sigma', 0.15, 0.5)  # sigma
        parts.gamma = pymc.Uniform('gamma', 0.003, 0.1)  # gamma
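A minimal sketch of the same prior-plus-deterministic pattern in isolation; the pivot/slope/intercept numbers and the diagonal covariance are illustrative stand-ins, not the STEP calibration values.

import numpy as np
import pymc

# Hypothetical calibration numbers, for illustration only.
mu_cal = [2.0, 0.01, -0.05]            # pivot, m_b, m_slope
cov_cal = np.diag([0.01, 1e-4, 1e-4])  # assumed independent uncertainties
cal_prior = pymc.MvNormalCov('cal_prior', mu_cal, cov_cal)

sizes = np.random.uniform(1.0, 4.0, 100)  # stand-in for inputcat['size']

@pymc.deterministic(trace=False)
def m_of_size(size=sizes, cal=cal_prior):
    # Constant bias above the pivot, linear ramp below it.
    pivot, m_b, m_slope = cal
    m = np.where(size >= pivot, m_b, m_slope * (size - pivot) + m_b)
    return np.ascontiguousarray(m.astype(np.float64))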
Example No. 2
    def test_fit(self):
        "Compares EP results with MCMC results with a real age-corrected likelihood."
        nug = random.normal()**2 * .3
        N_exam = ones(self.N) * 1000
        lps, pos = EP.EP_MAP.simulate_data(
            self.M_pri, self.C_pri, self.N, nug, N_exam,
            self.correction_factor_array.shape[1],
            self.correction_factor_array, self.age_lims)

        # Do EP algorithm
        E = EP.EP(self.M_pri, self.C_pri, lps, nug * ones(self.N))
        E.fit(iter, tol=tol)  # iter and tol are module-level settings in the original test module
        # from IPython.Debugger import Pdb
        # Pdb(color_scheme='Linux').set_trace()

        x = pm.MvNormalCov('x', self.M_pri, self.C_pri)
        eps = pm.Normal('eps', x, 1. / nug)

        @pm.potential
        def y(eps=eps, lps=lps):
            return sum([lps[i](eps[i]) for i in xrange(len(eps))])

        M = pm.MCMC([x, eps, y])
        M.use_step_method(pm.AdaptiveMetropolis, [eps, x])
        M.isample(100000)

        post_V = var(M.trace('x')[20000:], axis=0)
        post_M = mean(M.trace('x')[20000:], axis=0)

        pm.Matplot.plot(M)

        assert_almost_equal(post_M, E.M, 1)
        assert_almost_equal(post_V, diag(E.C), 1)
Example No. 3
    def walk(self, niter=10000, nburn=5000, method='AdaptiveMetropolis'):
        '''
        March the MCMC.

        Kwargs:
            * niter             : Number of steps to walk
            * nburn             : Number of steps to burn
            * method            : One of the step methods of PyMC2

        Returns:
            * None
        '''

        # Define the stochastic function
        @pymc.stochastic
        def prior(value=self.initSampleVec):
            prob = 0.
            for prior, val in zip(self.Priors, value):
                prior.set_value(val)
                prob += prior.logp
            return prob

        # Create the deterministics
        likelihood = []
        for like in self.Likelihoods:

            # Get what I need
            data, dobs, Cd, vertical = like

            # Create the forward method. data/vertical are bound as defaults so
            # each deterministic keeps its own dataset (a plain closure would
            # leave every forward pointing at the loop's last likelihood), and
            # each one gets a unique name.
            @pymc.deterministic(plot=False, name='forward_{}'.format(data.name))
            def forward(theta=prior, data=data, vertical=vertical):
                return self.Predict(theta, data, vertical=vertical)

            # Build likelihood function
            likelihood.append(
                pymc.MvNormalCov('Data Likelihood: {}'.format(data.name),
                                 mu=forward,
                                 C=Cd,
                                 value=dobs,
                                 observed=True))

        # List of pdf to sample
        pdfs = [prior] + likelihood

        # Create a sampler
        sampler = pymc.MCMC(pdfs)

        # Make sure step method is what is asked for
        sampler.use_step_method(getattr(pymc, method), prior)

        # Sample
        sampler.sample(iter=niter, burn=nburn)

        # Save the sampler
        self.sampler = sampler
        self.nsamples = niter - nburn

        # All done
        return
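A hedged usage sketch; `solver`, its Priors/Likelihoods attributes and initSampleVec are assumptions carried over from the method above, not part of the source.

# solver: an instance of the class defining walk(), already populated
# with Priors, Likelihoods and initSampleVec.
solver.walk(niter=20000, nburn=10000, method='AdaptiveMetropolis')
samples = solver.sampler.trace('prior')[:]  # (niter - nburn) rows of parameter samples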
Example No. 4
def getModel(t):
    k = getKernel(3, t)
    mu = getMean(0, t)
    #     k2 = np.multiply(np.eye(len(k)-1),k[:-1,:-1]);
    #     k2 = np.multiply(np.eye(len(k)),k);
    GP1 = pm.MvNormalCov('GP1', mu=mu, C=k)
    #@UndefinedVariable
    GP2 = pm.MvNormalCov('GP2', mu=GP1[:-2], C=np.eye(len(k) - 2) * 50)
    #@UndefinedVariable
    GP3 = pm.MvNormalCov('GP3',
                         mu=GP1[-2:],
                         C=np.eye(2) * 50,
                         observed=True,
                         value=[1000, 1000])
    #@UndefinedVariable
    #     GP3 = pm.Normal('GP3', mu=GP1[-1], tau=1/k[-1,-1]); #@UndefinedVariable
    return pm.Model([GP1, GP2, GP3])
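A usage sketch, assuming getKernel and getMean are the module's own helpers returning a covariance matrix and mean vector over the grid t, and the module's pm/np imports.

t = np.arange(10)
mcmc = pm.MCMC(getModel(t))
mcmc.sample(iter=5000, burn=1000)
gp1_trace = mcmc.trace('GP1')[:]  # posterior draws of the latent GP values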
Example No. 5
def mk_node(species, node_name, node, parent_idx, effects, path, has_siblings=False):
    paths = reduce(lambda x, y: "{}__{}".format(x, y).replace(' ', '_'), path)

    if has_siblings:
        theta.append(pm.MvNormalCov('theta_{}'.format(paths),
                                    mu=theta[parent_idx],
                                    C=400*np.eye(num_traits),
                                    value=node_means[species]))
        sigma.append(pm.WishartCov('sigma_{}'.format(paths),
                                   n=num_traits+1,
                                   C=sigma[parent_idx],
                                   value=node_matrices[species]))

        parent_idx = len(theta) - 1

    if not node.items():
        # Leaf node: pull the trait columns for the rows of this species that
        # match every effect level along the path. ('species' replaces a stray
        # reference to a module-level `n` in the original.)
        row_mask = (data['species'] == species) & \
            reduce(operator.iand,
                   map(lambda s: data[s[0]] == s[1], zip(effects, path[1:])))
        obs_data = np.array(data.ix[row_mask, 0:num_traits])

        data_list.append(pm.MvNormalCov('data_{}'.format(paths),
                                            mu=theta[parent_idx],
                                            C=sigma[parent_idx],
                                            value=obs_data,
                                            observed=True))


        # Slow method to simulate n populations from posterior
        #ds = []
        #for i in xrange(0,obs_data.shape[0]):
        #    ds.append(pm.MvNormalCov('data_{}_{}'.format(paths, i),
        #                                    mu=theta[parent_idx],
        #                                    C=sigma[parent_idx]
        #                                    ))

        #data_sim_list.append(ds)

        return

    has_siblings = len(node.keys()) > 1
    for k in node.keys():
        mk_node(species, k, node[k], parent_idx, effects, path + [k], has_siblings)
Example No. 6
def test_square():
    iw = pymc.InverseWishart("A", n=2, Tau=np.eye(2))
    mnc = pymc.MvNormalCov("v",
                           mu=np.zeros(2),
                           C=iw,
                           value=np.zeros(2),
                           observed=True)

    M = pymc.MCMC([iw, mnc])
    M.sample(8)
Example No. 7
def getModel():
    B = pm.Beta('1-Beta', alpha=1.2, beta=5)
    #@UndefinedVariable
    #     p_B = pm.Lambda('p_Bern', lambda b=B: np.where(b==0, 0.9, 0.1), doc='Pr[Bern|Beta]');
    C = pm.Categorical('2-Cat', [1 - B, B])
    #@UndefinedVariable
    #     C = pm.Categorical('1-Cat', [0.2, 0.4, 0.1, 0.3], observed=True, value=3); #@UndefinedVariable
    p_N = pm.Lambda('p_Norm',
                    lambda n=C: np.where(n == 0, [0, 0], [5, 5]),
                    doc='Pr[Norm|Cat]')
    N = pm.MvNormalCov('3-Norm_2D', mu=p_N, C=np.eye(2))
    #@UndefinedVariable
    #     N = pm.MvNormal('2-Norm', mu=p_N, tau=np.eye(2,2), observed=True, value=[2.5,2.5]); #@UndefinedVariable
    return pm.Model([B, C, N])
Example No. 8
def model_factory():
    C = P.Uniform("C", value=C0, lower=CMIN, upper=CMAX)
    
    @P.deterministic(plot=False)
    def response(C=C):
        return model(C)

    Y = P.MvNormalCov(
        'Y',
        response,
        (SIGMA ** 2) * np.eye(NT),
        observed=True,
        plot=False,
        value=data,
    )

    return locals()
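Because model_factory returns locals(), the whole dictionary can be handed straight to a sampler. A short sketch; P is assumed to be the module's pymc alias, and C0/CMIN/CMAX, SIGMA, NT, data and model are module-level names the factory relies on.

M = P.MCMC(model_factory())
M.sample(iter=20000, burn=5000)
C_posterior = M.trace('C')[:]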
Example No. 9
def getModel(t, iObs):
    k = getKernel(3, t)
    mu = getMean(2, t)
    aNodes = []
    GP1 = pm.MvNormalCov('GP1', mu=mu, C=k)
    #@UndefinedVariable
    aNodes.append(GP1)
    for i in range(len(t)):
        aNodes.append(pm.Normal('N' + str(i + 1), mu=GP1[i], tau=1. / 50))  # 1./50, not 1/50: integer division would give tau=0 on Python 2
        #@UndefinedVariable
        if i in iObs:
            aNodes.append(
                pm.Normal('oN' + str(i + 1),
                          mu=GP1[i],
                          tau=1. / 50,
                          observed=True,
                          value=-100))
            #@UndefinedVariable
    return pm.Model(aNodes)
Example No. 10
    def makeShapePrior(self, data, parts):

        inputcat = data.inputcat

        if data.wtg_shearcal:  # use WTG STEP2 shear calibration
            psfSize = data.psfsize  # rh, in pixels

            # disabled since we don't have a STEP calibration for the regauss pipeline in DMSTACK
            m_slope, m_b, m_cov, c = self.psfDependence(
                data.options.steppsf, psfSize)

            parts.step_m_prior = pymc.MvNormalCov('step_m_prior',
                                                  [m_b, m_slope], m_cov)

            @pymc.deterministic(trace=False)
            def shearcal_m(size=inputcat['size'], mprior=parts.step_m_prior):

                m_b = mprior[0]
                m_slope = mprior[1]

                m = np.zeros_like(size)
                m[size >= 2.0] = m_b
                m[size < 2.0] = m_slope * (size[size < 2.0] - 2.0) + m_b
                return np.ascontiguousarray(m.astype(np.float64))

            parts.shearcal_m = shearcal_m

            parts.step_c_prior = pymc.Normal('step_c_prior', c,
                                             1. / (0.0004**2))

            @pymc.deterministic(trace=False)
            def shearcal_c(size=inputcat['size'], cprior=parts.step_c_prior):
                c = cprior * np.ones_like(size)
                return np.ascontiguousarray(c.astype(np.float64))

            parts.shearcal_c = shearcal_c

        else:  # No shear calibration
            parts.shearcal_m = np.zeros(len(inputcat))
            parts.shearcal_c = np.zeros(len(inputcat))

        parts.sigma = pymc.Uniform('sigma', 0.15, 0.5)  # sigma
        parts.gamma = pymc.Uniform('gamma', 0.003, 0.1)  # gamma
Example No. 11
def makeSplitPrior(parts):

    parts.shape_params = np.empty(parts.nshapebins, dtype=object)
    parts.shape_params_mv = np.empty(parts.nshapebins, dtype=object)
    parts.shape_params_fixed = np.empty(parts.nshapebins, dtype=object)

    for i, shapeparams in enumerate(parts.shapedistro_params):

        parts.shape_params_mv[i] = pymc.MvNormalCov('shape_params_mv_%d' % i,
                                                    shapeparams[1],
                                                    shapeparams[2],
                                                    trace=False)

        parts.shape_params_fixed[i] = shapeparams[0]

        @pymc.deterministic(name='shape_params_%d' % i, trace=True)
        def sp(fixed=parts.shape_params_fixed[i], mv=parts.shape_params_mv[i]):

            return np.hstack([fixed, mv])

        parts.shape_params[i] = sp
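A hedged sketch of driving makeSplitPrior with a toy parts namespace; the Parts class and the (fixed, mean, covariance) triples are illustrative assumptions, and the module's numpy/pymc imports are taken as given.

class Parts(object):
    pass

parts = Parts()
parts.nshapebins = 2
parts.shapedistro_params = [
    (np.zeros(2), np.zeros(3), np.eye(3)),  # fixed block, MV mean, MV covariance
    (np.ones(2), np.zeros(3), np.eye(3)),
]
makeSplitPrior(parts)
print parts.shape_params[0].value  # hstack of the fixed block and one MV draw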
Example No. 12
def getModel():
    D = pm.Dirichlet('1-Dirichlet', theta=[3, 2, 4])  #@UndefinedVariable
    C1 = pm.Categorical('2-Cat', D)  #@UndefinedVariable
    C2 = pm.Categorical('10-Cat', D)  #@UndefinedVariable
    C3 = pm.Categorical('11-Cat', D)  #@UndefinedVariable
    W0_0 = pm.WishartCov('4-Wishart0_1', n=5, C=np.eye(2))  #@UndefinedVariable
    N0_1 = pm.MvNormalCov('5-Norm0_1', mu=[-20, -20], C=np.eye(2))  #@UndefinedVariable
    N0_2 = pm.MvNormalCov('6-Norm0_2', mu=[0, 0], C=np.eye(2))  #@UndefinedVariable
    N0_3 = pm.MvNormalCov('7-Norm0_3', mu=[20, 20], C=np.eye(2))  #@UndefinedVariable
    aMu = [N0_1.value, N0_2.value, N0_3.value]
    fL1 = lambda n=C1: np.select([n == 0, n == 1, n == 2], aMu)
    fL2 = lambda n=C2: np.select([n == 0, n == 1, n == 2], aMu)
    fL3 = lambda n=C3: np.select([n == 0, n == 1, n == 2], aMu)
    p_N1 = pm.Lambda('p_Norm1', fL1, doc='Pr[Norm|Cat]')
    p_N2 = pm.Lambda('p_Norm2', fL2, doc='Pr[Norm|Cat]')
    p_N3 = pm.Lambda('p_Norm3', fL3, doc='Pr[Norm|Cat]')
    N = pm.MvNormalCov('3-Norm', mu=p_N1, C=W0_0)  #@UndefinedVariable
    obsN1 = pm.MvNormalCov('8-Norm', mu=p_N2, C=W0_0, observed=True, value=[-20, -20])  #@UndefinedVariable @UnusedVariable
    obsN2 = pm.MvNormalCov('9-Norm', mu=p_N3, C=W0_0, observed=True, value=[20, 20])  #@UndefinedVariable @UnusedVariable
    return pm.Model([D, C1, C2, C3, N, W0_0, N0_1, N0_2, N0_3, obsN1, obsN2])
Example No. 13
import pandas as pd
import numpy as np
import pymc as pm
import dendropy

data = pd.read_csv("../dados/dados5sp.csv")

t = dendropy.Tree.get_from_string("(B, ((C, E),(A,D)))", "newick")
num_leafs = len(t.leaf_nodes())
num_traits = 4

root = t.seed_node

theta = [
    pm.MvNormalCov('theta_0',
                   mu=np.array(data.ix[:, 0:num_traits].mean()),
                   C=np.eye(num_traits) * 10.,
                   value=np.zeros(num_traits))
]

sigma = [
    pm.WishartCov('sigma_0',
                  n=num_traits + 1,
                  C=np.eye(num_traits) * 10.,
                  value=np.eye(num_traits))
]

tree_idx = {str(root): 0}

i = 1
for n in t.nodes()[1:]:
    parent_idx = tree_idx[str(n.parent_node)]
Example No. 14
            Cov[start:start + manifolds[i].N,
                start:start + manifolds[i].N] = Cd
            start += manifolds[i].N

        logger.info('Plot covariance matrix')
        fig = plt.figure(2)
        ax = fig.add_subplot(111)
        cax = plt.imshow(Cov)
        fig.colorbar(cax)
        fig.savefig(outdir + '/insar/' + 'COV_mat.eps', format='EPS')
        plt.show()
        return Cov

    d = pymc.MvNormalCov('Data',
                         mu=forward,
                         C=Cov(),
                         value=data(),
                         observed=True)

# autocovariance only
else:

    def Cov():
        Cov = np.zeros((N))
        start = 0
        for i in xrange(len(manifolds)):
            Cd = np.diag(manifolds[i].sigmad**2, k=0)
            # And the diagonal of its inverse
            Cov[start:start + manifolds[i].N] = np.diag(np.linalg.inv(Cd))
            # Save Covariance matrix for each data set
            manifolds[i].Cd = np.diag(Cd)
Example No. 15
try:
    from types import UnboundMethodType
except ImportError:
    # On Python 3, unbound methods are just functions.
    def UnboundMethodType(func, inst, cls):
        return func


submod = pm.gp.GPSubmodel(
    'x5', pm.gp.Mean(lambda x: 0 * x),
    pm.gp.FullRankCovariance(pm.gp.cov_funs.exponential.euclidean,
                             amp=1,
                             scale=1), np.linspace(-1, 1, 21))
x = [
    pm.MvNormalCov('x0', np.zeros(5), np.eye(5)),
    pm.Gamma('x1', 4, 4, size=3),
    pm.Gamma('x2', 2, 2),
    pm.Binomial('x3', 100, .4),
    pm.Bernoulli('x4', .5), submod.f
]

do_not_implement_methods = [
    'iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'imod', 'ipow', 'ilshift',
    'irshift', 'iand', 'ixor', 'ior'
]
uni_methods = ['neg', 'pos', 'abs', 'invert', 'index']
rl_bin_methods = [
    'div', 'truediv', 'floordiv', 'mod', 'divmod', 'pow', 'lshift', 'rshift',
    'and', 'xor', 'or'
] + ['add', 'mul', 'sub']
Example No. 16
    new_mean = new_mean/len(child_labels)
    return new_matrix, sample, new_mean

# Compute the matrices and sample sizes for every node

for n in t.postorder_node_iter():
    if str(n) not in node_matrices:
        node_matrices[str(n)], node_sample_size[str(n)], node_means[str(n)] = matrix_mean(n.child_nodes())

# Now the PyMC part begins

root = t.seed_node

theta = [pm.MvNormalCov('theta_0',
                        #mu=np.array(data.ix[:, 0:num_traits].mean()),
                        mu=np.zeros(num_traits),
                        C=np.eye(num_traits)*10.,
                        value=node_means[str(root)])]

sigma = [pm.WishartCov('sigma_0',
                       n=num_traits+1,
                       C=np.eye(num_traits)*10.,
                       value=node_matrices[str(root)])]

tree_idx = {str(root): 0}

i = 1
for n in t.nodes()[1:]:
    parent_idx = tree_idx[str(n.parent_node)]

    theta.append(pm.MvNormalCov('theta_{}'.format(str(i)),
Example No. 17
for n in t.postorder_node_iter():
    if node_name(n) not in node_matrices:
        node_matrices[node_name(n)], node_sample_size[node_name(
            n)], node_means[node_name(n)] = matrix_mean(n.child_nodes())

# Now the PyMC part begins

root = t.seed_node

theta = {
    node_name(root):
    pm.MvNormalCov(
        'theta_0',
        mu=np.array(data.ix[:, 0:num_traits].mean()),
        #value=node_means[str(root)],
        value=np.zeros(num_traits),
        #mu=np.zeros(num_traits),
        C=np.eye(num_traits) * 100.)
}

sigma = {
    node_name(root):
    pm.WishartCov(
        'sigma_0',
        value=node_matrices[node_name(root)],
        #value=np.eye(num_traits),
        n=num_traits + 1,
        C=node_matrices[node_name(root)])
}
#C=np.eye(num_traits)*100.)}
Example No. 18
def create_model(region_name,
                 all_pts,
                 name,
                 scale_params,
                 amp_params,
                 cpus,
                 with_stukel,
                 spatial,
                 chunk,
                 covariate_names,
                 disttol,
                 ttol,
                 AM_delay=50000,
                 AM_interval=100,
                 AM_sd=.1,
                 crashed_db=None):

    # ======================================
    # = Make sure it's safe to make output =
    # ======================================

    if not spatial:
        name += '_nonspatial'
    if with_stukel:
        name += '_stukel'
    for cname in covariate_names:
        name += '_%s' % cname

    if name + '.hdf5' in os.listdir('.'):
        print
        print """=============
= ATTENTION =
============="""
        print

        OK = False
        while not OK:
            y = raw_input(
                'Database %s already exists.\nDo you want to delete it? Error will be raised otherwise.\n>> '
                % (name + '.hdf5'))
            if y.lower() == 'yes':
                print 'OK, moving to trash.'
                os.system('mv %s ~/.Trash' % (name + '.hdf5'))
                OK = True
            elif y.lower() == 'no':
                raise RuntimeError, 'But dash it all! I mean to say, what?'
            else:
                y = raw_input('Please type yes or no.\n>> ')

    norun_name = '_'.join(name.split('_')[:2])

    C_time = [0.]
    f_time = [0.]
    M_time = [0.]

    # =============================
    # = Preprocess data, uniquify =
    # =============================

    # Convert latitude and longitude from degrees to radians.
    lon = all_pts.LONG * np.pi / 180.
    lat = all_pts.LAT * np.pi / 180.

    # Convert time to end year - 2009 (no sense forcing mu to adjust by too much).
    # t = all_pts.YEAR_START-2009. + all_pts.MONTH_STAR / 12.
    t = all_pts.TIME - 2009

    # Make lon, lat, t triples.
    data_mesh = np.vstack((lon, lat, t)).T

    disttol = disttol / 6378.
    ttol = ttol / 12.

    # Find near spatiotemporal duplicates.
    if spatial:
        ui = []
        ri = []
        fi = []
        ti = []
        dx = np.empty(1)
        for i in xrange(data_mesh.shape[0]):
            match = False
            for j in xrange(len(ui)):
                pm.gp.geo_rad(dx, data_mesh[i, :2].reshape((1, 2)),
                              data_mesh[ui[j], :2].reshape((1, 2)))
                dt = abs(t[ui[j]] - t[i])

                if dx[0] < disttol and dt < ttol:
                    match = True
                    fi.append(j)
                    ti[j].append(i)
                    ri.append(i)
                    break

            if not match:
                fi.append(len(ui))
                ui.append(i)
                ti.append([i])
        ui = np.array(ui)
        ti = [np.array(tii) for tii in ti]
        fi = np.array(fi)
        ri = np.array(ri)
        logp_mesh = data_mesh[ui, :]
        if len(ri) > 0:
            repeat_mesh = data_mesh[ri, :]
        else:
            repeat_mesh = np.array([])
    else:
        ui = np.arange(len(t))
        ti = [np.array([uii]) for uii in ui]
        fi = ui
        ri = np.array([])
        logp_mesh = data_mesh
        repeat_mesh = np.array([])

    # =====================
    # = Create PyMC model =
    # =====================

    init_OK = False
    while not init_OK:

        # Flat prior on m_const (mu).
        m_const = pm.Uninformative('m_const', value=-3.)
        if with_stukel:
            m_const.value = -1.1

        # Flat prior on coefficient of time (k).
        t_coef = pm.Uninformative('t_coef', value=.1)
        if with_stukel:
            t_coef.value = -.4

        # Inverse-gamma prior on nugget variance V.
        tau = pm.Gamma('tau', value=2., alpha=.001, beta=.001 / .25)
        V = pm.Lambda('V', lambda tau=tau: 1. / tau)

        vars_to_writeout = ['V', 'm_const', 't_coef']

        # Pull out covariate information.
        # The values of covariate_dict are (Stochastic, interpolated covariate) tuples.
        # Interpolation is done to the data mesh.
        covariate_dict = {}
        for cname in covariate_names:
            # hf = openFile(mbgw.__path__[0] + '/auxiliary_data/' + cname + '.hdf5')
            if cname == 'periurb':
                this_interp_covariate = all_pts.URB_CLS == 2
                if np.sum(all_pts.URB_CLS == 3) < 10:
                    print 'Warning: Very few urban points, using same coefficient for urban and periurban'
                    this_interp_covariate += all_pts.URB_CLS == 3
            elif cname == 'urb':
                if np.sum(all_pts.URB_CLS == 3) >= 10:
                    this_interp_covariate = all_pts.URB_CLS == 3
                else:
                    this_interp_covariate = None
            else:
                this_cov = getattr(auxiliary_data, cname)
                this_interp_covariate = nearest_interp(this_cov.long[:],
                                                       this_cov.lat[:],
                                                       this_cov.data,
                                                       data_mesh[:, 0],
                                                       data_mesh[:, 1])
            if this_interp_covariate is not None:
                this_coef = pm.Uninformative(cname + '_coef', value=0.)
                covariate_dict[cname] = (this_coef, this_interp_covariate)

        # Lock down parameters of Stukel's link function to obtain standard logit.
        # These can be freed by removing 'observed' flags, but mixing gets much worse.
        if with_stukel:
            a1 = pm.Uninformative('a1', .5)
            a2 = pm.Uninformative('a2', .8)
        else:
            a1 = pm.Uninformative('a1', 0, observed=True)
            a2 = pm.Uninformative('a2', 0, observed=True)

        transformed_spatial_vars = [V]
        if spatial:
            # Make it easier for inc (psi) to jump across 0: let nonmod_inc roam freely over the reals,
            # and mod it by pi to get the 'inc' parameter.
            nonmod_inc = pm.Uninformative('nonmod_inc', value=.5)
            inc = pm.Lambda('inc',
                            lambda nonmod_inc=nonmod_inc: nonmod_inc % np.pi)

            # Use a uniform prior on sqrt ecc (sqrt ???). Using a uniform prior on ecc itself put too little
            # probability mass on appreciable levels of anisotropy.
            sqrt_ecc = pm.Uniform('sqrt_ecc', value=.1, lower=0., upper=1.)
            ecc = pm.Lambda('ecc', lambda s=sqrt_ecc: s**2)

            # Subjective skew-normal prior on amp (the partial sill, tau) in log-space.
            # Parameters are passed in in manual_MCMC_supervisor.
            log_amp = pm.SkewNormal('log_amp', **amp_params)
            amp = pm.Lambda('amp', lambda log_amp=log_amp: np.exp(log_amp))

            # Subjective skew-normal prior on scale (the range, phi_x) in log-space.
            log_scale = pm.SkewNormal('log_scale', **scale_params)
            scale = pm.Lambda('scale',
                              lambda log_scale=log_scale: np.exp(log_scale))

            # Exponential prior on the temporal scale/range, phi_t. Standard one-over-x
            # doesn't work bc data aren't strong enough to prevent collapse to zero.
            scale_t = pm.Exponential('scale_t', .1)

            # Uniform prior on limiting correlation far in the future or past.
            t_lim_corr = pm.Uniform('t_lim_corr', 0, 1, value=.8)

            # # Uniform prior on sinusoidal fraction in temporal variogram
            sin_frac = pm.Uniform('sin_frac', 0, 1)

            vars_to_writeout.extend([
                'inc', 'ecc', 'amp', 'scale', 'scale_t', 't_lim_corr',
                'sin_frac'
            ])
            transformed_spatial_vars.extend([inc, ecc, amp, scale])

        # Collect stochastic variables with observed=False for the adaptive Metropolis stepper.
        trial_stochs = [v[0] for v in covariate_dict.itervalues()
                        ] + [m_const, tau, a1, a2, t_coef]
        if spatial:
            trial_stochs = trial_stochs + [
                nonmod_inc, sqrt_ecc, log_amp, log_scale, scale_t, t_lim_corr,
                sin_frac
            ]
        nondata_stochs = []
        for stoch in trial_stochs:
            if not stoch.observed:
                nondata_stochs.append(stoch)

        # Collect variables to write out

        # The mean of the field
        @pm.deterministic
        def M(m=m_const, tc=t_coef):
            return pm.gp.Mean(st_mean_comp, m_const=m, t_coef=tc)

        # The mean, evaluated  at the observation points, plus the covariates
        @pm.deterministic(trace=False)
        def M_eval(M=M, lpm=logp_mesh, cv=covariate_dict):
            out = M(lpm)
            for c in cv.itervalues():
                out += c[0] * c[1][ui]
            return out

        # Create covariance and MV-normal F if model is spatial.
        if spatial:
            try:
                # A constraint on the space-time covariance parameters that ensures temporal correlations are
                # always between -1 and 1.
                @pm.potential
                def st_constraint(sd=.5, sf=sin_frac, tlc=t_lim_corr):
                    if -sd >= 1. / (-sf * (1 - tlc) + tlc):
                        return -np.Inf
                    else:
                        return 0.

                # A Deterministic valued as a Covariance object. Uses covariance my_st, defined above.
                @pm.deterministic
                def C(amp=amp,
                      scale=scale,
                      inc=inc,
                      ecc=ecc,
                      scale_t=scale_t,
                      t_lim_corr=t_lim_corr,
                      sin_frac=sin_frac):
                    return pm.gp.FullRankCovariance(my_st,
                                                    amp=amp,
                                                    scale=scale,
                                                    inc=inc,
                                                    ecc=ecc,
                                                    st=scale_t,
                                                    sd=.5,
                                                    tlc=t_lim_corr,
                                                    sf=sin_frac,
                                                    n_threads=cpus)

                # The evaluation of the Covariance object.
                @pm.deterministic(trace=False)
                def C_eval(C=C):
                    return C(logp_mesh, logp_mesh)

                # The field evaluated at the uniquified data locations
                f = pm.MvNormalCov('f', M_eval, C_eval, value=M_eval.value)

                # The field evaluated at all the data locations
                @pm.deterministic(trace=False)
                def f_eval(f=f):
                    return f[fi]

                init_OK = True
            except pm.ZeroProbability, msg:
                print 'Trying again: %s' % msg
                init_OK = False
                gc.collect()

        # if not spatial
        else:
            C = None

            # The field is just the mean, there's no spatially-structured component.
            @pm.deterministic
            def f(M=M_eval):
                return M[fi]

            f_eval = f

            init_OK = True
Example No. 19
def complex_hierarchical_model(y, X, t):
    """ PyMC model for the complicated example given in section
    3.2.1::

        y_ij ~ N(mu_j - exp(beta_j)t_ij - exp(gamma_j)t_ij^2, sigma_j^2)
        gamma_j | sigma^2, xi, X_j ~ N(eta_0 + eta_1 X_j + eta_2 X_j^2, omega^2)
        beta_j | gamma_j, sigma^2, xi, X_j ~ N(delta_beta_0 + delta_beta_1 X_j + delta_beta_2 X_j^2 + delta_beta_3 gamma_j, omega_beta^2)
        mu_j | gamma_j, beta_j, sigma^2, xi, X_j ~ N(delta_mu_0 + delta_mu_1 X_j + delta_mu_2 X_j^2 + delta_mu_3 gamma_j + delta_mu_4 beta_j, omega_mu^2)

        eta = (eta_0, eta_1, eta_2, log(omega))'
        delta_beta = (delta_beta_0, delta_beta_1, delta_beta_2, delta_beta_3, log(omega_beta))'
        delta_mu = (delta_mu_0, delta_mu_1, delta_mu_2, delta_mu_3, log(omega_mu))'
        xi = (eta, delta_beta, delta_mu)
        eta ~ MVNormal(M, C)
        delta_beta, delta_mu ~ Normal(m, s)

    Parameters
    ----------
    y : list, len(y) = J, y[j][i] = measurement i on patient j
    X : list, len(X) = J, X[j] = baseline measurement for patient j
    t : list, len(t) = J, t[j][i] = time of measurement i on patient j
    """

    J = len(y)
    
    # hyper-priors, not specified in detail in paper
    m = 0.
    s = 1.
    M = pl.zeros(4)
    r = pl.array([[  1, .57, .18, .56],
                  [.57,   1, .72, .16],
                  [.18, .72,   1, .14],
                  [.56, .16, .14,   1]])

    eta = mc.MvNormalCov('eta', M, r, value=M)
    omega = mc.Lambda('omega', lambda eta=eta: pl.exp(eta[-1]))
    
    delta_beta = mc.Normal('delta_beta', m, s**-2, value=m*pl.ones(5))
    omega_beta = mc.Lambda('omega_beta', lambda delta_beta=delta_beta: pl.exp(delta_beta[-1]))

    delta_mu = mc.Normal('delta_mu', m, s**-2, value=m*pl.ones(5))
    omega_mu = mc.Lambda('omega_mu', lambda delta_mu=delta_mu: pl.exp(delta_mu[-1]))

    gamma = mc.Lambda('gamma', lambda eta=eta: eta[0] + eta[1]*X + eta[2]*X**2)
    beta = mc.Lambda('beta', lambda delta_beta=delta_beta, gamma=gamma: delta_beta[0] + delta_beta[1]*X + delta_beta[2]*X**2 + delta_beta[3]*gamma)
    mu = mc.Lambda('mu', lambda delta_mu=delta_mu, gamma=gamma, beta=beta: delta_mu[0] + delta_mu[1]*X + delta_mu[2]*X**2 + delta_mu[3]*gamma + delta_mu[4]*beta)

    sigma = mc.Uniform('sigma', 0., 10., value=.1*pl.ones(J))
    y_exp = [mc.Lambda('y_exp_%d'%j, lambda mu=mu, beta=beta, gamma=gamma, j=j: mu[j] - pl.exp(beta[j])*t[j] - pl.exp(gamma[j])*t[j]**2) for j in range(J)]
    @mc.potential
    def y_obs(y_exp=y_exp, sigma=sigma, y=y):
        logp = 0.
        for j in range(J):
            missing = pl.isnan(y[j])
            logp += mc.normal_like(y[j][~missing], y_exp[j][~missing], sigma[j]**-2)
        return logp

    y_pred = [mc.Normal('y_pred_%d'%j, y_exp[j], sigma[j]**-2) for j in range(J)]

    eta_cross_eta = mc.Lambda('eta_cross_eta', lambda eta=eta: [eta[0]*eta[1], eta[0]*eta[2], eta[0]*eta[3], eta[1]*eta[2], eta[1]*eta[3], eta[2]*eta[3]])  # all six pairwise products; the original repeated eta[1]*eta[2]

    return vars()
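A usage sketch under the docstring's assumptions about y, X and t; the AdaptiveMetropolis grouping is a guess, not part of the source.

model_vars = complex_hierarchical_model(y, X, t)
m = mc.MCMC(model_vars)
m.use_step_method(mc.AdaptiveMetropolis, model_vars['eta'])
m.sample(iter=20000, burn=10000, thin=10)
eta_posterior = m.trace('eta')[:]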
Example No. 20
def CalcGP(sampledata, newpoints, ntrials=500, nburn=200, nthin=1):
    """
    Return estimates of the function sampled at sampledata as a Gaussian Process, at newpoints.

    The sampledata should be Nsamples by Ndim + 1 (values as last column).
    The newpoints should be Npoints by Ndim.
    """
    nsamples, ndims = np.shape(sampledata)
    ndims -= 1 #There's an extra dimension corresponding to the function value.
    y = sampledata[:,-1]
    samplepoints = sampledata[:, :-1]
    #Generate containers for the theta and exponent values...
    #The parameters are random variables.
    thetas = np.empty(ndims, dtype=object)
    exponents = np.empty(ndims, dtype=object)

    for i in range(ndims):
      #I'll arbitrarily assume the thetas are in the range [-10, 10].
      thetas[i] = pymc.Uniform('theta_{0}'.format(i), lower=-10, upper=10, value=1)
      exponents[i] = pymc.Uniform('exponent_{0}'.format(i), lower=0, upper=100, value=2)

#==============================================================================
#     @pymc.deterministic(plot=False, dtype=float)
#     def distance(point1, point2, thetas=thetas, exponents=exponents):
#       """
#       Return the non-Euclidean distance metric.
#       """
#       absvector = np.abs(point1 - point2)
#       return sum(thetas*absvector**exponents)
#==============================================================================
    def f(x):
      return np.exp(float(x))
    vf = np.vectorize(f)

    @pymc.deterministic(plot=False)
    def corr(samples=samplepoints, nsamples=nsamples, thetas=thetas, exponents=exponents):
      """
      Return the correlation matrix.
      """
      #The correlation matrix is nsamples x nsamples
      #corrmatrix = np.empty([nsamples, nsamples])

      #tmp = -np.sum(thetas[:,None]*np.abs(samples.T - samples[:,:,None])**exponents[:,None], axis=1)
      return vf(-np.sum(thetas[:,None]*np.abs(samples.T - samples[:,:,None])**exponents[:,None], axis=1))
#==============================================================================
#       for row in range(nsamples):
#         for col in range(nsamples):
#           absvector = np.abs(samplepoints[row] - samplepoints[col])
#           corrmatrix[row, col] = np.exp(float(-np.sum(thetas*absvector**exponents)))
#
#       return corrmatrix + 1e-10*np.eye(nsamples)
#==============================================================================

    @pymc.deterministic(plot=False, dtype=float)
    def mean(corrmat=corr, y=y, nsamples=nsamples):
      """
      The maximum likelihood mean is entirely defined by the correlation matrix.
      """
      I_vec = np.ones(nsamples)
      try:
        inv = np.linalg.inv(corrmat)
      except np.linalg.LinAlgError:
        print("I'm having trouble inverting the correlation matrix; conditioning...")
        inv = np.linalg.inv(corrmat + 1e-12*np.eye(nsamples))

      return I_vec * np.dot(I_vec, np.dot(inv, y)) / np.dot(I_vec, np.dot(inv, I_vec))

    @pymc.deterministic(plot=False, dtype=float)
    def variance(corrmat=corr, y=y, themean=mean, nsamples=nsamples):
      """
      The maximum likelihood variance is entirely defined by the correlation matrix.
      """
      try:
        inv = np.linalg.inv(corrmat)
      except np.linalg.LinAlgError:
        print("I'm having trouble inverting the correlation matrix; conditioning...")
        inv = np.linalg.inv(corrmat + 1e-12*np.eye(nsamples))

      return np.dot(y - themean, np.dot(inv, y - themean))/nsamples

    Data = pymc.MvNormalCov('Data', mu=mean, C=variance*corr, value=y, observed=True)

    model = pymc.Model([thetas, exponents, corr, mean, variance, Data])
    thegraph = pymc.graph.graph(model, prog=r'C:\Program Files (x86)\Graphviz2.38\bin\dot.exe', format='png', path=r'.')
    imdata = plt.imread('.\\container.png')
    plt.title("A schematic of the model.")

    mcmc = pymc.MCMC([thetas, exponents, corr, mean, variance, Data])
    mcmc.sample(iter=ntrials, burn=nburn, thin=nthin)
    thegraph = pymc.graph.graph(mcmc, prog=r'C:\Program Files (x86)\Graphviz2.38\bin\dot.exe', format='png', path=r'.')
    imdata = plt.imread('.\\container.png')
    plt.title("A schematic of the model.")
    pymc.Matplot.plot(mcmc)

    mapmodel = pymc.MAP([thetas, exponents, corr, mean, variance, Data])
    mapmodel.fit()
    print('log-probability: {0} (probability: {1})'.format(mapmodel.logp, np.exp(mapmodel.logp)))

    print('Making predictions...')

#==============================================================================
#     #TODO: Make a proper estimate of the fit using all trials.
#     meancorr = np.mean(mcmc.trace('corr')[:], axis=0)
#     meanmean = np.mean(mcmc.trace('mean')[:])
#     meanthetas = np.array([np.mean(mcmc.trace('theta_{0}'.format(i))[:]) for i in range(3)])
#     meanexps = np.array([np.mean(mcmc.trace('exponent_{0}'.format(i))[:]) for i in range(3)])
#     predictions = np.empty(len(newpoints), dtype=np.float64)
#     for i, point in enumerate(newpoints):
#       rvec = np.exp(-np.sum(meanthetas*np.abs(point - samplepoints)**meanexps, axis=1))
#       predictions[i] = meanmean + np.dot(rvec.T, np.dot(np.linalg.inv(meancorr), (y - np.ones(len(y))*meanmean)))
#==============================================================================

    def do_predictions(mcmc, samplepoints, newpoints, y):
      """
      The interpolated values as a VaR trial x MCMC trial array.
      """
      #Need to do it vectorised to get accurate calculations!
      #the line below makes the r-vector over [MCMC trials, rvec(len(sobols)), VaR shock]
      rvecs = \
        np.exp(-(mcmc.trace('theta_0')[:, None, None]*
                 np.abs(newpoints[:,0] - samplepoints[:,0,None])**
                 mcmc.trace('exponent_0')[:, None, None] +
                 mcmc.trace('theta_1')[:, None, None]*
                 np.abs(newpoints[:,1] - samplepoints[:,1,None])**
                 mcmc.trace('exponent_1')[:, None, None] +
                 mcmc.trace('theta_2')[:, None, None]*
                 np.abs(newpoints[:,2] - samplepoints[:,2,None])**
                 mcmc.trace('exponent_2')[:, None, None])).astype(float)

      #the line below returns [MCMCtrials, len(sobols)xlen(sobols) corr matrix]
      corrs = mcmc.trace('corr')[:].astype(float)
#==============================================================================
#       #make the interpolation predictions. Note that mean is [MCMC trials, len(sobols)]
#       # but the values are constant along the second dimension (it's just needed
#       # to make the np.MvNormal call work)
#       N_MCMC = np.shape(rvecs)[0]
#       N_VaR = np.shape(rvecs)[2]
#       predictions = np.empty([N_MCMC, N_VaR], dtype=float)
#==============================================================================

      #Broadcasting is the most efficient (and unreadable) way to go:
      endbit = y[None, :] - mcmc.trace('mean')[:][:,0, None]
      #Left-most dot-product
      firstdp = np.sum(np.linalg.inv(corrs)*endbit[:, :, None], axis=1)

      #Note that this operation makes the output [VaRtrials, MCMCtrials]
      secondterm = np.sum(np.rollaxis(rvecs, 2)*firstdp[:,:], axis=2)
      #Finally, add the mean. Add the VaR dimension as the first one.
      #Note that the last dimension of mean is redundant -- just used for
      #making MvNormal work.
      return mcmc.trace('mean')[:][None, :, 0] + secondterm

      #To avoid a memoryerror, let's loop over the MCMC trials and use einsum:
      #Actually, this code is WAY too slow.
#==============================================================================
#       for MCMCtrial in range(N_MCMC):
#         for VaRtrial in range(N_VaR):
#           predictions[MCMCtrial, VaRtrial] = mcmc.trace('mean')[:][MCMCtrial, 0] + \
#             np.dot(rvecs[MCMCtrial,:,VaRtrial].T,
#                    np.dot(np.linalg.inv(corrs[MCMCtrial,:,:]),
#                           rvecs[MCMCtrial,:,VaRtrial]))
#==============================================================================
#==============================================================================
#           predictions[MCMCtrial,:] = mcmc.trace('mean')[:][MCMCtrial,0,None] + \
#             np.einsum('ij,ij->i', rvecs[MCMCtrial,:,:].T,
#                       np.dot(np.linalg.inv(corrs[MCMCtrial,:,:]), rvecs[MCMCtrial,:,:]).T)
#==============================================================================

    return mcmc, mapmodel, do_predictions(mcmc, samplepoints, newpoints, y)
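A call sketch with toy inputs (shapes only; three coordinates per sample, matching the theta_0..theta_2 traces hard-wired into do_predictions).

samplepts = np.random.rand(50, 3)
values = np.random.rand(50)
sampledata = np.column_stack([samplepts, values])  # Nsamples x (Ndim + 1)
newpoints = np.random.rand(10, 3)                  # Npoints x Ndim
mcmc, mapmodel, predictions = CalcGP(sampledata, newpoints, ntrials=500, nburn=200)
point_estimates = predictions.mean(axis=1)         # average over MCMC trials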
Example No. 21
# Compute the matrices and sample sizes for every node

for n in t.postorder_node_iter():
    if str(n) not in node_matrices:
        node_matrices[str(n)], node_sample_size[str(n)], node_means[str(
            n)] = matrix_mean(n.child_nodes())

# Now the PyMC part begins

root = t.seed_node

theta = [
    pm.MvNormalCov(
        'theta_0',
        #mu=np.array(data.ix[:, 0:num_traits].mean()),
        #value=node_means[str(root)],
        value=np.zeros(num_traits),
        mu=np.zeros(num_traits),
        C=np.eye(num_traits) * 100.)
]

sigma = [
    pm.WishartCov(
        'sigma_0',
        #value=node_matrices[str(root)],
        value=np.eye(num_traits),
        n=num_traits + 1,
        C=np.eye(num_traits) * 100.)
]

tree_idx = {str(root): 0}
Example No. 22
def gp_re_a(data):
    """ Random Effect model, with gaussian process correlations in residuals that
    includes age::
    
        Y_r,c,t,a = beta * X_r,c,t,a + f_r(t) + g_r(a) + h_c(t) + e_r,c,t,a

        f_r(t) ~ GP(0, C[0])
        g_r(a) ~ GP(0, C[1])
        h_c(t) ~ GP(0, C[2])

        C[i] ~ Matern(2, sigma_f[i], tau_f[i])

        e_r,c,t,a ~ N(0, (gamma * W_r,c,t,a)^2 + sigma_e^2 + sigma_r,c,t,a^2)
    """
    # covariates
    K1 = count_covariates(data, 'x')
    K2 = count_covariates(data, 'w')
    X = pl.array([data['x%d' % i] for i in range(K1)])
    W = pl.array([data['w%d' % i] for i in range(K2)])

    # priors
    beta = mc.Laplace('beta', mu=0., tau=1., value=pl.zeros(K1))
    gamma = mc.Exponential('gamma', beta=1., value=pl.zeros(K2))
    sigma_e = mc.Exponential('sigma_e', beta=1., value=1.)

    # hyperpriors for GPs  (These seem to really matter!)
    sigma_f = mc.Exponential('sigma_f', beta=1., value=[1., 1., .1])
    tau_f = mc.Truncnorm('tau_f',
                         mu=25.,
                         tau=5.**-2,
                         a=10,
                         b=pl.inf,
                         value=[25., 25., 25.])
    diff_degree = [2., 2., 2.]

    # fixed-effect predictions
    @mc.deterministic
    def mu(X=X, beta=beta):
        """ mu_i,r,c,t,a = beta * X_i,r,c,t,a"""
        return pl.dot(beta, X)

    @mc.deterministic
    def sigma_explained(W=W, gamma=gamma):
        """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
        return pl.dot(gamma, W)

    # GP random effects
    ## make index dicts to convert from region/country/age to array index
    regions = pl.unique(data.region)
    countries = pl.unique(data.country)
    years = pl.unique(data.year)
    ages = pl.unique(data.age)

    r_index = dict([(r, i) for i, r in enumerate(regions)])
    c_index = dict([(c, i) for i, c in enumerate(countries)])
    t_index = dict([(t, i) for i, t in enumerate(years)])
    a_index = dict([(a, i) for i, a in enumerate(ages)])

    ## make variance-covariance matrices for GPs
    C = []
    for i, grid in enumerate([years, ages, years]):

        @mc.deterministic(name='C_%d' % i)
        def C_i(i=i,
                grid=grid,
                sigma_f=sigma_f,
                tau_f=tau_f,
                diff_degree=diff_degree):
            return gp.matern.euclidean(grid,
                                       grid,
                                       amp=sigma_f[i],
                                       scale=tau_f[i],
                                       diff_degree=diff_degree[i])

        C.append(C_i)

    ## implement GPs as multivariate normals with appropriate covariance structure
    f = [
        mc.MvNormalCov('f_%s' % r,
                       pl.zeros_like(years),
                       C[0],
                       value=pl.zeros_like(years)) for r in regions
    ]
    g = [
        mc.MvNormalCov('g_%s' % r,
                       pl.zeros_like(ages),
                       C[1],
                       value=pl.zeros_like(ages)) for r in regions
    ]
    h = [
        mc.MvNormalCov('h_%s' % c,
                       pl.zeros_like(years),
                       C[2],
                       value=pl.zeros_like(years)) for c in countries
    ]

    # parameter predictions and data likelihood
    ## organize observations into country panels and calculate predicted value before sampling error
    country_param_pred = []
    country_tau = []
    for c in pl.unique(data.country):
        ## find indices for this country
        i_c = [i for i in range(len(data)) if data.country[i] == c]

        ## find the index for this region, country, and for the relevant ages and times
        region = data.region[i_c[0]]
        r_index_c = r_index[region]
        c_index_c = c_index[c]
        t_index_c = [t_index[data.year[i]] for i in i_c]
        a_index_c = [a_index[data.age[i]] for i in i_c]

        ## find predicted parameter value for all observations of country c
        @mc.deterministic(name='country_param_pred_%s' % c)
        def country_param_pred_c(i=i_c,
                                 mu=mu,
                                 f=f[r_index_c],
                                 g=g[r_index_c],
                                 h=h[c_index_c],
                                 a=a_index_c,
                                 t=t_index_c):
            """ country_param_pred_c[row] = parameter_predicted[row] * 1[row.country == c]"""
            country_param_pred_c = pl.zeros_like(data.y)
            country_param_pred_c[i] = mu[i] + f[t] + g[a] + h[t]
            return country_param_pred_c

        country_param_pred.append(country_param_pred_c)

        ## find predicted parameter precision for all observations of country c
        @mc.deterministic(name='country_tau_%s' % c)
        def country_tau_c(i_c=i_c,
                          sigma_explained=sigma_explained,
                          sigma_e=sigma_e,
                          var_d_c=data.se[i_c]**2.):
            """ country_tau_c[row] = tau[row] * 1[row.country == c]"""
            country_tau_c = pl.zeros_like(data.y)
            country_tau_c[i_c] = 1 / (sigma_e**2. + sigma_explained[i_c]**2. +
                                      var_d_c)
            return country_tau_c

        country_tau.append(country_tau_c)

    @mc.deterministic
    def param_predicted(country_param_pred=country_param_pred):
        return pl.sum(country_param_pred, axis=0)

    @mc.deterministic
    def tau(country_tau=country_tau):
        return pl.sum(country_tau, axis=0)

    @mc.deterministic
    def data_predicted(param_predicted=param_predicted, tau=tau):
        return mc.rnormal(param_predicted, tau)

    predicted = data_predicted

    i_obs = [i for i in range(len(data)) if not pl.isnan(data.y[i])]

    @mc.observed
    def obs(value=data.y, i=i_obs, param_predicted=param_predicted, tau=tau):
        return mc.normal_like(value[i], param_predicted[i], tau[i])

    # MCMC step methods
    mod_mc = mc.MCMC(vars())
    mod_mc.use_step_method(mc.AdaptiveMetropolis, mod_mc.beta)

    ## use covariance matrix to seed adaptive metropolis steps
    for r in range(len(regions)):
        mod_mc.use_step_method(mc.AdaptiveMetropolis,
                               mod_mc.f[r],
                               cov=pl.array(C[0].value * .01))
        mod_mc.use_step_method(mc.AdaptiveMetropolis,
                               mod_mc.g[r],
                               cov=pl.array(C[1].value * .01))
    for c in range(len(countries)):
        mod_mc.use_step_method(mc.AdaptiveMetropolis,
                               mod_mc.h[c],
                               cov=pl.array(C[2].value * .01))

    ## find good initial conditions with MAP approx
    try:
        for var_list in [[obs, beta]] + \
            [[obs, f_r] for f_r in f] + \
            [[obs, g_r] for g_r in g] + \
            [[obs, h_c] for h_c in h] + \
            [[obs, beta, sigma_e]] + \
            [[obs, beta] + f] + \
            [[obs, beta] + g] + \
            [[obs, beta] + f + g] + \
            [[obs, h_c] for h_c in h]:
            print 'attempting to maximize likelihood of %s' % [
                v.__name__ for v in var_list
            ]
            mc.MAP(var_list).fit(method='fmin_powell', verbose=1)
            print ''.join(
                ['%s: %s\n' % (v.__name__, v.value) for v in var_list[1:]])
    except mc.ZeroProbability, e:
        print 'Warning: Optimization became infeasible:\n', e
Example No. 23
def fit_blackbody_montecarlo(frequency,
                             seds,
                             errors=None,
                             temperature_guess=10,
                             beta_guess=None,
                             scale_guess=None,
                             blackbody_function=blackbody,
                             quiet=True,
                             return_MC=True,
                             nsamples=5000,
                             burn=1000,
                             min_temperature=0,
                             max_temperature=100,
                             scale_keyword='scale',
                             max_scale=1e60,
                             multivariate=False,
                             **kwargs):
    """
    Parameters
    ----------
    frequency : array
        Array of frequency values
    flux : array
        array of flux values
    err : array (optional)
        Array of error values (1-sigma, normal)
    temperature_guess : float
        Input / starting point for temperature
    min_temperature : float
    max_temperature : float
        Lower/Upper limits on fitted temperature
    beta_guess : float (optional)
        Opacity beta value
    scale_guess : float
        Arbitrary scale value to apply to model to get correct answer
    blackbody_function: function
        Must take x-axis (e.g. frequency), temperature, then scale and beta
        keywords (dependence on beta can be none)
    return_MC : bool
        Return the pymc.MCMC object?
    nsamples : int
        Number of samples to use in determining the posterior distribution
        (the answer)
    burn : int
        number of initial samples to ignore
    scale_keyword : ['scale','logscale','logN']
        What scale keyword to pass to the blackbody function to determine
        the amplitude
    kwargs : kwargs
        passed to blackbody function
    """

    d = {}

    d['temperature'] = pymc.distributions.Uniform('temperature',
                                                  min_temperature,
                                                  max_temperature,
                                                  value=temperature_guess)
    d['scale'] = pymc.distributions.Uniform('scale',
                                            0,
                                            max_scale,
                                            value=scale_guess)
    if beta_guess is not None:
        d['beta'] = pymc.distributions.Uniform('beta', 0, 10, value=beta_guess)
    else:
        d['beta'] = pymc.distributions.Uniform('beta', 0, 10, value=1)

    covar_list = dict([
        ((i, j), pymc.Uninformative('%s-%s' % (i, j), value=(i == j)))
        for i, j in itertools.combinations_with_replacement(('t', 'b', 's'), 2)
    ])
    for i, j in itertools.permutations(('t', 'b', 's'), 2):
        if (i, j) in covar_list:
            covar_list[(j, i)] = covar_list[(i, j)]
    covar_grid = [[covar_list[(i, j)] for i in ('t', 'b', 's')]
                  for j in ('t', 'b', 's')]
    d['tbcov'] = pymc.MvNormalCov(
        'tbcov',
        mu=[d['temperature'], d['beta'], d['scale']],
        C=covar_grid,
        value=[d['temperature'], d['beta'], d['scale']])

    precision_list = dict([
        ((i, j), pymc.Uninformative('%s-%s' % (i, j), value=(i == j)))
        for i, j in itertools.combinations_with_replacement(('t', 'b', 's'), 2)
    ])
    for i, j in itertools.permutations(('t', 'b', 's'), 2):
        if (i, j) in precision_list:
            precision_list[(j, i)] = precision_list[(i, j)]
    precision_grid = [[precision_list[(i, j)] for i in ('t', 'b', 's')]
                      for j in ('t', 'b', 's')]
    # need to force tau > 0...
    d['tbprec'] = pymc.MvNormalCov(
        'tbprec',
        mu=[d['temperature'], d['beta'], d['scale']],
        C=precision_grid,
        value=[1, 1, 1])

    if errors is None:
        errors = [None] * len(seds)  # errors are optional; the None case is handled below
    for ii, (sed, err) in enumerate(zip(seds, errors)):
        d['t_%i' % ii] = pymc.Normal('t_%i' % ii,
                                     mu=d['tbcov'][0],
                                     tau=d['tbprec'][0])
        d['b_%i' % ii] = pymc.Normal('b_%i' % ii,
                                     mu=d['tbcov'][1],
                                     tau=d['tbprec'][1])
        d['s_%i' % ii] = pymc.Normal('s_%i' % ii,
                                     mu=d['tbcov'][2],
                                     tau=d['tbprec'][2])

        def bb_model(temperature=d['t_%i' % ii],
                     scale=d['s_%i' % ii],
                     beta=d['b_%i' % ii]):
            kwargs[scale_keyword] = scale
            y = blackbody_function(frequency,
                                   temperature,
                                   beta=beta,
                                   normalize=False,
                                   **kwargs)
            #print kwargs,beta,temperature,(-((y-flux)**2)).sum()
            return y

        d['bb_model_%i' % ii] = pymc.Deterministic(eval=bb_model,
                                                   name='bb_model_%i' % ii,
                                                   parents={
                                                       'temperature':
                                                       d['t_%i' % ii],
                                                       'scale':
                                                       d['s_%i' % ii],
                                                       'beta':
                                                       d['b_%i' % ii]
                                                   },
                                                   doc='Blackbody SED model.',
                                                   trace=True,
                                                   verbose=0,
                                                   dtype=float,
                                                   plot=False,
                                                   cache_depth=2)

        if err is None:
            d['err_%i' % ii] = pymc.distributions.Uninformative('error_%i' %
                                                                ii,
                                                                value=1.)
        else:
            d['err_%i' % ii] = pymc.distributions.Uninformative('error_%i' %
                                                                ii,
                                                                value=err,
                                                                observed=True)

        d['flux_%i' % ii] = pymc.distributions.Normal('flux_%i' % ii,
                                                      mu=d['bb_model_%i' % ii],
                                                      tau=1. /
                                                      d['err_%i' % ii]**2,
                                                      value=sed,
                                                      observed=True)

    #print d.keys()
    MC = pymc.MCMC(d)

    if nsamples > 0:
        MC.sample(nsamples, burn=burn)
        if return_MC:
            return MC

        MCfit = pymc.MAP(MC)
        MCfit.fit()
        T = MCfit.temperature.value
        scale = MCfit.scale.value

        if beta_guess is not None:
            beta = MCfit.beta.value
            return T, scale, beta
        else:
            return T, scale

    return MC
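A call sketch with toy SEDs; all numbers are illustrative, and the default blackbody function plus the module's numpy/pymc/itertools imports are assumed.

freqs = np.logspace(11, 13, 20)
seds = [np.random.rand(20) * 1e-3 for _ in range(2)]
errs = [np.ones(20) * 1e-4 for _ in range(2)]
MC = fit_blackbody_montecarlo(freqs, seds, errors=errs,
                              temperature_guess=20, beta_guess=1.5,
                              scale_guess=1e10, nsamples=2000, burn=500)
print MC.trace('t_0')[:].mean()  # posterior mean temperature of the first SED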
Example No. 24
                      't_l': 1851,
                      't_h': 1962
                  },
                  random=s_rand,
                  trace=True,
                  value=1900,
                  dtype=int,
                  rseed=1.,
                  observed=False,
                  cache_depth=2,
                  plot=True,
                  verbose=0)

x = pm.Binomial('x', value=7, n=10, p=.8, observed=True)

x = pm.MvNormalCov('x', numpy.ones(3), numpy.eye(3))
y = pm.MvNormalCov('y', numpy.ones(3), numpy.eye(3))
print x + y
#<pymc.PyMCObjects.Deterministic '(x_add_y)' at 0x105c3bd10>

print x[0]
#<pymc.CommonDeterministics.Index 'x[0]' at 0x105c52390>

print x[1] + y[2]
#<pymc.PyMCObjects.Deterministic '(x[1]_add_y[2])' at 0x105c52410>


@pm.deterministic
def r(switchpoint=s, early_rate=e, late_rate=l):
    """The rate of disaster occurrence."""
    value = numpy.zeros(len(D))
    value[:switchpoint] = early_rate
    value[switchpoint:] = late_rate
    return value
Exemplo n.º 25
def setup(dm, key, data_list=[], rate_stoch=None, emp_prior={}, lower_bound_data=[]):
    """ Generate the PyMC variables for a negative-binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the negative binomial likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = dismod3.utils.type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

    Returns
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if pl.any(pl.diff(est_mesh) != 1):
        raise ValueError('ERROR: Gaps in estimation age mesh must all equal 1')

    # calculate effective sample size for all data and lower bound data
    dm.calc_effective_sample_size(data_list)
    dm.calc_effective_sample_size(lower_bound_data)

    # generate regional covariates
    covariate_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()
    X_region, X_study = regional_covariates(key, covariate_dict, derived_covariate)

    # use confidence prior from prior_str  (only for posterior estimate, this is overridden below for empirical prior estimate)
    mu_delta = 1000.
    sigma_delta = 10.
    mu_log_delta = 3.
    sigma_log_delta = .25
    from dismod3.settings import PRIOR_SEP_STR
    for line in dm.get_priors(key).split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'heterogeneity':
            # originally designed for this:
            mu_delta = float(prior[1])
            sigma_delta = float(prior[2])

            # HACK: override design to set sigma_log_delta,
            # .25 = very, .025 = moderately, .0025 = slightly
            if float(prior[2]) > 0:
                sigma_log_delta = .025 / float(prior[2])


    # use the empirical prior mean if it is available
    if len(set(emp_prior.keys()) & set(['alpha', 'beta', 'gamma'])) == 3:
        mu_alpha = pl.array(emp_prior['alpha'])
        sigma_alpha = pl.array(emp_prior['sigma_alpha'])
        alpha = pl.array(emp_prior['alpha']) # TODO: make this stochastic
        vars.update(region_coeffs=alpha)

        beta = pl.array(emp_prior['beta']) # TODO: make this stochastic
        sigma_beta = pl.array(emp_prior['sigma_beta'])
        vars.update(study_coeffs=beta)

        mu_gamma = pl.array(emp_prior['gamma'])
        sigma_gamma = pl.array(emp_prior['sigma_gamma'])

        # Do not inform dispersion parameter from empirical prior stage
        # if 'delta' in emp_prior:
        #    mu_delta = emp_prior['delta']
        #    if 'sigma_delta' in emp_prior:
        #        sigma_delta = emp_prior['sigma_delta']
    else:
        import dismod3.regional_similarity_matrices as similarity_matrices
        n = len(X_region)
        mu_alpha = pl.zeros(n)
        sigma_alpha = .025  # TODO: make this a hyperparameter, with a traditional prior, like inverse gamma
        C_alpha = similarity_matrices.regions_nested_in_superregions(n, sigma_alpha)

        # use alternative region effect covariance structure if requested
        region_prior_key = 'region_effects'
        if region_prior_key in dm.params:
            if dm.params[region_prior_key] == 'uninformative':
                C_alpha = similarity_matrices.uninformative(n, sigma_alpha)

        region_prior_key = 'region_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]
        if region_prior_key in dm.params:
            if dm.params[region_prior_key] == 'uninformative':
                C_alpha = similarity_matrices.regions_nested_in_superregions(n, dm.params[region_prior_key]['std'])

        # add informative prior for sex effect if requested
        sex_prior_key = 'sex_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]
        if sex_prior_key in dm.params:
            print 'adjusting prior on sex effect coefficient for %s' % key
            mu_alpha[n-1] = pl.log(dm.params[sex_prior_key]['mean'])
            sigma_sex = (pl.log(dm.params[sex_prior_key]['upper_ci']) - pl.log(dm.params[sex_prior_key]['lower_ci'])) / (2*1.96)
            C_alpha[n-1, n-1]= sigma_sex**2.

        # add informative prior for time effect if requested
        time_prior_key = 'time_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if time_prior_key in dm.params:
            print 'adjusting prior on time effect coefficient for %s' % key
            mu_alpha[n-2] = pl.log(dm.params[time_prior_key]['mean'])
            sigma_time = (pl.log(dm.params[time_prior_key]['upper_ci']) - pl.log(dm.params[time_prior_key]['lower_ci'])) / (2*1.96)
            C_alpha[n-2, n-2]= sigma_time**2.
        
        #C_alpha = similarity_matrices.all_related_equally(n, sigma_alpha)
        alpha = mc.MvNormalCov('region_coeffs_%s' % key, mu=mu_alpha,
                               C=C_alpha, value=mu_alpha)
        vars.update(region_coeffs=alpha, region_coeffs_step_cov=.005*C_alpha)

        mu_beta = pl.zeros(len(X_study))
        sigma_beta = .1

        # add informative prior for beta effect if requested
        prior_key = 'beta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print 'adjusting prior on beta effect coefficients for %s' % key
            mu_beta = pl.array(dm.params[prior_key]['mean'])
            sigma_beta = pl.array(dm.params[prior_key]['std'])

        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=sigma_beta**-2., value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = 0.*pl.ones(len(est_mesh))
        sigma_gamma = 2.*pl.ones(len(est_mesh))

        # add informative prior for gamma effect if requested
        prior_key = 'gamma_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print 'adjusting prior on gamma effect coefficients for %s' % key
            mu_gamma = pl.array(dm.params[prior_key]['mean'])
            sigma_gamma = pl.array(dm.params[prior_key]['std'])

        # always use dispersed prior on delta for empirical prior phase
        mu_log_delta = 3.
        sigma_log_delta = .25
        # add informative prior for delta effect if requested
        prior_key = 'delta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print 'adjusting prior on delta effect coefficients for %s' % key
            mu_log_delta = dm.params[prior_key]['mean']
            sigma_log_delta = dm.params[prior_key]['std']

    mu_zeta = 0.
    sigma_zeta = .25
    # add informative prior for zeta effect if requested
    prior_key = 'zeta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
    if prior_key in dm.params:
        print 'adjusting prior on zeta effect coefficients for %s' % key
        mu_zeta = dm.params[prior_key]['mean']
        sigma_zeta = dm.params[prior_key]['std']
    
    if mu_delta != 0.:
        if sigma_delta != 0.:
            log_delta = mc.Normal('log_dispersion_%s' % key, mu=mu_log_delta, tau=sigma_log_delta**-2, value=3.)
            zeta = mc.Normal('zeta_%s'%key, mu=mu_zeta, tau=sigma_zeta**-2, value=mu_zeta)
            delta = mc.Lambda('dispersion_%s' % key, lambda x=log_delta: 50. + 10.**x)
            vars.update(dispersion=delta, log_dispersion=log_delta, zeta=zeta, dispersion_step_sd=.1*log_delta.parents['tau']**-.5)
        else:
            delta = mc.Lambda('dispersion_%s' % key, lambda x=mu_delta: x)
            vars.update(dispersion=delta)
        
    else:
        delta = mc.Lambda('dispersion_%s' % key, lambda mu=mu_delta: 0)
        vars.update(dispersion=delta)

    if len(sigma_gamma) == 1:
        sigma_gamma = sigma_gamma[0]*pl.ones(len(est_mesh))

    # create variable for interpolated rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        mu = rate_stoch

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            return pl.log(pl.maximum(dismod3.settings.NEARLY_ZERO, mu)) - pl.dot(alpha, Xa) - pl.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma, tau_gamma=1./sigma_gamma[param_mesh]**2, param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=mu, age_coeffs=gamma, age_coeffs_potential=gamma_potential)
    else:
        # if the rate_stoch does not yet exist, we make gamma a stoch and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = pl.log(dismod3.settings.NEARLY_ZERO + dm.get_initial_value(key))

        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key, mu=mu_gamma[param_mesh], tau=sigma_gamma[param_mesh]**-2, value=initial_gamma[param_mesh])

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return dismod3.utils.interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name=key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return predict_rate([Xa, Xb], alpha, beta, gamma, lambda f, age: f, est_mesh)

        # Create a guess at the covariance matrix for MCMC proposals to update gamma_mesh
        from pymc.gp.cov_funs import matern
        a = pl.atleast_2d(param_mesh).T
        C = matern.euclidean(a, a, diff_degree = 2, amp = 1.**2, scale = 10.)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, rate_stoch=mu, age_coeffs_mesh_step_cov=.005*pl.array(C))

        # adjust value of gamma_mesh based on priors, if necessary
        # TODO: implement more adjustments, currently only adjusted based on at_least priors
        for line in dm.get_priors(key).split(PRIOR_SEP_STR):
            prior = line.strip().split()
            if len(prior) == 0:
                continue
            if prior[0] == 'at_least':
                delta_gamma = pl.log(pl.maximum(mu.value, float(prior[1]))) - pl.log(mu.value)
                gamma_mesh.value = gamma_mesh.value + delta_gamma[param_mesh]

    # create potentials for priors
    dismod3.utils.generate_prior_potentials(vars, dm.get_priors(key), est_mesh)

    # create observed stochastics for data
    vars['data'] = []

    if mu_delta != 0.:  
        value = []
        N = []
        Xa = []
        Xb = []
        ai = []
        aw = []
        Xz = []

        for d in data_list:
            try:
                age_indices, age_weights, Y_i, N_i = values_from(dm, d)
            except ValueError:
                debug('WARNING: could not calculate likelihood for data %d' % d['id'])
                continue

            value.append(Y_i*N_i)
            N.append(N_i)
            Xa.append(covariates(d, covariate_dict)[0])
            Xb.append(covariates(d, covariate_dict)[1])
            Xz.append(float(d.get('bias') or 0.))
            ai.append(age_indices)
            aw.append(age_weights)

            vars['data'].append(d)

        N = pl.array(N)
        Xa = pl.array(Xa)
        Xb = pl.array(Xb)
        Xz = pl.array(Xz)
        value = pl.array(value)
        
        vars['effective_sample_size'] = list(N)
        
    if len(vars['data']) > 0:
        # TODO: consider using only a subset of the rates at each step of the fit to speed computation; say 100 of them
        k = 50000
        if len(vars['data']) < k:
            data_sample = range(len(vars['data']))
        else:
            import random
            @mc.deterministic(name='data_sample_%s' % key)
            def data_sample(n=len(vars['data']), k=k):
                return random.sample(range(n), k)

        @mc.deterministic(name='rate_%s' % key)
        def rates(S=data_sample,
                Xa=Xa, Xb=Xb,
                alpha=alpha, beta=beta, gamma=gamma,
                bounds_func=vars['bounds_func'],
                age_indices=ai,
                age_weights=aw):

            # calculate study-specific rate function
            shifts = pl.exp(pl.dot(Xa[S], alpha) + pl.dot(Xb[S], pl.atleast_1d(beta)))
            exp_gamma = pl.exp(gamma)
            mu = pl.zeros_like(shifts)
            for i,s in enumerate(S):
                mu[i] = pl.dot(age_weights[s], bounds_func(shifts[i] * exp_gamma[age_indices[s]], age_indices[s]))
                # TODO: evaluate speed increase and accuracy decrease of the following:
                #midpoint = age_indices[s][len(age_indices[s])/2]
                #mu[i] = bounds_func(shifts[i] * exp_gamma[midpoint], midpoint)
                # TODO: evaluate speed increase and accuracy decrease of the following (to see speed increase, need to code this up using difference of running sums):
                #mu[i] = pl.dot(pl.ones_like(age_weights[s]) / float(len(age_weights[s])),
                #               bounds_func(shifts[i] * exp_gamma[age_indices[s]], age_indices[s]))
            return mu
        vars['expected_rates'] = rates
        
        @mc.observed
        @mc.stochastic(name='data_%s' % key)
        def obs(value=value,
                S=data_sample,
                N=N,
                mu_i=rates,
                Xz=Xz,
                zeta=zeta,
                delta=delta):
            #zeta_i = .001
            #residual = pl.log(value[S] + zeta_i) - pl.log(mu_i*N[S] + zeta_i)
            #return mc.normal_like(residual, 0, 100. + delta)
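            # PyMC2 parameterization: negative_binomial_like(x, mu, alpha) has
            # mean mu and variance mu + mu**2/alpha, so a larger dispersion
            # delta*exp(Xz*zeta) means less over-dispersion (Poisson limit).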
            logp = mc.negative_binomial_like(value[S], N[S]*mu_i, delta*pl.exp(Xz*zeta))
            return logp

        vars['observed_counts'] = obs

        @mc.deterministic(name='predicted_data_%s' % key)
        def predictions(value=value,
                        N=N,
                        S=data_sample,
                        mu=rates,
                        delta=delta):
            r_S = mc.rnegative_binomial(N[S]*mu, delta)/N[S]
            r = pl.zeros(len(vars['data']))
            r[S] = r_S
            return r

        vars['predicted_rates'] = predictions
        debug('likelihood of %s contains %d rates' % (key, len(vars['data'])))

    # now do the same thing for the lower bound data
    # TODO: refactor to remove duplicated code
    vars['lower_bound_data'] = []
    value = []
    N = []
    Xa = []
    Xb = []
    ai = []
    aw = []
    for d in lower_bound_data:
        try:
            age_indices, age_weights, Y_i, N_i = values_from(dm, d)
        except ValueError:
            debug('WARNING: could not calculate likelihood for data %d' % d['id'])
            continue

        value.append(Y_i*N_i)
        N.append(N_i)
        Xa.append(covariates(d, covariate_dict)[0])
        Xb.append(covariates(d, covariate_dict)[1])
        ai.append(age_indices)
        aw.append(age_weights)

        vars['lower_bound_data'].append(d)

    N = pl.array(N)
    value = pl.array(value)

    if len(vars['lower_bound_data']) > 0:
        @mc.observed
        @mc.stochastic(name='lower_bound_data_%s' % key)
        def obs_lb(value=value, N=N,
                   Xa=Xa, Xb=Xb,
                   alpha=alpha, beta=beta, gamma=gamma,
                   bounds_func=vars['bounds_func'],
                   delta=delta,
                   age_indices=ai,
                   age_weights=aw):

            # calculate study-specific rate function
            shifts = pl.exp(pl.dot(Xa, alpha) + pl.dot(Xb, pl.atleast_1d(beta)))
            exp_gamma = pl.exp(gamma)
            mu_i = [pl.dot(weights, bounds_func(s_i * exp_gamma[ages], ages)) for s_i, ages, weights in zip(shifts, age_indices, age_weights)]  # TODO: try vectorizing this loop to increase speed
            rate_param = mu_i*N
            violated_bounds = pl.nonzero(rate_param < value)
            logp = mc.negative_binomial_like(value[violated_bounds], rate_param[violated_bounds], delta)
            return logp

        vars['observed_lower_bounds'] = obs_lb
        debug('likelihood of %s contains %d lower bounds' % (key, len(vars['lower_bound_data'])))

    return vars
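# A hypothetical usage sketch, not part of the original source (`dm`, `key`,
# and `my_data` below are stand-ins). When rate_stoch is None, the returned
# dict exposes the mesh stochastic plus a precomputed proposal covariance
# that can seed an AdaptiveMetropolis step method:
model_vars = setup(dm, key, data_list=my_data)
m = mc.MCMC(model_vars)
m.use_step_method(mc.AdaptiveMetropolis, model_vars['age_coeffs_mesh'],
                  cov=model_vars['age_coeffs_mesh_step_cov'])
m.sample(20000, burn=10000, thin=10)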
Exemplo n.º 26
    def makeNormPrior(self, model, parts):

        parts.shape_params = np.empty(self.nshapebins, dtype=object)
        for i, (mu, cov) in enumerate(self.shapedistro_params):
            parts.shape_params[i] = pymc.MvNormalCov('shape_params_%d' % i, mu,
                                                     cov)
Exemplo n.º 27
import pandas as pd
import numpy as np
import pymc as pm

dados = pd.read_csv("../dados/dados5sp.csv")

# Phylogeny: (B, ((C, E), (A, D)))

# Hyperparameters of the (B, (CEAD)) node

theta_B_CEAD = pm.MvNormalCov('theta_B_CEAD',
                              mu=np.zeros(4),
                              C=np.eye(4),
                              value=np.zeros(4))

sigma_B_CEAD = pm.WishartCov('sigma_B_CEAD', n=5, C=np.eye(4), value=np.eye(4))

# Branches of the (B, (CEAD)) node

theta_B = pm.MvNormalCov('theta_B',
                         mu=theta_B_CEAD,
                         C=sigma_B_CEAD,
                         value=np.zeros(4))

sigma_B = pm.WishartCov('sigma_B', n=5, C=sigma_B_CEAD, value=np.eye(4))

theta_CEAD = pm.MvNormalCov('theta_CEAD',
                            mu=theta_B_CEAD,
                            C=sigma_B_CEAD,
                            value=np.zeros(4))
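# A hedged sketch, not in the original snippet (which is cut off here):
# continuing the same pattern one level down the phylogeny would add a
# Wishart scale for the CEAD node and MvNormal means for its child clades.
# The names sigma_CEAD, theta_CE, and theta_AD are hypothetical.
sigma_CEAD = pm.WishartCov('sigma_CEAD', n=5, C=sigma_B_CEAD, value=np.eye(4))

theta_CE = pm.MvNormalCov('theta_CE',
                          mu=theta_CEAD,
                          C=sigma_CEAD,
                          value=np.zeros(4))

theta_AD = pm.MvNormalCov('theta_AD',
                          mu=theta_CEAD,
                          C=sigma_CEAD,
                          value=np.zeros(4))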
Exemplo n.º 28
dirichilet_distribution_prior_correlation = pm.Uniform(
    'dirichilet_distribution_prior_correlation', lower=0.499, upper=0.501)
dirichilet_distribution_prior = {}
m_list = {}
cov_list = {}
for i in range(1, MAX_DIMENSION + 1):
    print i
    m_list[i] = np.array([dirichilet_distribution_prior_mean] * i)
    # off-diagonal entries: std**2 * correlation
    cov_list[i] = np.array(
        [[dirichilet_distribution_prior_std * dirichilet_distribution_prior_std *
          dirichilet_distribution_prior_correlation] * i] * i)
    # diagonal entries: std**2
    for j in range(i):
        cov_list[i][j][j] = (dirichilet_distribution_prior_std *
                             dirichilet_distribution_prior_std)
    dirichilet_distribution_prior[i] = pm.MvNormalCov(
        'dirichilet_distribution_prior', mu=m_list[i], C=cov_list[i])

data_array = np.array([[1.0, 2.0, 3.0, 0.4, 0.4, 0.2], [1.0, 1.0, 0.4, 0.6]])


@pm.stochastic(dtype=float)
def data_tree(value=[1.0, 1.0, 0.4, 0.6], observed=True):
    n_dimension = len(value) // 2

    #
    #    for j in range(len(permutation_table)):
    #        guest_arrange_num = guest_total_num
    #        comb_num = 1
    #        for jj in range(len(permutation_table[j])):
    #            guest_num_per_table = permutation_table[j][jj]
    #            comb_num = comb_num * int(comb(guest_arrange_num - 1, guest_num_per_table - 1))
Exemplo n.º 29
After a while, the covariance matrix of the samples for mu tends to C/N.

"""
import numpy as np
import pymc
from numpy.testing import TestCase, assert_array_almost_equal

N = 50
mu = np.array([-2., 3.])
C = np.array([[1, .8 * np.sqrt(2)], [.8 * np.sqrt(2), 2.]])
r = pymc.rmv_normal_cov(mu, C, size=N)


@pymc.stochastic
def mean(value=np.array([0., 0.])):
    """The mean of the samples (mu). """
    return 0.


obs = pymc.MvNormalCov('obs', mean, C, value=r, observed=True)
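# With the flat prior above (logp identically 0), the posterior of `mean`
# given the N observed draws in `r` is multivariate normal with covariance
# C/N, which is what test_convergence checks against the trace covariance.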


class TestAM(TestCase):
    def test_convergence(self):
        S = pymc.MCMC([mean, obs])
        S.use_step_method(pymc.AdaptiveMetropolis, mean, delay=200)

        S.sample(6000, burn=1000)
        Cs = np.cov(S.trace('mean')[:].T)
        assert_array_almost_equal(Cs, C / N, 2)

    def test_cov_from_trace(self):
        S = pymc.MCMC([mean, obs])
        S.use_step_method(pymc.Metropolis, mean)
        S.sample(2000)
Exemplo n.º 30
import pymc as pm
import numpy as np

# Create known parameters
original_sigma = np.array([[1, 0.5], [0.5, 1]])
original_theta = [1, -1]

# Simulate data with mean theta and covariance sigma
data = np.random.multivariate_normal(original_theta, original_sigma, 100)

# Define priors: a Wishart for sigma and a normal for theta
sigma = pm.WishartCov('sigma', n=3, C=np.eye(2), value=np.eye(2))

theta = pm.MvNormalCov('theta', mu=[0.,0.], C=np.eye(2), value=[0.,0.])

# Gaussian likelihood with mean theta and covariance sigma
x = pm.MvNormalCov('x', theta, sigma, value=data, observed=True)
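# A minimal fitting sketch (an assumption, not part of the original example):
# sample the posterior and check that it recovers the known parameters.
M = pm.MCMC([theta, sigma, x])
M.sample(10000, burn=5000)
print M.trace('theta')[:].mean(axis=0)  # should be close to original_theta
print M.trace('sigma')[:].mean(axis=0)  # should be close to original_sigma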