Example #1
def fe(data):
    """ Fixed Effect model::
    
        Y_r,c,t = beta * X_r,c,t + e_r,c,t
        e_r,c,t ~ N(0, sigma^2)
    """
    # covariates
    K1 = count_covariates(data, 'x')
    X = pl.array([data['x%d' % i] for i in range(K1)])

    K2 = count_covariates(data, 'w')
    W = pl.array([data['w%d' % i] for i in range(K2)])

    # priors
    beta = mc.Uninformative('beta', value=pl.zeros(K1))
    gamma = mc.Uninformative('gamma', value=pl.zeros(K2))
    sigma_e = mc.Uniform('sigma_e', lower=0, upper=1000, value=1)

    # predictions
    @mc.deterministic
    def mu(X=X, beta=beta):
        return pl.dot(beta, X)

    param_predicted = mu

    @mc.deterministic
    def sigma_explained(W=W, gamma=gamma):
        """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
        return pl.dot(gamma, W)

    @mc.deterministic
    def predicted(mu=mu, sigma_explained=sigma_explained, sigma_e=sigma_e):
        return mc.rnormal(mu, 1 / (sigma_explained**2. + sigma_e**2.))

    # likelihood
    i_obs = pl.find(1 - pl.isnan(data.y))

    @mc.observed
    def obs(value=data.y,
            i_obs=i_obs,
            mu=mu,
            sigma_explained=sigma_explained,
            sigma_e=sigma_e):
        return mc.normal_like(value[i_obs], mu[i_obs],
                              1. / (sigma_explained[i_obs]**2. + sigma_e**2.))

    # set up MCMC step methods
    mod_mc = mc.MCMC(vars())
    mod_mc.use_step_method(mc.AdaptiveMetropolis, mod_mc.beta)

    # find good initial conditions with MAP approx
    print 'attempting to maximize likelihood'
    var_list = [mod_mc.beta, mod_mc.obs, mod_mc.sigma_e]
    mc.MAP(var_list).fit(method='fmin_powell', verbose=1)

    return mod_mc
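
A minimal usage sketch for the model above (hypothetical: assumes pymc 2.x imported as mc, pylab as pl, and a record array data with fields 'x0', ..., 'w0', ..., and 'y' that count_covariates can count):

mod = fe(data)
mod.sample(iter=20000, burn=10000, thin=10)  # draw posterior samples
print(mod.beta.stats()['mean'])              # posterior mean of the fixed effects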
Example #2
def test_covariate_model_dispersion():
    # simulate normal data
    n = 100

    model = dismod_mr.data.ModelData()
    model.hierarchy, model.output_template = dismod_mr.testing.data_simulation.small_output()

    Z = mc.rcategorical([.5, .5], n)
    zeta_true = -.2

    pi_true = .1
    ess = 10000.*np.ones(n)
    eta_true = np.log(50)
    delta_true = 50 + np.exp(eta_true)

    p = mc.rnegative_binomial(pi_true*ess, delta_true*np.exp(Z*zeta_true)) / ess

    model.input_data = pd.DataFrame(dict(value=p, z_0=Z))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    variables = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    variables.update(dismod_mr.model.covariates.mean_covariate_model('test', variables['mu'], model.input_data, {},
                                                                     model, 'all', 'total', 'all'))
    variables.update(dismod_mr.model.covariates.dispersion_covariate_model('test', model.input_data, .1, 10.))
    variables.update(dismod_mr.model.likelihood.neg_binom('test', variables['pi'], variables['delta'], p, ess))

    # fit model
    m = mc.MCMC(variables)
    m.sample(2)
Example #3
def anneal_ldst(n=11, phases=10, iters=1000):
    """ MCMC/simulated annealing to generate a random low-degree
    spanning tree on a grid graph

    Parameters
    ----------
    n : int, size of grid
    phases : int, optional, number of cooling phases
    iters : int, optional, number of MCMC steps per phase
    
    Returns
    -------
    T : nx.Graph, spanning tree with T.base_graph, with few degree 3 vertices
    """
    beta = pm.Uninformative('beta', value=1.)
    ldst = LDST(my_grid_graph([n,n]), beta=beta)

    mod_mc = pm.MCMC([beta, ldst])
    mod_mc.use_step_method(STMetropolis, ldst)
    mod_mc.use_step_method(pm.NoStepper, beta)

    for i in range(phases):
        print('phase %d' % (i+1),)
        beta.value = i*5
        mod_mc.sample(iters, burn=iters-1)
        print('frac of deg 2 vtx = %.2f' % np.mean(np.array(ldst.value.degree().values()) == 2))
    return ldst.value
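
A quick hypothetical call, assuming my_grid_graph, LDST, and STMetropolis are defined as in the surrounding module (networkx 1.x degree() API, matching the snippet above):

T = anneal_ldst(n=7, phases=5, iters=200)  # small instance for a fast check
deg = np.array(T.degree().values())
print('frac of deg 2 vtx = %.2f' % np.mean(deg == 2))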
Example #4
def anneal_bdst(n=11, depth=10, phases=10, iters=1000):
    """ MCMC/simulated annealing to generate a random bounded-depth spanning tree
    Parameters
    ----------
    n : int, size of grid
    depth : int, optional, target bound on depth

    Returns
    -------
    T : nx.Graph, spanning tree with T.base_graph, possibly with degree bound satisfied
    """

    beta = pm.Uninformative('beta', value=1.)

    G = nx.grid_graph([n, n])
    root = ((n-1)//2, (n-1)//2)  # integer division keeps the root on the grid
    bdst = BDST(G, root, depth, beta)

    @pm.deterministic
    def max_depth(T=bdst, root=root):
        shortest_path_length = nx.shortest_path_length(T, root)
        T.max_depth = max(shortest_path_length.values())
        return T.max_depth

    mod_mc = pm.MCMC([beta, bdst, max_depth])
    mod_mc.use_step_method(STMetropolis, bdst)
    mod_mc.use_step_method(pm.NoStepper, beta)

    for i in range(phases):
        beta.value = i*5
        mod_mc.sample(iters, thin=max(1, iters//100))
        print('cur depth', max_depth.value)
        print('pct of trace with max_depth <= depth', np.mean(mod_mc.trace(max_depth)[:] <= depth))
    return bdst.value
Example #5
    def __init__(self,
                 loc,
                 scale,
                 loc_step_method=None,
                 scale_step_method=None,
                 beta_step_method=None,
                 loc_step_method_args=None,
                 scale_step_method_args=None,
                 beta_step_method_args=None,
                 *args,
                 **kwargs):

        if type(loc) != list:
            loc = [loc]
        self.loc = loc
        self.scale = scale
        self.beta = set([])
        for node in self.loc:
            self.beta.update(node.extended_children)

        pm.StepMethod.__init__(self, [scale] + loc + list(self.beta), *args,
                               **kwargs)

        #set alpha
        self.alpha = pm.Uninformative('alpha',
                                      value=1.,
                                      trace=False,
                                      plot=False)

        #assign default Metropolis step method if needed
        if loc_step_method is None:
            loc_step_method = pm.Metropolis
        if scale_step_method is None:
            scale_step_method = pm.Metropolis
        if beta_step_method is None:
            beta_step_method = pm.Metropolis

        if loc_step_method_args is None:
            loc_step_method_args = {}
        if scale_step_method_args is None:
            scale_step_method_args = {}
        if beta_step_method_args is None:
            beta_step_method_args = {}

        #set step methods
        self.loc_steps = [
            loc_step_method(node, **loc_step_method_args) for node in self.loc
        ]
        self.scale_step = scale_step_method(scale, **scale_step_method_args)
        self.beta_steps = [
            beta_step_method(node, **beta_step_method_args)
            for node in self.beta
        ]
        self.alpha_step = MetropolisAlpha(self.alpha, self.beta, loc, scale)
Example #6
    def __init__(self,
                 predictions,
                 measurements,
                 uncertainties,
                 regularization_strength=1.0,
                 prior_pops=None):
        """Bayesian Energy Landscape Tilting with maximum entropy prior.

        Parameters
        ----------
        predictions : ndarray, shape = (num_frames, num_measurements)
            predictions[j, i] gives the ith observable predicted at frame j
        measurements : ndarray, shape = (num_measurements)
            measurements[i] gives the ith experimental measurement
        uncertainties : ndarray, shape = (num_measurements)
            uncertainties[i] gives the uncertainty of the ith experiment
        regularization_strength : float
            How strongly to weight the MVN prior (e.g. lambda)
        prior_pops : ndarray, optional, shape = (num_frames)
            Prior populations of each conformation.  If None, use uniform populations.
        """

        BELT.__init__(self,
                      predictions,
                      measurements,
                      uncertainties,
                      prior_pops=prior_pops)

        self.alpha = pymc.Uninformative(
            "alpha", value=np.zeros(self.num_measurements)
        )  # The prior on alpha is defined as a potential, so we use Uninformative variables here.
        self.initialize_variables()

        self.log_prior_pops = np.log(self.prior_pops)

        @pymc.potential
        def logp_prior(populations=self.populations,
                       log_prior_pops=self.log_prior_pops):
            # Since x*log(x) -> 0 as x -> 0, we can safely *drop* the zero entries.
            # Otherwise numpy produces NaNs: it evaluates 0 * np.log(0) as 0 * -inf = nan.
            ind = np.where(populations > 0)[0]
            populations = populations[ind]
            log_prior_pops = log_prior_pops[ind]
            expr = populations.dot(
                np.log(populations)) - populations.dot(log_prior_pops)
            return -1 * regularization_strength * expr

        self.logp_prior = logp_prior
Example #7
def linear():
    beta = mc.Uninformative('beta', value=[0., 0.])
    sigma = mc.Uniform('sigma', lower=0., upper=100., value=1.)

    @mc.deterministic
    def y_mean(beta=beta, X=data.hdi2005):
        return beta[0] + beta[1] * X

    y_obs = mc.Normal('y_obs',
                      value=data.tfr2005,
                      mu=y_mean,
                      tau=sigma**-2,
                      observed=True)

    return vars()
Example #8
def nonlinear():
    beta = mc.Uninformative('beta', value=[0., 0., 0.])
    gamma = mc.Normal('gamma', mu=.9, tau=.05**-2, value=.9)
    sigma = mc.Uniform('sigma', lower=0., upper=100., value=1.)

    @mc.deterministic
    def y_mean(beta=beta, gamma=gamma, X=data.hdi2005):
        return beta[0] + beta[1]*X \
            + beta[2]*pl.maximum(0., X-gamma)

    y_obs = mc.Normal('y_obs',
                      value=data.tfr2005,
                      mu=y_mean,
                      tau=sigma**-2,
                      observed=True)

    return vars()
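
A hedged fitting sketch for the two models above, assuming data.hdi2005 and data.tfr2005 are loaded as in the source script:

m1 = mc.MCMC(linear())
m1.sample(iter=20000, burn=10000, thin=10)

m2 = mc.MCMC(nonlinear())
m2.sample(iter=20000, burn=10000, thin=10)
print(m2.gamma.stats()['mean'])  # posterior mean of the knot location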
Example #9
    def __init__(self,
                 predictions,
                 measurements,
                 uncertainties,
                 regularization_strength=1.0,
                 prior_pops=None):
        """Bayesian Energy Landscape Tilting with Dirichlet prior.

        Parameters
        ----------
        predictions : ndarray, shape = (num_frames, num_measurements)
            predictions[j, i] gives the ith observable predicted at frame j
        measurements : ndarray, shape = (num_measurements)
            measurements[i] gives the ith experimental measurement
        uncertainties : ndarray, shape = (num_measurements)
            uncertainties[i] gives the uncertainty of the ith experiment
        regularization_strength : float
            How strongly to weight the prior (e.g. lambda)
        prior_pops : ndarray, optional, shape = (num_frames)
            Prior populations of each conformation.  If None, use uniform populations.
        """

        BELT.__init__(self,
                      predictions,
                      measurements,
                      uncertainties,
                      prior_pops=prior_pops)

        self.alpha = pymc.Uninformative(
            "alpha", value=np.zeros(self.num_measurements)
        )  # The prior on alpha is defined as a potential, so we use Uninformative variables here.
        self.initialize_variables()

        @pymc.potential
        def logp_prior(populations=self.populations):
            if populations.min() <= 0:
                return -1 * np.inf
            else:
                expr = self.prior_pops.dot(np.log(populations))
                return regularization_strength * expr

        self.logp_prior = logp_prior
Example #10
def banana(dim=2, b=.03, step='Metropolis', iters=5000):
    """ The non-linear banana-shaped distributions are constructed
    from the Gaussian ones by 'twisting' them as follows.  Let f be
    the density of the multivariate normal distribution N(0, C_1) with
    the covariance again given by C_1 = diag(100, 1, ..., 1).  The
    density function of the 'twisted' Gaussian with the nonlinearity
    parameter b > 0 is given by f_b = f \circ \phi_b, where the
    function \phi_b(x) = (x_1, x_2 + b x_1^2 - 100b, x_3, ..., x_n).
    """
    assert dim >= 2, 'banana must be dimension >= 2'
    C_1 = pl.ones(dim)
    C_1[0] = 100.
    X = mc.Uninformative('X', value=pl.zeros(dim))

    def banana_like(X, tau, b):
        phi_X = pl.copy(X)
        phi_X *= 30.  # rescale X to match scale of other models
        phi_X[1] = phi_X[1] + b * phi_X[0]**2 - 100 * b

        return mc.normal_like(phi_X, 0., tau)

    @mc.potential
    def banana(X=X, tau=C_1**-1, b=b):
        return banana_like(X, tau, b)

    mod = setup_and_sample(vars(), step, iters)
    im = pl.imread('banana.png')
    x = pl.arange(-1, 1, .01)
    y = pl.arange(-1, 1, .01)
    z = [[banana_like(pl.array([xi, yi]), C_1[[0, 1]]**-1, b) for xi in x]
         for yi in y]

    def plot_distribution():
        pl.imshow(im,
                  extent=[-1, 1, -1, 1],
                  aspect='auto',
                  interpolation='bicubic')
        pl.contour(x, y, z, [-1000, -10, -6], cmap=pl.cm.Greys, alpha=.5)

    mod.plot_distribution = plot_distribution

    return mod
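
For reference, a hypothetical comparison across step methods; it assumes setup_and_sample returns an mc.MCMC object, as its use above suggests:

for step in ['Metropolis', 'AdaptiveMetropolis']:
    mod = banana(dim=2, b=.03, step=step, iters=5000)
    print(step, mod.X.stats()['mean'])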
Example #11
    def __init__(self,
                 predictions,
                 measurements,
                 uncertainties,
                 prior_pops=None,
                 weights_alpha=None):
        """Bayesian Energy Landscape Tilting with Jeffrey's prior.

        Parameters
        ----------
        predictions : ndarray, shape = (num_frames, num_measurements)
            predictions[j, i] gives the ith observable predicted at frame j
        measurements : ndarray, shape = (num_measurements)
            measurements[i] gives the ith experimental measurement
        uncertainties : ndarray, shape = (num_measurements)
            uncertainties[i] gives the uncertainty of the ith experiment
        prior_pops : ndarray, optional, shape = (num_frames)
            Prior populations of each conformation.  If None, use uniform populations.
            
        Notes
        -----
        This feature is UNTESTED.
        """
        BELT.__init__(self,
                      predictions,
                      measurements,
                      uncertainties,
                      prior_pops=prior_pops)

        self.alpha = pymc.Uninformative("alpha",
                                        value=np.zeros(self.num_measurements))
        self.initialize_variables()

        @pymc.potential
        def logp_prior(populations=self.populations, mu=self.mu):
            return log_jeffreys(populations, predictions, mu=mu)

        self.logp_prior = logp_prior
Example #12
    y_hat = n*p
    return 2.*sum((y*log(y/y_hat))[where(y>0)]) + 2.*sum(((n-y)*log((n-y)/(n-y_hat)))[where(y<n)])

# c: .02-1.
# alph: 18-30
# b: .02-1.

# alpha = array([10., 10., 10., 3.])
sig_mean = array([.5, .4, .5, 1, .4, .5, 1])

# sigma = pm.Gamma('sigma', alpha, alpha/sig_mean)
sigma = pm.OneOverX('sigma', value=sig_mean)

p_mean_mu = array([1.3, 3.7, 2.3, 0, 3.7, 2.3, 0])
# p_mean = pm.MvNormal('p_mean', p_mean_mu, diag([10., 10., 10., 1.]))
p_mean = pm.Uninformative('p_mean',value=p_mean_mu)

R1 = pm.Uninformative('R1', zeros(6,dtype=float))
R2 = pm.Uninformative('R2', zeros(3,dtype=float))
R3 = pm.Uninformative('R3', zeros(3,dtype=float))

# For debugging
# R1.value = arange(1,7)
# R2.value = arange(7,10)
# R3.value = arange(10,13)

@pm.deterministic
def cholfac(R1=R1, R2=R2, R3=R3, sigma = sigma):
    """Cholesky factor of the covariance matrix."""

    cov = np.zeros((7,7),dtype=float)
Example #13
def create_model(region_name,
                 all_pts,
                 name,
                 scale_params,
                 amp_params,
                 cpus,
                 with_stukel,
                 spatial,
                 chunk,
                 covariate_names,
                 disttol,
                 ttol,
                 AM_delay=50000,
                 AM_interval=100,
                 AM_sd=.1,
                 crashed_db=None):

    # ======================================
    # = Make sure it's safe to make output =
    # ======================================

    if not spatial:
        name += '_nonspatial'
    if with_stukel:
        name += '_stukel'
    for cname in covariate_names:
        name += '_%s' % cname

    if name + '.hdf5' in os.listdir('.'):
        print
        print """=============
= ATTENTION =
============="""
        print

        OK = False
        while not OK:
            y = raw_input(
                'Database %s already exists.\nDo you want to delete it? Error will be raised otherwise.\n>> '
                % (name + '.hdf5'))
            if y.lower() == 'yes':
                print 'OK, moving to trash.'
                os.system('mv %s ~/.Trash' % (name + '.hdf5'))
                OK = True
            elif y.lower() == 'no':
                raise RuntimeError, 'But dash it all! I mean to say, what?'
            else:
                y = raw_input('Please type yes or no.\n>> ')

    norun_name = '_'.join(name.split('_')[:2])

    C_time = [0.]
    f_time = [0.]
    M_time = [0.]

    # =============================
    # = Preprocess data, uniquify =
    # =============================

    # Convert latitude and longitude from degrees to radians.
    lon = all_pts.LONG * np.pi / 180.
    lat = all_pts.LAT * np.pi / 180.

    # Convert time to end year - 2009 (no sense forcing mu to adjust by too much).
    # t = all_pts.YEAR_START-2009. + all_pts.MONTH_STAR / 12.
    t = all_pts.TIME - 2009

    # Make lon, lat, t triples.
    data_mesh = np.vstack((lon, lat, t)).T

    disttol = disttol / 6378.
    ttol = ttol / 12.

    # Find near spatiotemporal duplicates.
    if spatial:
        ui = []
        ri = []
        fi = []
        ti = []
        dx = np.empty(1)
        for i in xrange(data_mesh.shape[0]):
            match = False
            for j in xrange(len(ui)):
                pm.gp.geo_rad(dx, data_mesh[i, :2].reshape((1, 2)),
                              data_mesh[ui[j], :2].reshape((1, 2)))
                dt = abs(t[ui[j]] - t[i])

                if dx[0] < disttol and dt < ttol:
                    match = True
                    fi.append(j)
                    ti[j].append(i)
                    ri.append(i)
                    break

            if not match:
                fi.append(len(ui))
                ui.append(i)
                ti.append([i])
        ui = np.array(ui)
        ti = [np.array(tii) for tii in ti]
        fi = np.array(fi)
        ri = np.array(ri)
        logp_mesh = data_mesh[ui, :]
        if len(ri) > 0:
            repeat_mesh = data_mesh[ri, :]
        else:
            repeat_mesh = np.array([])
    else:
        ui = np.arange(len(t))
        ti = [np.array([uii]) for uii in ui]
        fi = ui
        ri = np.array([])
        logp_mesh = data_mesh
        repeat_mesh = np.array([])

    # =====================
    # = Create PyMC model =
    # =====================

    init_OK = False
    while not init_OK:

        # Flat prior on m_const (mu).
        m_const = pm.Uninformative('m_const', value=-3.)
        if with_stukel:
            m_const.value = -1.1

        # Flat prior on coefficient of time (k).
        t_coef = pm.Uninformative('t_coef', value=.1)
        if with_stukel:
            t_coef.value = -.4

        # Inverse-gamma prior on nugget variance V.
        tau = pm.Gamma('tau', value=2., alpha=.001, beta=.001 / .25)
        V = pm.Lambda('V', lambda tau=tau: 1. / tau)

        vars_to_writeout = ['V', 'm_const', 't_coef']

        # Pull out covariate information.
        # The values of covariate_dict are (Stochastic, interpolated covariate) tuples.
        # Interpolation is done to the data mesh.
        covariate_dict = {}
        for cname in covariate_names:
            # hf = openFile(mbgw.__path__[0] + '/auxiliary_data/' + cname + '.hdf5')
            if cname == 'periurb':
                this_interp_covariate = all_pts.URB_CLS == 2
                if np.sum(all_pts.URB_CLS == 3) < 10:
                    print 'Warning: Very few urban points, using same coefficient for urban and periurban'
                    this_interp_covariate += all_pts.URB_CLS == 3
            elif cname == 'urb':
                if np.sum(all_pts.URB_CLS == 3) >= 10:
                    this_interp_covariate = all_pts.URB_CLS == 3
                else:
                    this_interp_covariate = None
            else:
                this_cov = getattr(auxiliary_data, cname)
                this_interp_covariate = nearest_interp(this_cov.long[:],
                                                       this_cov.lat[:],
                                                       this_cov.data,
                                                       data_mesh[:, 0],
                                                       data_mesh[:, 1])
            if this_interp_covariate is not None:
                this_coef = pm.Uninformative(cname + '_coef', value=0.)
                covariate_dict[cname] = (this_coef, this_interp_covariate)

        # Lock down parameters of Stukel's link function to obtain standard logit.
        # These can be freed by removing 'observed' flags, but mixing gets much worse.
        if with_stukel:
            a1 = pm.Uninformative('a1', .5)
            a2 = pm.Uninformative('a2', .8)
        else:
            a1 = pm.Uninformative('a1', 0, observed=True)
            a2 = pm.Uninformative('a2', 0, observed=True)

        transformed_spatial_vars = [V]
        if spatial:
            # Make it easier for inc (psi) to jump across 0: let nonmod_inc roam freely over the reals,
            # and mod it by pi to get the 'inc' parameter.
            nonmod_inc = pm.Uninformative('nonmod_inc', value=.5)
            inc = pm.Lambda('inc',
                            lambda nonmod_inc=nonmod_inc: nonmod_inc % np.pi)

            # Use a uniform prior on sqrt(ecc). Using a uniform prior on ecc itself put too little
            # probability mass on appreciable levels of anisotropy.
            sqrt_ecc = pm.Uniform('sqrt_ecc', value=.1, lower=0., upper=1.)
            ecc = pm.Lambda('ecc', lambda s=sqrt_ecc: s**2)

            # Subjective skew-normal prior on amp (the partial sill, tau) in log-space.
            # Parameters are passed in in manual_MCMC_supervisor.
            log_amp = pm.SkewNormal('log_amp', **amp_params)
            amp = pm.Lambda('amp', lambda log_amp=log_amp: np.exp(log_amp))

            # Subjective skew-normal prior on scale (the range, phi_x) in log-space.
            log_scale = pm.SkewNormal('log_scale', **scale_params)
            scale = pm.Lambda('scale',
                              lambda log_scale=log_scale: np.exp(log_scale))

            # Exponential prior on the temporal scale/range, phi_t. Standard one-over-x
            # doesn't work bc data aren't strong enough to prevent collapse to zero.
            scale_t = pm.Exponential('scale_t', .1)

            # Uniform prior on limiting correlation far in the future or past.
            t_lim_corr = pm.Uniform('t_lim_corr', 0, 1, value=.8)

            # Uniform prior on sinusoidal fraction in temporal variogram
            sin_frac = pm.Uniform('sin_frac', 0, 1)

            vars_to_writeout.extend([
                'inc', 'ecc', 'amp', 'scale', 'scale_t', 't_lim_corr',
                'sin_frac'
            ])
            transformed_spatial_vars.extend([inc, ecc, amp, scale])

        # Collect stochastic variables with observed=False for the adaptive Metropolis stepper.
        trial_stochs = [v[0] for v in covariate_dict.itervalues()
                        ] + [m_const, tau, a1, a2, t_coef]
        if spatial:
            trial_stochs = trial_stochs + [
                nonmod_inc, sqrt_ecc, log_amp, log_scale, scale_t, t_lim_corr,
                sin_frac
            ]
        nondata_stochs = []
        for stoch in trial_stochs:
            if not stoch.observed:
                nondata_stochs.append(stoch)

        # Collect variables to write out

        # The mean of the field
        @pm.deterministic
        def M(m=m_const, tc=t_coef):
            return pm.gp.Mean(st_mean_comp, m_const=m, t_coef=tc)

        # The mean, evaluated  at the observation points, plus the covariates
        @pm.deterministic(trace=False)
        def M_eval(M=M, lpm=logp_mesh, cv=covariate_dict):
            out = M(lpm)
            for c in cv.itervalues():
                out += c[0] * c[1][ui]
            return out

        # Create covariance and MV-normal F if model is spatial.
        if spatial:
            try:
                # A constraint on the space-time covariance parameters that ensures temporal correlations are
                # always between -1 and 1.
                @pm.potential
                def st_constraint(sd=.5, sf=sin_frac, tlc=t_lim_corr):
                    if -sd >= 1. / (-sf * (1 - tlc) + tlc):
                        return -np.Inf
                    else:
                        return 0.

                # A Deterministic valued as a Covariance object. Uses covariance my_st, defined above.
                @pm.deterministic
                def C(amp=amp,
                      scale=scale,
                      inc=inc,
                      ecc=ecc,
                      scale_t=scale_t,
                      t_lim_corr=t_lim_corr,
                      sin_frac=sin_frac):
                    return pm.gp.FullRankCovariance(my_st,
                                                    amp=amp,
                                                    scale=scale,
                                                    inc=inc,
                                                    ecc=ecc,
                                                    st=scale_t,
                                                    sd=.5,
                                                    tlc=t_lim_corr,
                                                    sf=sin_frac,
                                                    n_threads=cpus)

                # The evaluation of the Covariance object.
                @pm.deterministic(trace=False)
                def C_eval(C=C):
                    return C(logp_mesh, logp_mesh)

                # The field evaluated at the uniquified data locations
                f = pm.MvNormalCov('f', M_eval, C_eval, value=M_eval.value)

                # The field evaluated at all the data locations
                @pm.deterministic(trace=False)
                def f_eval(f=f):
                    return f[fi]

                init_OK = True
            except pm.ZeroProbability, msg:
                print 'Trying again: %s' % msg
                init_OK = False
                gc.collect()

        # if not spatial
        else:
            C = None

            # The field is just the mean, there's no spatially-structured component.
            @pm.deterministic
            def f(M=M_eval):
                return M[fi]

            f_eval = f

            init_OK = True
Example #14
@mc.deterministic
def pred(pi=pi):
    return mc.rbinomial(n_pred, pi) / float(n_pred)

### @export 'binomial-fit'
mc.MCMC([pi, obs, pred]).sample(iter, burn, thin, verbose=False, progress_bar=False)

### @export 'binomial-store'
# mc.Matplot.plot(pi)
# pl.savefig('book/graphics/ci-prev_meta_analysis-binomial_diagnostic.png')
results['Binomial'] = dict(pi=pi.stats(), pred=pred.stats())


### @export 'beta-binomial-model'
alpha = mc.Uninformative('alpha', value=4.)
beta = mc.Uninformative('beta', value=1000.)
pi_mean = mc.Lambda('pi_mean', lambda alpha=alpha, beta=beta: alpha/(alpha+beta))
pi = mc.Beta('pi', alpha, beta, value=r)

@mc.potential
def obs(pi=pi):
    return mc.binomial_like(r*n, n, pi)

@mc.deterministic
def pred(alpha=alpha, beta=beta):
    return mc.rbinomial(n_pred, mc.rbeta(alpha, beta)) / float(n_pred)

### @export 'beta-binomial-fit'
mcmc = mc.MCMC([alpha, beta, pi_mean, pi, obs, pred])
mcmc.use_step_method(mc.AdaptiveMetropolis, [alpha, beta])
Example #15
def KellyModel(x, xerr, y, yerr, xycovar, parts, ngauss=3):
    #Implementation of Kelly07 model, but without nondetection support



    #Prior as defined in section 6.1 of Kelly07

    alpha = pymc.Uninformative('alpha', value = np.random.uniform(-1, 1))
    parts['alpha'] = alpha
    beta = pymc.Uninformative('beta', value = np.random.uniform(-np.pi/2, np.pi/2))
    parts['beta'] = beta

    sigint2 = pymc.Uniform('sigint2', 1e-4, 1.)
    parts['sigint2'] = sigint2
    
    piprior = pymc.Dirichlet('pi', np.ones(ngauss))
    parts['piprior'] = piprior

    @pymc.deterministic(trace=False)
    def pis(piprior = piprior):
        lastpi = 1. - np.sum(piprior)
        allpi = np.zeros(ngauss)
        allpi[:-1] = piprior
        allpi[-1] = lastpi
        return allpi
    parts['pis'] = pis

    mu0 = pymc.Uninformative('mu0', np.random.uniform(-1, 1))
    parts['mu0'] = mu0
    w2 = pymc.Uniform('w2', 1e-4, 1e4)
    parts['w2'] = w2


    xvars = pymc.InverseGamma('xvars', 0.5, w2, size=ngauss+1)  #dropping the 1/2 factor on w2, because I don't think it matters
    parts['xvars'] = xvars

    @pymc.deterministic(trace=False)
    def tauU2(xvars = xvars):
        return 1./xvars[-1]
    parts['tauU2'] = tauU2

    xmus = pymc.Normal('xmus', mu0, tauU2, size = ngauss)
    parts['xmus'] = xmus

    

    @pymc.observed
    def likelihood(value = 0., x = x, xerr2 = xerr**2, y = y, yerr2 = yerr**2, xycovar = xycovar, 
                   alpha = alpha, beta = beta, sigint2 = sigint2, pis = pis, xmus = xmus, xvars = xvars):

        return stats.kelly_like(x = x, 
                                xerr2 = xerr2,
                                y = y,
                                yerr2 = yerr2,
                                xycovar = xycovar,
                                alpha = alpha,
                                beta = beta,
                                sigint2 = sigint2,
                                pis = pis,
                                mus = xmus,
                                tau2 = xvars[:-1])

    parts['likelihood'] = likelihood
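
A sketch of wiring KellyModel into a sampler; x, xerr, y, yerr, and xycovar are hypothetical data arrays of matching length:

parts = {}
KellyModel(x, xerr, y, yerr, xycovar, parts, ngauss=3)
m = pymc.MCMC(parts)
m.sample(iter=20000, burn=10000)
print(m.alpha.stats()['mean'], m.beta.stats()['mean'])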
Example #16
# model = models[model_choice]

# methodmenu = menu.MenuSystem('Choose appropriate method')
# methodmenu.add_entry('a', 'Line average')
# methodmenu.add_entry('b', 'Line integral')

# method_choice = str(methodmenu.run())
# method = aggregate_options[method_choice]
# print('The method is ' + str(method))

############################################################################
# Set up the model
############################################################################

Q = pm.Uniform('Q', lower=0, upper=1)
tau = pm.Uninformative('tau', value=1)

z = 2.3  # Height of instrument tower.
# Compute the predicted reading for each reflector / meteorology row.
reading_predicted = np.empty_like(perturbation)
for i, (reflector, u, theta, T, P, L) in enumerate(
        zip(reflectors, wind_speed, plume_dir, temp, pressure, lvals)):
    params = {'z': z, 'L': L, 'U': u, 'H': h_source}
    reflector = int(reflector)
    reading_predicted[i] = util.line_average([source_x, source_y],
                                             p0_list[reflector],
                                             p1_list[reflector],
                                             z_list[reflector], samples, 1,
                                             h_source, theta, T, P, params,
                                             'gaussian')
Example #17
def anneal_w_graphics(n=11, depth=10):
    """ Make an animation of the BDST chain walking on an nxn grid and play it
    """
    ni = 5
    nj = 100
    nk = 5

    beta = mc.Uninformative('beta', value=1.)

    G = nx.grid_graph([n, n])
    G.orig_pos = dict([[v, v] for v in G.nodes_iter()])
    G.pos = dict([[v, v] for v in G.nodes_iter()])

    root = (5, 5)
    bdst = BDST(G, root, depth, beta)

    mod_mc = mc.MCMC([beta, bdst])
    mod_mc.use_step_method(STMetropolis, bdst)
    mod_mc.use_step_method(mc.NoStepper, beta)

    for i in range(ni):
        beta.value = i * 5
        for j in range(nj):
            mod_mc.sample(1)
            T = bdst.value

            for k in range(nk):
                if random.random() < .95:
                    delta_pos = nx.spring_layout(T,
                                                 pos=G.pos,
                                                 fixed=[root],
                                                 iterations=1)
                else:
                    delta_pos = G.orig_pos
                eps = .01
                my_avg = lambda x, y: (x[0] * (1. - eps) + y[0] * eps, x[1] *
                                       (1. - eps) + y[1] * eps)
                for v in G.pos:
                    G.pos[v] = my_avg(G.pos[v], delta_pos[v])
                views.plot_graph_and_tree(G, T, time=1. * k / nk)
                txt = ''  # renamed from `str` to avoid shadowing the builtin
                txt += ' beta: %.1f\n' % beta.value
                txt += ' cur depth: %d (target: %d)\n' % (T.depth, depth)
                sm = mod_mc.step_method_dict[bdst][0]
                txt += ' accepted: %d of %d\n' % (sm.accepted,
                                                  sm.accepted + sm.rejected)
                plt.figtext(0, 0, txt)
                plt.figtext(1,
                            0,
                            'healthyalgorithms.wordpress.com \n',
                            ha='right')
                plt.axis([-1, n, -1, n])
                plt.axis('off')
                plt.subplots_adjust(0, 0, 1, 1)
                plt.savefig('bdst%06d.png' % (i * nj * nk + j * nk + k))
            print 'accepted:', mod_mc.step_method_dict[bdst][0].accepted

    import subprocess
    subprocess.call(
        'mencoder mf://bdst*.png -mf w=800:h=600 -ovc x264 -of avi -o bdst_G_%d_d_%d.avi'
        % (n, depth),
        shell=True)
    subprocess.call('mplayer -loop 0 bdst_G_%d_d_%d.avi' % (n, depth),
                    shell=True)
    subprocess.call('rm bdst*.png', shell=True)  # the glob needs a shell

    return bdst.value
Example #18
pl.xlabel('Rate ($r$)')
pl.ylabel('Study Size ($n$)')
pl.axis([-.0001, .0101, 50., 1500000])
pl.legend(numpoints=1, fancybox=True, shadow=True, prop={'size': 'x-large'})
pl.subplots_adjust(bottom=.13, top=.93)
pl.savefig('book/graphics/binomial-model-funnel.pdf')
pl.savefig('book/graphics/binomial-model-funnel.png')

### @export 'binomial-model-problem'
n = 50000
pop_A_prev = .002
pop_A_N = n
pop_B_prev = .006
pop_B_N = n

pi = mc.Uninformative('pi', value=pop_A_prev)


@mc.potential
def obs(pi=pi):
    return pop_A_prev*pop_A_N*pl.log(pi) + (1-pop_A_prev)*pop_A_N*pl.log(1-pi) \
        + pop_B_prev*pop_B_N*pl.log(pi) + (1-pop_B_prev)*pop_B_N*pl.log(1-pi)


pop_C_N = n
pop_C_k = mc.Binomial('pop_C_k', pop_C_N, pi)
mc.MCMC([pi, obs, pop_C_k]).sample(20000,
                                   10000,
                                   2,
                                   verbose=False,
                                   progress_bar=False)
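
As a sanity check on the potential above: it is the log-likelihood of a single binomial rate fit to both populations, so the posterior concentrates near the pooled average rather than either true prevalence:

pi_hat = (pop_A_prev*pop_A_N + pop_B_prev*pop_B_N) / float(pop_A_N + pop_B_N)
print(pi_hat)  # 0.004, midway between the population rates of .002 and .006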
Example #19
def make_model(recs,
               curve_sub,
               curve_params=[],
               pr_type='mixed',
               pr_hists=None,
               pr_samps=None,
               check_inflec=True):
    input_dict = curve_sub(*curve_params)
    arfun = input_dict['arfun']
    fun_params = input_dict['fun_params']

    # if pr_type=='unknown':
    #     splreps = []
    #     for i in xrange(len(pr_hists)):
    #         where_ok = np.where(pr_hists[i][0]>0)
    #         pr_mesh = pr_hists[i][1][where_ok]
    #         lp_mesh = np.log(pr_hists[i][0][where_ok])
    #         splreps.append(UnivariateSpline(pr_mesh, lp_mesh, bbox=[0,1]))
    #
    #     @pm.stochastic(dtype=float)
    #     def pr(value = pr_hists[:,1,10], splreps = splreps):
    #         out=0
    #         for i in xrange(len(value)):
    #             this_value = value[i]
    #             if this_value<0 or this_value>1:
    #                 return -np.inf
    #             else:
    #                 out += splreps[i](this_value)
    #         return out
    if pr_type == 'model_exp':
        pr = recs.mbg_pr
    elif pr_type == 'data':
        pr = recs.pr
    elif pr_type == 'mixed':
        pr = recs.mix_pr
    elif pr_type == 'data_untrans':
        pr = recs.pfpr
    else:
        raise ValueError, 'PR type unknown'

    # # A deterministic that measures the change in attack rate given a certain change in PR.
    # delta_ar = pm.Lambda('delta_ar', lambda fp = fun_params: np.diff(arfun(diff_pts, *fp)))
    fboth = pm.Lambda(
        'fboth',
        lambda fp=fun_params, pr=pr: arfun(np.hstack((pr, xplot)), *fp))

    # Evaluation of trend at PR values
    AR_trend = pm.Lambda('AR_trend',
                         lambda fp=fun_params, pr=pr: arfun(pr, *fp))

    # The function evaluated on the display mesh
    fplot = pm.Lambda('fplot', lambda fp=fun_params: arfun(xplot, *fp))
    pl.clf()
    pl.plot(xplot, fplot.value)

    @pm.potential
    def check_trend(AR=AR_trend, f=fplot):

        if np.any(AR <= 0) or np.any(f <= 0):
            return -np.Inf
        if check_inflec:
            d2 = np.diff(f, 2)
            d2 = d2[np.where(np.abs(d2) > 1e-6)]
            chgs = np.where(np.abs(np.diff(np.sign(d2))) > 1)[0]
            if np.diff(f[-3:], 2) > 0 or len(chgs) > 1:
                return -np.Inf
        return 0

    # Negative-binomial parameters.
    r_int = pm.Exponential('r_int', .0001, value=.3)
    r_lin = pm.Uninformative('r_lin', value=1.)
    r_quad = pm.Uninformative('r_quad', value=.1)

    rplot = pm.Lambda('rplot',
                      lambda r_int=r_int, r_lin=r_lin, r_quad=r_quad: r_int +
                      r_lin * xplot + r_quad * xplot**2)

    @pm.potential
    def check_r(i=r_int, l=r_lin, q=r_quad):
        # if q>0:
        #     xhat = -l / 2 / q
        #     if i + l*xhat + q*xhat*xhat <= 0 and xhat>0:
        #         return -np.Inf
        if l <= 0 or l + 2. * q <= 0:
            return -np.Inf
        if i + l + q <= 0 or i < 0:
            return -np.Inf
        return 0

    # shape parameter of gamma process is multiplied by total survey time
    time_scale_fac = time_scaling(recs.pcd, recs.surv_int)
    tottime = (recs.yr_end - recs.yr_start + 1)
    scale_time = tottime / time_scale_fac
    pop = recs.pyor / tottime

    # Shape parameter of Poisson intensity is only multiplied by scaled survey time.
    r = pm.Lambda('r',
                  lambda i=r_int, l=r_lin, q=r_quad, pr=pr:
                  (i + l * pr + q * pr * pr) * scale_time)

    # scale parameter of Poisson intensity is multiplied by scaled survey time * number of people sampled.
    exp_rate = pm.Lambda('exp_rate', lambda t=AR_trend: scale_time * pop * t)

    # The data
    AR = pm.NegativeBinomial('AR',
                             exp_rate,
                             r,
                             value=recs.cases,
                             observed=True)

    @pm.deterministic(dtype=float)
    def AR_dev(AR=AR, mu=exp_rate, r=r):
        return np.array([
            pm.negative_binomial_like(AR[i], mu[i], r[i])
            for i in xrange(len(AR))
        ])

    out = locals()
    out.update(input_dict)
    return out
Example #20
def buildGaussMixture1DModel(halos, ngauss, modeltype='ratio'):

    parts = {}

    ### PDF handling

    massnorm = 1e15

    masses = halos[0]['masses']
    nmasses = len(masses)

    nclusters = len(halos)
    delta_masses = np.zeros((nclusters, nmasses - 1))
    delta_mls = np.zeros((nclusters, nmasses))
    pdfs = np.zeros((nclusters, nmasses))

    #also need to collect some statistics, to init mixture model
    pdfmeans = np.zeros(nclusters)
    pdfwidths = np.zeros(nclusters)

    for i in range(nclusters):

        if modeltype == 'additive':
            delta_masses[i, :] = (masses[1:] - masses[:-1]) / massnorm
            delta_mls[i, :] = (masses - halos[i]['true_mass']) / massnorm
            pdfs[i, :] = halos[i][
                'pdf'] * massnorm  #preserve unitarity under integration
        elif modeltype == 'ratio':
            delta_masses[i, :] = (masses[1:] -
                                  masses[:-1]) / halos[i]['true_mass']
            delta_mls[i, :] = masses / halos[i]['true_mass']
            pdfs[i, :] = halos[i]['pdf'] * halos[i]['true_mass']

        pdfmeans[i] = scipy.integrate.trapz(delta_mls[i, :] * pdfs[i, :],
                                            delta_mls[i, :])
        pdfwidths[i] = np.sqrt(
            scipy.integrate.trapz(
                pdfs[i, :] * (delta_mls[i, :] - pdfmeans[i])**2,
                delta_mls[i, :]))

    datacenter = np.mean(pdfmeans)
    dataspread = np.std(pdfmeans)
    datatypvar = np.mean(pdfwidths)
    dataminsamp = np.min(delta_masses)

    print datacenter, dataspread, datatypvar, dataminsamp

    #### Mixture model priors

    piprior = pymc.Dirichlet('piprior', np.ones(ngauss))
    parts['piprior'] = piprior

    mu0 = pymc.Uninformative(
        'mu0', datacenter + np.random.uniform(-5 * dataspread, 5 * dataspread))
    parts['mu0'] = mu0

    # kelly07 xvars prior.
    #    w2 = pymc.Uniform('w2', 0.1/dataspread**2., 100*max(1./dataspread**2, 1./datatypvar**2))
    #    print w2.parents
    #    parts['w2'] = w2
    #
    #
    #    xvars = pymc.InverseGamma('xvars', 0.5, 0.5*w2, size=ngauss+1)  #dropping the 1/2 factor on w2, because I don't think it matters

    logxsigma = pymc.Uniform('logxsigma',
                             np.log(2 * dataminsamp),
                             np.log(5 * dataspread),
                             size=ngauss + 1)
    parts['logxsigma'] = logxsigma

    @pymc.deterministic(trace=False)
    def xvars(logxsigma=logxsigma):
        return np.exp(logxsigma)**2

    parts['xvars'] = xvars

    @pymc.deterministic(trace=False)
    def tauU2(xvars=xvars):
        return 1. / xvars[-1]

    parts['tauU2'] = tauU2

    xmus = pymc.Normal('xmus', mu0, tauU2, size=ngauss)
    parts['xmus'] = xmus

    @pymc.observed
    def data(value=0.,
             delta_mls=delta_mls,
             delta_masses=delta_masses,
             pdfs=pdfs,
             piprior=piprior,
             xmus=xmus,
             xvars=xvars):

        #complete pi
        pis = pymc.extend_dirichlet(piprior)

        #        print pis

        #        #enforce identiability by ranking means
        #        for i in range(xmus.shape[0]-1):
        #            if (xmus[i] >= xmus[i+1:]).any():
        #                raise pymc.ZeroProbability
        #

        return dlntools.pdfGaussMix1D(delta_mls=delta_mls,
                                      delta_masses=delta_masses,
                                      pdfs=pdfs,
                                      pis=pis,
                                      mus=xmus,
                                      tau2=xvars[:-1])

    parts['data'] = data

    return parts
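
A hypothetical driver for the mixture model above, assuming halos has the structure the function expects:

parts = buildGaussMixture1DModel(halos, ngauss=3, modeltype='ratio')
m = pymc.MCMC(parts)
m.sample(iter=10000, burn=5000)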
Example #21
def pred(pi=pi):
    return mc.rpoisson(pi * n_pred) / float(n_pred)


### @export 'poisson-fit-and-store'
mc.MCMC([pi, obs, pred]).sample(iter,
                                burn,
                                thin,
                                verbose=False,
                                progress_bar=False)

results['Poisson'] = dict(pred=pred.stats(), pi=pi.stats())

### @export 'negative-binomial-model'
pi = mc.Uniform('pi', lower=0, upper=1, value=.5)
delta = mc.Uninformative('delta', value=100.)


@mc.potential
def obs(pi=pi, delta=delta):
    return mc.negative_binomial_like(r * n, pi * n, delta)


@mc.deterministic
def pred(pi=pi, delta=delta):
    return mc.rnegative_binomial(pi * n_pred, delta) / float(n_pred)


### @export 'negative-binomial-fit-and-store'
mc.MCMC([pi, delta, obs, pred]).sample(iter,
                                       burn,
Example #22

pl.figure(figsize=(11, 8.5), dpi=120)
pl.subplots_adjust(wspace=.4)
pl.subplot(2, 2, 1)
plot_beta_binomial_funnel(4., 996.)

pl.subplot(2, 2, 2)
plot_beta_binomial_funnel(40., 9960.)

pl.subplot(2, 1, 2)
r = pl.array(schiz['r'])
n = pl.array(schiz['n'], dtype=int)
k = r * n

alpha = mc.Uninformative('alpha', value=1.)
beta = mc.Uninformative('beta', value=999.)
pi = mc.Beta('pi', alpha, beta, value=.001 * pl.ones(16))
pi_mean = mc.Lambda('pi_mean',
                    lambda alpha=alpha, beta=beta: alpha / (alpha + beta))


@mc.potential
def obs(pi=pi):
    return mc.binomial_like(k, n, pi)


@mc.deterministic
def pred(pi=pi, alpha=alpha, beta=beta):
    return mc.rbetabin(alpha, beta, n)
Example #23
# =========================================
#
# This notebook implements and compares samplers in PyMC
# to sample uniformly from an $n$-dimensional ball,
# i.e. to sample from the set
# $$
# \mathbf{B}_n = \\{x \in \mathbf{R}^n: \|x\|\leq 1\\}
# $$

# <codecell>

mc.np.random.seed(1234567)

# simple model
n = 2
X = [mc.Uninformative('X_%d' % i, value=0) for i in range(n)]


@mc.potential
def in_ball(X=X):
    if X[0]**2 + X[1]**2 <= 1.:
        return 0
    else:
        return -pl.inf


# <codecell>


class UniformBall(mc.Gibbs):
    def __init__(self, stochastic, others, verbose=None):
Example #24
def make_model(lon,lat,input_data,covariate_keys,n_male,male_pos,n_fem,fem_pos):
    """
    This function is required by the generic MBG code.
    """
    
    # How many nuggeted field points to handle with each step method
    grainsize = 10

    # Unique data locations
    data_mesh, logp_mesh, fi, ui, ti = uniquify(lon,lat)
    
    a = pm.Exponential('a', .01, value=1)
    b = pm.Exponential('b', .01, value=1)
    
        
    init_OK = False
    while not init_OK:
        try:
            # The partial sill.
            amp = pm.Exponential('amp', .1, value=1.)

            # The range parameters. Units are RADIANS. 
            # 1 radian = the radius of the earth, about 6378.1 km
            scale = pm.Exponential('scale', .1, value=.08)            

            # This parameter controls the degree of differentiability of the field.
            diff_degree = pm.Uniform('diff_degree', .01, 3)

            # The nugget variance.
            V = pm.Exponential('V', .1, value=.1)

            @pm.potential
            def V_constraint(V=V):
                if V<.1:
                    return -np.inf
                else:
                    return 0
            
            m = pm.Uninformative('m',value=-25)
            @pm.deterministic(trace=False)
            def M(m=m):
                return pm.gp.Mean(mean_fn, m=m)

            # Create the covariance & its evaluation at the data locations.
            facdict = dict([(k,1.e6) for k in covariate_keys])
            facdict['m'] = 0
            @pm.deterministic(trace=False)
            def C(amp=amp, scale=scale, diff_degree=diff_degree, ck=covariate_keys, id=input_data, ui=ui, facdict=facdict):
                """A covariance function created from the current parameter values."""
                eval_fn = CovarianceWithCovariates(cut_matern, id, ck, ui, fac=facdict)
                return pm.gp.FullRankCovariance(eval_fn, amp=amp, scale=scale, diff_degree=diff_degree)

            sp_sub = pm.gp.GPSubmodel('sp_sub', M, C, logp_mesh, tally_f=False)
                
            init_OK = True
        except pm.ZeroProbability:
            init_OK = False
            cls,inst,tb = sys.exc_info()
            print 'Restarting, message %s\n'%inst.message

    # Make f start somewhere a bit sane
    sp_sub.f_eval.value = sp_sub.f_eval.value - np.mean(sp_sub.f_eval.value)

    # Loop over data clusters
    eps_p_f_d = []
    s_d = []
    male_d = []
    het_def_d = []
    fem_d = []

    for i in xrange(len(male_pos)/grainsize+1):
        sl = slice(i*grainsize,(i+1)*grainsize,None)        
        if len(male_pos[sl])>0:
            # Nuggeted field in this cluster
            eps_p_f_d.append(pm.Normal('eps_p_f_%i'%i, sp_sub.f_eval[fi[sl]], 1./V, trace=False))            

            # The allele frequency
            s_d.append(pm.Lambda('s_%i'%i,lambda lt=eps_p_f_d[-1]: invlogit(lt), trace=False))
            
            where_male = np.where(~np.isnan(n_male[sl]))[0]
            where_fem = np.where(~np.isnan(n_fem[sl]))[0]
            if len(where_male) > 0:
                male_d.append(pm.Binomial('male_%i'%i, n_male[sl][where_male], s_d[-1][where_male], value=male_pos[sl][where_male], observed=True))
            if len(where_fem) > 0:
                het_def_d.append(pm.Beta('het_def_%i'%i, alpha=a, beta=b, size=len(where_fem), trace=False))
                p = s_d[-1][where_fem]
                p_def = pm.Lambda('p_def', lambda p=p, h=het_def_d[-1]: p_fem_def(p, h), trace=False)
                fem_d.append(pm.Binomial('fem_%i'%i, n_fem[sl][where_fem], p_def, value=fem_pos[sl][where_fem], observed=True))
    
    # The field plus the nugget
    @pm.deterministic
    def eps_p_f(eps_p_fd = eps_p_f_d):
        """Concatenated version of eps_p_f, for postprocessing & Gibbs sampling purposes"""
        return np.hstack(eps_p_fd)

    # The heterozygote deficiency
    @pm.deterministic
    def het_def(het_def_d = het_def_d):
        return np.hstack(het_def_d)
            
    return locals()
Example #25
    def __init__(self,
                 predictions,
                 measurements,
                 uncertainties,
                 regularization_strength=1.0,
                 precision=None,
                 prior_pops=None):
        """Bayesian Energy Landscape Tilting with maximum entropy prior and correlation-corrected likelihood.

        Parameters
        ----------
        predictions : ndarray, shape = (num_frames, num_measurements)
            predictions[j, i] gives the ith observable predicted at frame j
        measurements : ndarray, shape = (num_measurements)
            measurements[i] gives the ith experimental measurement
        uncertainties : ndarray, shape = (num_measurements)
            uncertainties[i] gives the uncertainty of the ith experiment
        regularization_strength : float
            How strongly to weight the MVN prior (e.g. lambda)
        precision : ndarray, optional, shape = (num_measurements, num_measurements)
            The precision matrix of the predicted observables.
        prior_pops : ndarray, optional, shape = (num_frames)
            Prior populations of each conformation.  If None, use uniform populations.
        """

        BELT.__init__(self,
                      predictions,
                      measurements,
                      uncertainties,
                      prior_pops=prior_pops)

        if precision is None:
            precision = np.cov(predictions.T)
            if precision.ndim == 0:
                precision = precision.reshape((1, 1))

        self.alpha = pymc.Uninformative(
            "alpha", value=np.zeros(self.num_measurements)
        )  # The prior on alpha is defined as a potential, so we use Uninformative variables here.
        self.initialize_variables()

        @pymc.potential
        def logp_prior(populations=self.populations,
                       mu=self.mu,
                       prior_pops=self.prior_pops):
            if populations.min() <= 0:
                return -1 * np.inf
            else:
                return -1 * regularization_strength * (
                    populations * (np.log(populations / prior_pops))).sum()

        self.logp_prior = logp_prior

        rho = np.corrcoef(predictions.T)
        rho_inverse = np.linalg.inv(rho)

        @pymc.potential
        def logp(populations=self.populations, mu=self.mu):
            z = (mu - measurements) / uncertainties
            chi2 = rho_inverse.dot(z)
            chi2 = z.dot(chi2)
            return -0.5 * chi2

        self.logp = logp
Example #26
File: inla.py  Project: apatil/pdefields
def make_model(X):
    neighbors, triangles, trimap, b = spherical.triangulate_sphere(X)
    # spherical.plot_triangulation(X,neighbors)

    # Matrix generation
    triangle_areas = [spherical.triangle_area(X, t) for t in triangles]
    Ctilde = spherical.Ctilde(X, triangles, triangle_areas)
    C = spherical.C(X, triangles, triangle_areas)
    G = spherical.G(X, triangles, triangle_areas)

    # Operator generation
    Ctilde = cholmod.into_matrix_type(Ctilde)
    G = cholmod.into_matrix_type(G)

    # amp is the overall amplitude. It's a free variable that will probably be highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m', value=0)

    @pm.deterministic(trace=False)
    def M(m=m, n=len(X)):
        """The mean vector"""
        return np.ones(n) * m

    kappa = pm.Exponential('kappa', 1, value=3)
    alpha = pm.DiscreteUniform('alpha', 1, 10, value=2., observed=True)

    @pm.deterministic(trace=False)
    def Q(kappa=kappa, alpha=alpha, amp=amp):
        out = operators.mod_frac_laplacian_precision(
            Ctilde, G, kappa, alpha, cholmod) / np.asscalar(amp)**2
        return out

    # Nailing this ahead of time reduces time to compute logp from .18 to .13s for n=25000.
    pattern_products = cholmod.pattern_to_products(Q.value)
    # @pm.deterministic
    # def pattern_products(Q=Q):
    #     return cholmod.pattern_to_products(Q)

    @pm.deterministic(trace=False)
    def precision_products(Q=Q, p=pattern_products):
        try:
            return cholmod.precision_to_products(Q, **p)
        except cholmod.NonPositiveDefiniteError:
            return None

    S = pymc_objects.SparseMVN('S', M, precision_products, cholmod)

    n = len(X)
    vars = pm.rgamma(4, 4, size=n)
    vals = X[:, 2]

    data = pm.Normal('data', S, 1. / vars, value=vals, observed=True)

    Qobs = sparse.csc_matrix((n, n))
    Qobs.setdiag(1. / vars)

    @pm.deterministic(trace=False)
    def true_evidence(Q=Q, M=M, vals=vals, vars=vars):
        C = np.array(Q.todense().I + np.diag(vars))
        return pm.mv_normal_cov_like(vals, M, C)

    # Stuff for the scoring algorithm-based full conditional
    def first_likelihood_derivative(x, vals=vals, vars=vars):
        return -(x - vals) / vars

    def second_likelihood_derivative(x, vals=vals, vars=vars):
        return -1. / vars

    return locals()
Example #27
def fit_blackbody_montecarlo(frequency,
                             seds,
                             errors=None,
                             temperature_guess=10,
                             beta_guess=None,
                             scale_guess=None,
                             blackbody_function=blackbody,
                             quiet=True,
                             return_MC=True,
                             nsamples=5000,
                             burn=1000,
                             min_temperature=0,
                             max_temperature=100,
                             scale_keyword='scale',
                             max_scale=1e60,
                             multivariate=False,
                             **kwargs):
    """
    Parameters
    ----------
    frequency : array
        Array of frequency values
    seds : array
        2-D array of flux values, one SED per row
    errors : array (optional)
        Array of error values (1-sigma, normal)
    temperature_guess : float
        Input / starting point for temperature
    min_temperature : float
    max_temperature : float
        Lower/Upper limits on fitted temperature
    beta_guess : float (optional)
        Opacity beta value
    scale_guess : float
        Arbitrary scale value to apply to model to get correct answer
    blackbody_function: function
        Must take x-axis (e.g. frequency), temperature, then scale and beta
        keywords (dependence on beta can be none)
    return_MC : bool
        Return the pymc.MCMC object?
    nsamples : int
        Number of samples to use in determining the posterior distribution
        (the answer)
    burn : int
        number of initial samples to ignore
    scale_keyword : ['scale','logscale','logN']
        What scale keyword to pass to the blackbody function to determine
        the amplitude
    kwargs : kwargs
        passed to blackbody function
    """

    d = {}

    d['temperature'] = pymc.distributions.Uniform('temperature',
                                                  min_temperature,
                                                  max_temperature,
                                                  value=temperature_guess)
    d['scale'] = pymc.distributions.Uniform('scale',
                                            0,
                                            max_scale,
                                            value=scale_guess)
    if beta_guess is not None:
        d['beta'] = pymc.distributions.Uniform('beta', 0, 10, value=beta_guess)
    else:
        d['beta'] = pymc.distributions.Uniform('beta', 0, 10, value=1)

    # Covariance entries for the (T, beta, scale) hyperprior, keyed by
    # parameter pair; the loop below mirrors them into a symmetric 3x3 grid.
    covar_list = dict([
        ((i, j), pymc.Uninformative('%s-%s' % (i, j), value=float(i == j)))
        for i, j in itertools.combinations_with_replacement(('t', 'b', 's'), 2)
    ])
    for i, j in itertools.permutations(('t', 'b', 's'), 2):
        if (i, j) in covar_list:
            covar_list[(j, i)] = covar_list[(i, j)]
    covar_grid = [[covar_list[(i, j)] for i in ('t', 'b', 's')]
                  for j in ('t', 'b', 's')]
    d['tbcov'] = pymc.MvNormalCov(
        'tbcov',
        mu=[d['temperature'], d['beta'], d['scale']],
        C=covar_grid,
        # the initial value must be numeric; passing the stochastics
        # themselves here breaks logp evaluation (1 is an arbitrary
        # fallback when no guess was given)
        value=[temperature_guess,
               beta_guess if beta_guess is not None else 1,
               scale_guess if scale_guess is not None else 1])

    # Same construction for the precision entries; note the '_prec' suffix,
    # without which these nodes would collide with the identically named
    # covariance nodes above.
    precision_list = dict([
        ((i, j), pymc.Uninformative('%s-%s_prec' % (i, j), value=float(i == j)))
        for i, j in itertools.combinations_with_replacement(('t', 'b', 's'), 2)
    ])
    for i, j in itertools.permutations(('t', 'b', 's'), 2):
        if (i, j) in precision_list:
            precision_list[(j, i)] = precision_list[(i, j)]
    precision_grid = [[precision_list[(i, j)] for i in ('t', 'b', 's')]
                      for j in ('t', 'b', 's')]
    # NOTE: these entries are used as tau (precision) values below, so they
    # must stay positive; an MvNormal draw is unbounded, and a Wishart or
    # per-component Gamma prior would enforce that constraint properly.
    d['tbprec'] = pymc.MvNormalCov(
        'tbprec',
        mu=[d['temperature'], d['beta'], d['scale']],
        C=precision_grid,
        value=[1, 1, 1])

    if errors is None:
        # allow the documented errors=None default; each err falls back to
        # the Uninformative branch below
        errors = [None] * len(seds)
    for ii, (sed, err) in enumerate(zip(seds, errors)):
        d['t_%i' % ii] = pymc.Normal('t_%i' % ii,
                                     mu=d['tbcov'][0],
                                     tau=d['tbprec'][0])
        d['b_%i' % ii] = pymc.Normal('b_%i' % ii,
                                     mu=d['tbcov'][1],
                                     tau=d['tbprec'][1])
        d['s_%i' % ii] = pymc.Normal('s_%i' % ii,
                                     mu=d['tbcov'][2],
                                     tau=d['tbprec'][2])

        def bb_model(temperature=d['t_%i' % ii],
                     scale=d['s_%i' % ii],
                     beta=d['b_%i' % ii]):
            kwargs[scale_keyword] = scale
            y = blackbody_function(frequency,
                                   temperature,
                                   beta=beta,
                                   normalize=False,
                                   **kwargs)
            # debug: print(kwargs, beta, temperature)
            return y

        d['bb_model_%i' % ii] = pymc.Deterministic(eval=bb_model,
                                                   name='bb_model_%i' % ii,
                                                   parents={
                                                       'temperature':
                                                       d['t_%i' % ii],
                                                       'scale':
                                                       d['s_%i' % ii],
                                                       'beta':
                                                       d['b_%i' % ii]
                                                   },
                                                   doc='Blackbody SED model.',
                                                   trace=True,
                                                   verbose=0,
                                                   dtype=float,
                                                   plot=False,
                                                   cache_depth=2)

        if err is None:
            d['err_%i' % ii] = pymc.distributions.Uninformative('error_%i' %
                                                                ii,
                                                                value=1.)
        else:
            d['err_%i' % ii] = pymc.distributions.Uninformative('error_%i' %
                                                                ii,
                                                                value=err,
                                                                observed=True)

        d['flux_%i' % ii] = pymc.distributions.Normal('flux_%i' % ii,
                                                      mu=d['bb_model_%i' % ii],
                                                      tau=1. /
                                                      d['err_%i' % ii]**2,
                                                      value=sed,
                                                      observed=True)

    #print d.keys()
    MC = pymc.MCMC(d)

    if nsamples > 0:
        MC.sample(nsamples, burn=burn)
        if return_MC:
            return MC

        MCfit = pymc.MAP(MC)
        MCfit.fit()
        T = MCfit.temperature.value
        scale = MCfit.scale.value

        if beta_guess is not None:
            beta = MCfit.beta.value
            return T, scale, beta
        else:
            return T, scale

    return MC
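
# A hedged usage sketch (not part of the original example): it assumes a
# `blackbody(frequency, temperature, beta=..., normalize=..., scale=...)`
# function matching the docstring above, and simulates one noisy SED.
import numpy as np

nu = np.logspace(11, 13, 30)                    # frequencies [Hz]
true_sed = blackbody(nu, 20., beta=1.5, normalize=False, scale=1e10)
noise = 0.05 * true_sed
noisy_sed = true_sed + noise * np.random.randn(nu.size)
MC = fit_blackbody_montecarlo(nu, [noisy_sed], errors=[noise],
                              temperature_guess=15, beta_guess=1.5,
                              scale_guess=1e10, nsamples=2000, burn=500)
print(MC.trace('t_0')[:].mean())                # posterior mean temperature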
示例#28
0
def make_model(N,k,X,backend,manifold):
    """
    A standard spatial logistic regression.
    - N: Number sampled at each location
    - k: Number positive at each location
    - X: x,y,z coords of each location
    - backend: The linear algebra backend. So far, this has to be 'cholmod'.
    - manifold: The manifold to work on. So far, this has to be 'spherical'.
    """
    
    # Make the Delaunay triangulation.
    neighbors, triangles, trimap, b = manifold.triangulate_sphere(X)

    # Uncomment to visualize the triangulation.
    # manifold.plot_triangulation(X,neighbors)

    # Generate the C, Ctilde and G matrix in SciPy 'lil' format.
    triangle_areas = [manifold.triangle_area(X, t) for t in triangles]
    Ctilde = manifold.Ctilde(X, triangles, triangle_areas)
    C = manifold.C(X, triangles, triangle_areas)
    G = manifold.G(X, triangles, triangle_areas)

    # Convert to SciPy 'csc' format for efficient use by the CHOLMOD backend.
    C = backend.into_matrix_type(C)
    Ctilde = backend.into_matrix_type(Ctilde)
    G = backend.into_matrix_type(G)

    # Kappa is the scale parameter. It's a free variable.
    kappa = pm.Exponential('kappa',1,value=3)

    # Fix the value of alpha.
    alpha = 2.

    # amp is the overall amplitude. It's a free variable that will probably be highly confounded with kappa.
    amp = pm.Exponential('amp', .0001, value=100)

    # A constant mean.
    m = pm.Uninformative('m',value=0)
    
    @pm.deterministic(trace=False)
    def M(m=m,n=len(X)):
        """The mean vector"""
        return np.ones(n)*m
        
    @pm.deterministic(trace=False)
    def Q(kappa=kappa, alpha=alpha, amp=amp, Ctilde=Ctilde, G=G, backend=backend):
        "The precision matrix."
        out = operators.mod_frac_laplacian_precision(Ctilde, G, kappa, alpha, backend)/np.asscalar(amp)**2
        return out

    # Do all the precomputation you can based on the sparsity pattern alone.
    # Note that if alpha is made free, this needs to be free also, as the sparsity
    # pattern will be changeable.
    pattern_products = backend.pattern_to_products(Q.value)

    @pm.deterministic(trace=False)
    def precision_products(Q=Q, p=pattern_products):
        "All the analysis of the precision matrix that the backend needs to do MVN computations."
        try: 
            return backend.precision_to_products(Q, **p)
        except backend.NonPositiveDefiniteError:
            return None

    # The random field, initialized at the empirical logit (with a +1/+2
    # continuity correction) so the chain starts near the data.
    empirical_S = pm.logit((k + 1) / (N + 2.))
    S = pymc_objects.SparseMVN('S', M, precision_products, backend,
                               value=empirical_S)
    
    @pm.deterministic(trace=False)
    def p(S=S):
        """The success probability."""
        return pm.invlogit(S)

    # The data.
    data = pm.Binomial('data', n=N, p=p, value=k, observed=True)
    
    # A Fortran representation of the (unnormalized) binomial log-likelihood,
    # templated on {X}, to allow fast Metropolis steps without querying
    # data.logp.
    likelihood_variables = np.vstack((np.resize(N, k.shape), k)).T
    likelihood_string = """
    lkp = dexp({X})/(1.0D0+dexp({X}))
    lkp = lv(i,2)*dlog(lkp) + (lv(i,1)-lv(i,2))*dlog(1.0D0-lkp)
    """
    
    return locals()
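
# A hedged usage sketch (not from the source): `cholmod` and `spherical` stand
# in for the backend and manifold modules this factory expects.
import numpy as np
import pymc as pm

n_pts = 500
X = np.random.randn(n_pts, 3)
X /= np.sqrt((X ** 2).sum(axis=1))[:, None]     # project onto the unit sphere
N = np.repeat(50, n_pts)                        # trials per location
k = np.random.binomial(N, .3)                   # successes per location

M = pm.MCMC(make_model(N, k, X, cholmod, spherical))
M.sample(10000, burn=5000)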
示例#29
0
# NOTE: the opening of this example was lost in extraction. The imports and
# the head of `pymc_distributions` below are reconstructed by analogy with
# the surviving `scipy_distributions` dict (one Beta plus four Gammas for the
# same keys); other missing objects (database_random, mu1, var1, mu2, var2)
# cannot be recovered.
import numpy as np
import pymc
import scipy.stats as st

pymc_distributions = {'a_t': pymc.Beta,
                      'mu1_t': pymc.Gamma,
                      'mu2_t': pymc.Gamma,
                      'var1_t': pymc.Gamma,
                      'var2_t': pymc.Gamma,
                      }
scipy_distributions = {'a_t': lambda a, b: st.beta.rvs(a, b),
                       'mu1_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       'mu2_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       'var1_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       'var2_t': lambda a, b: st.gamma.rvs(a, scale=1/b),
                       }
# %%
variabili_ = ['mu1_t','var1_t','mu2_t','var2_t']
param_ = [mu1,var1,mu2,var2]
# NOTE: only the first two of the four variables listed above are pooled in
# this loop; `param` is looked up but never used in the body.
for var_i in np.arange(2):
    variable = variabili_[var_i]
    param = param_[var_i]
    groups = {k:group for k, group in database_random.groupby('sampleID')[variable]}
    a = pymc.Uninformative('a', value=1)
    b = pymc.Uninformative('b', value=1)
    variables = [a, b]
    distribution = pymc_distributions[variable]
    for k, g in groups.items():
        obs = distribution('obs{}'.format(k),
                           alpha = a,
                           beta = b,
                           observed=True,
                           value=g.values)
        variables.append(obs)

    # Fit a MAP estimate first for good starting values, then run MCMC.
    model_map = pymc.MAP(variables)
    model_map.fit()
    model_mcmc = pymc.MCMC(variables)
    model_mcmc.sample(int(1e5))
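
# After sampling, the pooled hyperparameters can be read off the traces, e.g.
# model_mcmc.trace('a')[:].mean() and model_mcmc.trace('b')[:].mean().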
示例#30
0
    def make_model(self, sim, params):
        initial_values = {}
        initial_components = {}
        model_dict = {}

        def runit(**kwargs):
            sim.params(**kwargs)
            for key in initial_values:
                # kwargs holds the numeric parent values supplied by pymc;
                # the raw Stochastic objects in initial_values must not be
                # handed to the simulation directly.
                initial_components[key].initial_value = kwargs[key]

            try:
                sim.run_fast()
                return 0
            except FloatingPointError:
                return -1

        params['std_dev'] = params.get('std_dev', [1e-3, 200])
        # NOTE: the original wrapped everything below in `with pymc.Model() as
        # model:`, which is PyMC3 syntax; every call in this method uses the
        # PyMC2 API, so the nodes are collected into a PyMC2 Model container
        # at the end instead.
        std_dev = pymc.Uniform('std_dev', params['std_dev'][0],
                               params['std_dev'][1])
        del params['std_dev']

        #         @pymc.deterministic(plot=False)
        #         def precision(std_dev=std_dev):
        #             return 1.0 / (std_dev * std_dev)
        #
        for key in params:
            # Entries look like [lower, upper], [lower, upper, start] or
            # [None, value] (the latter gets an Uninformative prior).
            if key.startswith('initial_'):
                if params[key][0] is None:
                    initial_values[key] = pymc.Uninformative(
                        key, value=params[key][1])
                else:
                    if len(params[key]) == 2:
                        initial_values[key] = pymc.Uniform(
                            key, params[key][0], params[key][1])
                    elif len(params[key]) == 3:
                        initial_values[key] = pymc.Uniform(
                            key,
                            params[key][0],
                            params[key][1],
                            value=params[key][2])
                    else:
                        raise ValueError(key)

                name = key.split('initial_')[1]
                _c = sim.get_component(name)
                initial_components[key] = _c
            else:
                if params[key][0] is None:
                    params[key] = pymc.Uninformative(key,
                                                     value=params[key][1])
                else:
                    if len(params[key]) == 2:
                        params[key] = pymc.Uniform(key, params[key][0],
                                                   params[key][1])
                    elif len(params[key]) == 3:
                        params[key] = pymc.Uniform(key,
                                                   params[key][0],
                                                   params[key][1],
                                                   value=params[key][2])
                    else:
                        raise ValueError(key)

        for key in initial_values:
            del params[key]

        run_sim = pymc.Deterministic(eval=runit,
                                     doc='Run the simulation and report '
                                         'success (0) or failure (-1).',
                                     name='run_sim',
                                     parents=dict(
                                         list(params.items()) +
                                         list(initial_values.items())))

        def make_fun(var):
            def fun(run_sim=run_sim, sim=sim):
                _c = sim.get_component(var)
                if _c.data:
                    t = _c.data['t']
                    value = sim.interpolate(t, var)
                else:
                    t = sim.maximum_t
                    value = sim.interpolate(t, var)
                return value

            return fun

        for _c in sim.components + sim.assignments:
            params[_c.name] = pymc.Deterministic(eval=make_fun(_c.name),
                                                 doc=_c.name,
                                                 name=_c.name,
                                                 parents={
                                                     'run_sim': run_sim,
                                                     'sim': sim
                                                 })
            if _c.data:
                varname = _c.name + '_data'
                params[varname] = pymc.Normal(varname,
                                              mu=params[_c.name],
                                              tau=1.0 / std_dev**2,
                                              observed=True,
                                              value=_c.data['value'])

        # Gather every node into a PyMC2 Model container.
        model = pymc.Model(list(params.values()) +
                           list(initial_values.values()) +
                           [std_dev, run_sim])

        return model
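
        # A hedged usage sketch (not from the source): `Fitter` is a stand-in
        # name for the class this method belongs to, and `sim` for its
        # simulation object exposing params/run_fast/get_component/interpolate
        # as used above.
        #
        #     model = Fitter().make_model(sim, {'k_f': [0.1, 10.0],
        #                                       'initial_A': [None, 1.0]})
        #     mcmc = pymc.MCMC(model)
        #     mcmc.sample(20000, burn=5000)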