Пример #1
0
def create_fake_observation():
    """Build a random subsample together with its expected unseen probability.

    The parent sample has 1001 categories: the first holds 9000 counts and
    each of the remaining 1000 holds a single count. Because the parent
    sample is fully known, the share of it that belongs to categories missing
    from the subsample can be computed directly. This is used in the tests of
    lladser_pe and lladser_ci.

    Returns
    -------
    fake_obs
        Result of ``subsample`` applied to the parent counts with a requested
        size of 1000.
    exp_p
        One minus the summed parent-sample fractions of every category that
        has a positive entry in ``fake_obs``.

    """
    parent = np.ones(1001, dtype='int64')
    parent[0] = 9000
    n_parent = parent.sum()

    fake_obs = subsample(parent, 1000)

    # Fraction of the parent sample held by each category that shows up in
    # the subsample; whatever is left over is the unseen portion.
    seen_fractions = (abundance / n_parent
                      for abundance, drawn in zip(parent, fake_obs)
                      if drawn > 0)
    exp_p = 1 - sum(seen_fractions)

    return fake_obs, exp_p
Пример #2
0
def create_fake_observation():
    """Return a subsample and the probability mass it failed to observe.

    A known parent sample is constructed (one category with 9000 counts plus
    1000 categories with one count each) and passed to ``subsample`` with a
    requested size of 1000. Since the parent sample is known in full, the
    expected probability of the unseen portion follows directly from which
    categories are present in the subsample. This is used in the tests of
    lladser_pe and lladser_ci.

    Returns
    -------
    fake_obs
        The value returned by ``subsample`` for the parent counts.
    exp_p
        One minus the accumulated parent-sample share of all categories
        whose entry in ``fake_obs`` is greater than zero.

    """
    abundances = np.ones(1001, dtype='int64')
    abundances[0] = 9000
    grand_total = abundances.sum()

    fake_obs = subsample(abundances, 1000)

    # Accumulate, category by category, the share of the parent sample that
    # is represented in the subsample.
    observed_share = 0
    for abundance, n_drawn in zip(abundances, fake_obs):
        if n_drawn > 0:
            observed_share = observed_share + abundance / grand_total

    exp_p = 1 - observed_share
    return fake_obs, exp_p
Пример #3
0
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None):
    """Calculate Michaelis-Menten fit to rarefaction curve of observed OTUs.

    The Michaelis-Menten equation is defined as

    .. math::

       S=\\frac{nS_{max}}{n+B}

    where :math:`n` is the number of individuals and :math:`S` is the number of
    OTUs. This function estimates the :math:`S_{max}` parameter.

    The fit is made to datapoints for :math:`n=1,2,...,N`, where :math:`N` is
    the total number of individuals (sum of abundances for all OTUs).
    :math:`S` is the number of OTUs represented in a random sample of :math:`n`
    individuals.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    num_repeats : int, optional
        The number of times to perform rarefaction (subsampling without
        replacement) at each value of :math:`n`. Must be at least 1.
    params_guess : tuple, optional
        Initial guess of :math:`S_{max}` and :math:`B`. If ``None``, default
        guess for :math:`S_{max}` is :math:`S` (as :math:`S_{max}` should
        be >= :math:`S`) and default guess for :math:`B` is ``round(N / 2)``.

    Returns
    -------
    S_max : double
        Estimate of the :math:`S_{max}` parameter in the Michaelis-Menten
        equation.

    Raises
    ------
    ValueError
        If `num_repeats` is less than 1.

    See Also
    --------
    skbio.math.subsample

    Notes
    -----
    There is some controversy about how to do the fitting. The ML model given
    in [1]_ is based on the assumption that error is roughly proportional to
    magnitude of observation, reasonable for enzyme kinetics but not reasonable
    for rarefaction data. Here we just do a nonlinear curve fit for the
    parameters using least-squares.

    References
    ----------
    .. [1] Raaijmakers, J. G. W. 1987 Statistical analysis of the
       Michaelis-Menten equation. Biometrics 43, 793-803.

    """
    # With zero repeats the rarefaction matrix below has no rows, so its
    # column means are all NaN and the optimizer would return a meaningless
    # value without complaint. Reject that up front.
    if num_repeats < 1:
        raise ValueError("num_repeats must be at least 1, got %r."
                         % (num_repeats,))

    counts = _validate(counts)

    n_indiv = counts.sum()
    if params_guess is None:
        S_max_guess = observed_otus(counts)
        B_guess = int(round(n_indiv / 2))
        params_guess = (S_max_guess, B_guess)

    # observed # of OTUs vs # of individuals sampled, S vs n
    xvals = np.arange(1, n_indiv + 1)
    # One row per rarefaction repeat, one column per sampling depth n.
    ymtx = np.empty((num_repeats, len(xvals)), dtype=int)
    for i in range(num_repeats):
        ymtx[i] = np.asarray([observed_otus(subsample(counts, n))
                              for n in xvals], dtype=int)
    # Average the repeats to get a single observed-OTU value per depth.
    yvals = ymtx.mean(0)

    # Sum of squared residuals between the Michaelis-Menten curve with
    # parameters p = (S_max, B) and the observed values y at depths n.
    def errfn(p, n, y):
        return (((p[0] * n / (p[1] + n)) - y) ** 2).sum()

    # Return S_max (first of the two fitted parameters).
    return fmin_powell(errfn, params_guess, ftol=1e-5, args=(xvals, yvals),
                       disp=False)[0]