示例#1
0
def goods_coverage(counts):
    r"""Estimate sample coverage with Good's estimator.

    The estimator is

    .. math::

       1-\frac{F_1}{N}

    with :math:`F_1` the number of OTUs observed exactly once (singletons)
    and :math:`N` the total number of individuals, i.e. the sum of the
    abundances of all OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Good's coverage estimator.

    """
    counts = _validate_counts_vector(counts)
    # Fraction of all individuals that belong to singleton OTUs.
    singleton_fraction = singles(counts) / counts.sum()
    return 1 - singleton_fraction
示例#2
0
def robbins(counts):
    r"""Estimate the probability of unobserved outcomes (Robbins' estimator).

    Robbins' estimator is defined as:

    .. math::

       \frac{F_1}{n+1}

    where :math:`F_1` is the number of singleton OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Robbins' estimate.

    Notes
    -----
    Robbins' estimator is defined in [1]_. The value returned here is the
    number of singletons divided by the sum of `counts`; in other words it is
    the estimate for :math:`n-1` counts, so the x-axis is off by 1.

    References
    ----------
    .. [1] Robbins, H. E (1968). Ann. of Stats. Vol 36, pp. 256-257.

    """
    counts = _validate_counts_vector(counts)
    n_singletons = singles(counts)
    total = counts.sum()
    return n_singletons / total
示例#3
0
def osd(counts):
    """Return the observed-OTU, singleton, and doubleton counts together.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    osd : tuple
        ``(observed OTUs, singles, doubles)``, in that order.

    See Also
    --------
    observed_otus
    singles
    doubles

    Notes
    -----
    Convenience wrapper for the measures that need all three of these
    quantities at once.

    """
    counts = _validate_counts_vector(counts)
    n_observed = observed_otus(counts)
    n_singles = singles(counts)
    n_doubles = doubles(counts)
    return n_observed, n_singles, n_doubles
示例#4
0
def goods_coverage(counts):
    r"""Estimate sample coverage with Good's estimator.

    The estimator is

    .. math::

       1-\frac{F_1}{N}

    with :math:`F_1` the number of OTUs observed exactly once (singletons)
    and :math:`N` the total number of individuals, i.e. the sum of the
    abundances of all OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Good's coverage estimator.

    """
    counts = _validate_counts_vector(counts)
    # Fraction of all individuals that belong to singleton OTUs.
    singleton_fraction = singles(counts) / counts.sum()
    return 1 - singleton_fraction
示例#5
0
def robbins(counts):
    r"""Estimate the probability of unobserved outcomes (Robbins' estimator).

    Robbins' estimator is defined as:

    .. math::

       \frac{F_1}{n+1}

    where :math:`F_1` is the number of singleton OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Robbins' estimate.

    Notes
    -----
    Robbins' estimator is defined in [1]_. The value returned here is the
    number of singletons divided by the sum of `counts`; in other words it is
    the estimate for :math:`n-1` counts, so the x-axis is off by 1.

    References
    ----------
    .. [1] Robbins, H. E (1968). Ann. of Stats. Vol 36, pp. 256-257.

    """
    counts = _validate_counts_vector(counts)
    n_singletons = singles(counts)
    total = counts.sum()
    return n_singletons / total
示例#6
0
def osd(counts):
    """Return the observed-OTU, singleton, and doubleton counts together.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    osd : tuple
        ``(observed OTUs, singles, doubles)``, in that order.

    See Also
    --------
    observed_otus
    singles
    doubles

    Notes
    -----
    Convenience wrapper for the measures that need all three of these
    quantities at once.

    """
    counts = _validate_counts_vector(counts)
    n_observed = observed_otus(counts)
    n_singles = singles(counts)
    n_doubles = doubles(counts)
    return n_observed, n_singles, n_doubles
示例#7
0
def fisher_alpha(counts):
    r"""Compute Fisher's alpha diversity metric.

    Fisher's alpha is the value of :math:`\alpha` that solves

    .. math::

       S=\alpha\ln(1+\frac{N}{\alpha})

    where :math:`S` is the number of OTUs and :math:`N` is the total number
    of individuals in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Fisher's alpha.

    Raises
    ------
    RuntimeError
        If the squared residual at the optimizer's result exceeds 1.0, i.e.
        the optimizer failed to converge.

    Notes
    -----
    The implementation follows the description in the SDR-IV online manual
    [1]_. The equation is solved numerically by minimizing the squared
    residual with ``scipy.optimize.minimize_scalar``.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    n_otus = observed_otus(counts)

    def squared_residual(alpha):
        predicted_otus = alpha * np.log(1 + (total / alpha))
        return (predicted_otus - n_otus) ** 2

    # The optimizer may probe invalid points (e.g. alpha=0); silence the
    # resulting division-by-zero / invalid-value warnings only while it runs.
    with np.errstate(divide='ignore', invalid='ignore'):
        best_alpha = minimize_scalar(squared_residual).x

    if squared_residual(best_alpha) > 1.0:
        raise RuntimeError("Optimizer failed to converge (error > 1.0), so "
                           "could not compute Fisher's alpha.")
    return best_alpha
示例#8
0
def fisher_alpha(counts):
    r"""Compute Fisher's alpha diversity metric.

    Fisher's alpha is the value of :math:`\alpha` that solves

    .. math::

       S=\alpha\ln(1+\frac{N}{\alpha})

    where :math:`S` is the number of OTUs and :math:`N` is the total number
    of individuals in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Fisher's alpha.

    Raises
    ------
    RuntimeError
        If the squared residual at the optimizer's result exceeds 1.0, i.e.
        the optimizer failed to converge.

    Notes
    -----
    The implementation follows the description in the SDR-IV online manual
    [1]_. The equation is solved numerically by minimizing the squared
    residual with ``scipy.optimize.minimize_scalar``.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    n_otus = observed_otus(counts)

    def squared_residual(alpha):
        predicted_otus = alpha * np.log(1 + (total / alpha))
        return (predicted_otus - n_otus) ** 2

    # The optimizer may probe invalid points (e.g. alpha=0); silence the
    # resulting division-by-zero / invalid-value warnings only while it runs.
    with np.errstate(divide='ignore', invalid='ignore'):
        best_alpha = minimize_scalar(squared_residual).x

    if squared_residual(best_alpha) > 1.0:
        raise RuntimeError("Optimizer failed to converge (error > 1.0), so "
                           "could not compute Fisher's alpha.")
    return best_alpha
示例#9
0
def chao1(counts, bias_corrected=True):
    r"""Compute the chao1 richness estimator.

    The uncorrected formula is applied only when `bias_corrected` is
    ``False`` *and* the sample contains both singletons and doubletons; in
    every other case the bias-corrected formula is used.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    bias_corrected : bool, optional
        Whether to use the bias-corrected version of the equation. If
        ``False`` *and* there are both singletons and doubletons, the
        uncorrected version is used. Otherwise the bias-corrected version is
        used.

    Returns
    -------
    double
        Computed chao1 richness estimator.

    See Also
    --------
    chao1_ci

    Notes
    -----
    The uncorrected version is based on Equation 6 in [1]_:

    .. math::

       chao1=S_{obs}+\frac{F_1^2}{2F_2}

    where :math:`F_1` and :math:`F_2` are the count of singletons and
    doubletons, respectively.

    The bias-corrected version is defined as

    .. math::

       chao1=S_{obs}+\frac{F_1(F_1-1)}{2(F_2+1)}

    References
    ----------
    .. [1] Chao, A. 1984. Non-parametric estimation of the number of classes in
       a population. Scandinavian Journal of Statistics 11, 265-270.

    """
    counts = _validate_counts_vector(counts)
    observed, n_singles, n_doubles = osd(counts)

    # Without doubletons the uncorrected formula would divide by zero, and
    # without singletons its correction term vanishes, so fall back to the
    # bias-corrected form in those cases.
    use_uncorrected = not bias_corrected and n_singles and n_doubles
    if use_uncorrected:
        return observed + n_singles ** 2 / (n_doubles * 2)
    return observed + n_singles * (n_singles - 1) / (2 * (n_doubles + 1))
示例#10
0
def chao1(counts, bias_corrected=True):
    r"""Compute the chao1 richness estimator.

    The uncorrected formula is applied only when `bias_corrected` is
    ``False`` *and* the sample contains both singletons and doubletons; in
    every other case the bias-corrected formula is used.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    bias_corrected : bool, optional
        Whether to use the bias-corrected version of the equation. If
        ``False`` *and* there are both singletons and doubletons, the
        uncorrected version is used. Otherwise the bias-corrected version is
        used.

    Returns
    -------
    double
        Computed chao1 richness estimator.

    See Also
    --------
    chao1_ci

    Notes
    -----
    The uncorrected version is based on Equation 6 in [1]_:

    .. math::

       chao1=S_{obs}+\frac{F_1^2}{2F_2}

    where :math:`F_1` and :math:`F_2` are the count of singletons and
    doubletons, respectively.

    The bias-corrected version is defined as

    .. math::

       chao1=S_{obs}+\frac{F_1(F_1-1)}{2(F_2+1)}

    References
    ----------
    .. [1] Chao, A. 1984. Non-parametric estimation of the number of classes in
       a population. Scandinavian Journal of Statistics 11, 265-270.

    """
    counts = _validate_counts_vector(counts)
    observed, n_singles, n_doubles = osd(counts)

    # Without doubletons the uncorrected formula would divide by zero, and
    # without singletons its correction term vanishes, so fall back to the
    # bias-corrected form in those cases.
    use_uncorrected = not bias_corrected and n_singles and n_doubles
    if use_uncorrected:
        return observed + n_singles ** 2 / (n_doubles * 2)
    return observed + n_singles * (n_singles - 1) / (2 * (n_doubles + 1))
示例#11
0
def chao1_ci(counts, bias_corrected=True, zscore=1.96):
    """Compute the confidence interval of the chao1 estimator.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    bias_corrected : bool, optional
        Whether to use the bias-corrected version of the equation. If
        ``False`` *and* there are both singletons and doubletons, the
        uncorrected version is used. Otherwise the bias-corrected version is
        used.
    zscore : scalar, optional
        Score to use for confidence. The default of 1.96 corresponds to a 95%
        confidence interval.

    Returns
    -------
    tuple
        chao1 confidence interval as ``(lower_bound, upper_bound)``.

    See Also
    --------
    chao1

    Notes
    -----
    The implementation is based on the equations in the EstimateS manual
    [1]_. Which equations are used for the chao1 variance and confidence
    interval depends on `bias_corrected` and on whether singletons and/or
    doubletons are present.

    Specifically, the following EstimateS equations are used:

    1. No singletons, Equation 14.
    2. Singletons but no doubletons, Equations 7, 13.
    3. Singletons and doubletons, ``bias_corrected=True``, Equations 6, 13.
    4. Singletons and doubletons, ``bias_corrected=False``, Equations 5, 13.

    References
    ----------
    .. [1] http://viceroy.eeb.uconn.edu/estimates/

    """
    counts = _validate_counts_vector(counts)
    # The doubleton count is not needed here; the helpers recompute what they
    # require from `counts`.
    observed, n_singles, _ = osd(counts)

    if not n_singles:
        total = counts.sum()
        return _chao_confidence_no_singletons(total, observed, zscore)

    estimate = chao1(counts, bias_corrected)
    variance = _chao1_var(counts, bias_corrected)
    return _chao_confidence_with_singletons(estimate, observed, variance,
                                            zscore)
示例#12
0
def chao1_ci(counts, bias_corrected=True, zscore=1.96):
    """Compute the confidence interval of the chao1 estimator.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    bias_corrected : bool, optional
        Whether to use the bias-corrected version of the equation. If
        ``False`` *and* there are both singletons and doubletons, the
        uncorrected version is used. Otherwise the bias-corrected version is
        used.
    zscore : scalar, optional
        Score to use for confidence. The default of 1.96 corresponds to a 95%
        confidence interval.

    Returns
    -------
    tuple
        chao1 confidence interval as ``(lower_bound, upper_bound)``.

    See Also
    --------
    chao1

    Notes
    -----
    The implementation is based on the equations in the EstimateS manual
    [1]_. Which equations are used for the chao1 variance and confidence
    interval depends on `bias_corrected` and on whether singletons and/or
    doubletons are present.

    Specifically, the following EstimateS equations are used:

    1. No singletons, Equation 14.
    2. Singletons but no doubletons, Equations 7, 13.
    3. Singletons and doubletons, ``bias_corrected=True``, Equations 6, 13.
    4. Singletons and doubletons, ``bias_corrected=False``, Equations 5, 13.

    References
    ----------
    .. [1] http://viceroy.eeb.uconn.edu/estimates/

    """
    counts = _validate_counts_vector(counts)
    # The doubleton count is not needed here; the helpers recompute what they
    # require from `counts`.
    observed, n_singles, _ = osd(counts)

    if not n_singles:
        total = counts.sum()
        return _chao_confidence_no_singletons(total, observed, zscore)

    estimate = chao1(counts, bias_corrected)
    variance = _chao1_var(counts, bias_corrected)
    return _chao_confidence_with_singletons(estimate, observed, variance,
                                            zscore)
示例#13
0
    def test_validate_counts_vector(self):
        # Valid inputs must come back as arrays with the expected contents
        # and dtype.

        # A plain Python list of ints.
        expected = np.array([0, 2, 1, 3])
        result = _validate_counts_vector([0, 2, 1, 3])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)

        # An existing numpy array is handed back as the very same object
        # (no copy is made).
        original = np.array([0, 2, 1, 3])
        result = _validate_counts_vector(original)
        npt.assert_array_equal(result, original)
        self.assertEqual(result.dtype, int)
        self.assertTrue(result is original)

        # A one-element list still yields a 1-D vector.
        expected = np.array([42])
        result = _validate_counts_vector([42])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)
        self.assertEqual(result.shape, (1,))

        # With suppress_cast=True float input is kept as float.
        expected = np.array([42.2, 42.1, 0])
        result = _validate_counts_vector([42.2, 42.1, 0], suppress_cast=True)
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, float)

        # A vector made up entirely of zeros is accepted.
        expected = np.array([0, 0, 0])
        result = _validate_counts_vector([0, 0, 0])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)

        # So is a single zero.
        expected = np.array([0])
        result = _validate_counts_vector([0])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)
示例#14
0
    def test_validate_counts_vector(self):
        # Valid inputs must come back as arrays with the expected contents
        # and dtype.

        # A plain Python list of ints.
        expected = np.array([0, 2, 1, 3])
        result = _validate_counts_vector([0, 2, 1, 3])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)

        # An existing numpy array is handed back as the very same object
        # (no copy is made).
        original = np.array([0, 2, 1, 3])
        result = _validate_counts_vector(original)
        npt.assert_array_equal(result, original)
        self.assertEqual(result.dtype, int)
        self.assertTrue(result is original)

        # A one-element list still yields a 1-D vector.
        expected = np.array([42])
        result = _validate_counts_vector([42])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)
        self.assertEqual(result.shape, (1,))

        # With suppress_cast=True float input is kept as float.
        expected = np.array([42.2, 42.1, 0])
        result = _validate_counts_vector([42.2, 42.1, 0], suppress_cast=True)
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, float)

        # A vector made up entirely of zeros is accepted.
        expected = np.array([0, 0, 0])
        result = _validate_counts_vector([0, 0, 0])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)

        # So is a single zero.
        expected = np.array([0])
        result = _validate_counts_vector([0])
        npt.assert_array_equal(result, expected)
        self.assertEqual(result.dtype, int)
示例#15
0
def esty_ci(counts):
    r"""Calculate Esty's CI.

    Esty's CI is defined as

    .. math::

       F_1/N \pm z\sqrt{W}

    where :math:`F_1` is the number of singleton OTUs, :math:`N` is the total
    number of individuals (sum of abundances for all OTUs), and :math:`z` is a
    constant that depends on the targeted confidence and is based on the
    normal distribution.

    :math:`W` is defined as

    .. math::

       \frac{F_1(N-F_1)+2NF_2}{N^3}

    where :math:`F_2` is the number of doubleton OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    tuple
        Esty's confidence interval as ``(lower_bound, upper_bound)``.

    Notes
    -----
    Esty's CI is defined in [1]_. :math:`z` is hardcoded for a 95% confidence
    interval.

    :math:`N` is converted to floating point before :math:`W` is evaluated so
    that :math:`N^3` cannot overflow a fixed-width integer for deeply
    sequenced samples.

    References
    ----------
    .. [1] Esty, W. W. (1983). "A normal limit law for a nonparametric
       estimator of the coverage of a random sample". Ann Statist 11: 905-912.

    """
    counts = _validate_counts_vector(counts)

    f1 = singles(counts)
    f2 = doubles(counts)
    # Fixed-width NumPy integers wrap around silently: with 64-bit ints
    # ``n ** 3`` overflows once n exceeds roughly 2.1 million (and far sooner
    # with 32-bit ints), which would corrupt W. Do the arithmetic in float64.
    n = np.float64(counts.sum())
    z = 1.959963985
    W = (f1 * (n - f1) + 2 * n * f2) / (n ** 3)

    # Point estimate and half-width of the interval, each computed once.
    estimate = f1 / n
    margin = z * np.sqrt(W)
    return estimate - margin, estimate + margin
示例#16
0
def esty_ci(counts):
    r"""Calculate Esty's CI.

    Esty's CI is defined as

    .. math::

       F_1/N \pm z\sqrt{W}

    where :math:`F_1` is the number of singleton OTUs, :math:`N` is the total
    number of individuals (sum of abundances for all OTUs), and :math:`z` is a
    constant that depends on the targeted confidence and is based on the
    normal distribution.

    :math:`W` is defined as

    .. math::

       \frac{F_1(N-F_1)+2NF_2}{N^3}

    where :math:`F_2` is the number of doubleton OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    tuple
        Esty's confidence interval as ``(lower_bound, upper_bound)``.

    Notes
    -----
    Esty's CI is defined in [1]_. :math:`z` is hardcoded for a 95% confidence
    interval.

    :math:`N` is converted to floating point before :math:`W` is evaluated so
    that :math:`N^3` cannot overflow a fixed-width integer for deeply
    sequenced samples.

    References
    ----------
    .. [1] Esty, W. W. (1983). "A normal limit law for a nonparametric
       estimator of the coverage of a random sample". Ann Statist 11: 905-912.

    """
    counts = _validate_counts_vector(counts)

    f1 = singles(counts)
    f2 = doubles(counts)
    # Fixed-width NumPy integers wrap around silently: with 64-bit ints
    # ``n ** 3`` overflows once n exceeds roughly 2.1 million (and far sooner
    # with 32-bit ints), which would corrupt W. Do the arithmetic in float64.
    n = np.float64(counts.sum())
    z = 1.959963985
    W = (f1 * (n - f1) + 2 * n * f2) / (n ** 3)

    # Point estimate and half-width of the interval, each computed once.
    estimate = f1 / n
    margin = z * np.sqrt(W)
    return estimate - margin, estimate + margin
示例#17
0
def _setup_faith_pd(counts, otu_ids, tree, validate, single_sample):
    """Optionally validate the inputs, then vectorize counts against the tree.

    Parameters
    ----------
    counts : array_like
        Counts to vectorize. When `single_sample` is true this is validated
        as a single counts vector; otherwise only ``counts[0]`` is passed to
        the OTU id / tree validation (presumably `counts` then holds one
        vector per sample -- confirm against callers).
    otu_ids, tree
        Forwarded unchanged to ``_validate_otu_ids_and_tree`` and
        ``_vectorize_counts_and_tree``.
    validate : bool
        If false, no validation is performed.
    single_sample : bool
        Selects which validation path is taken (see `counts`).

    Returns
    -------
    tuple
        ``(counts_by_node, branch_lengths)`` as produced by
        ``_vectorize_counts_and_tree``; its second return value is discarded.

    """
    if validate:
        if single_sample:
            # Counts are only validated in single-sample mode; in the other
            # mode they have already been validated.
            counts = _validate_counts_vector(counts)
            counts_to_check = counts
        else:
            counts_to_check = counts[0]
        _validate_otu_ids_and_tree(counts_to_check, otu_ids, tree)

    vectorized = _vectorize_counts_and_tree(counts, otu_ids, tree)
    counts_by_node, _, branch_lengths = vectorized
    return counts_by_node, branch_lengths
示例#18
0
def lladser_ci(counts, r, alpha=0.95, f=10, ci_type='ULCL'):
    """Compute a single CI of the conditional uncovered probability.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    r : int
        Number of new colors that are required for the next prediction.
    alpha : float, optional
        Desired confidence level.
    f : float, optional
        Ratio between upper and lower bound.
    ci_type : {'ULCL', 'ULCU', 'U', 'L'}
        Type of confidence interval. If ``'ULCL'``, upper and lower bounds with
        conservative lower bound. If ``'ULCU'``, upper and lower bounds with
        conservative upper bound. If ``'U'``, upper bound only, lower bound
        fixed to 0.0. If ``'L'``, lower bound only, upper bound fixed to 1.0.

    Returns
    -------
    tuple
        Confidence interval as ``(lower_bound, upper_bound)``. Both bounds
        are ``nan`` when no interval could be produced.

    See Also
    --------
    lladser_pe

    Notes
    -----
    This function is a thin wrapper around the full CI estimator described in
    Theorem 2 (iii) in [1]_, meant to give one best CI estimate for a
    complete sample. The counts are expanded into a sample that is shuffled
    with ``np.random.shuffle`` and the last interval of the resulting series
    is returned, so the result depends on NumPy's global random state.

    References
    ----------
    .. [1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via
       Poissonization: Accurate Measurements of the Microbial Unknown" PLoS
       2011.

    """
    shuffled = _expand_counts(_validate_counts_vector(counts))
    np.random.shuffle(shuffled)

    try:
        intervals = list(_lladser_ci_series(shuffled, r, alpha, f, ci_type))
        return intervals[-1]
    except IndexError:
        # No interval available.
        return (np.nan, np.nan)
示例#19
0
def lladser_ci(counts, r, alpha=0.95, f=10, ci_type='ULCL'):
    """Compute a single CI of the conditional uncovered probability.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    r : int
        Number of new colors that are required for the next prediction.
    alpha : float, optional
        Desired confidence level.
    f : float, optional
        Ratio between upper and lower bound.
    ci_type : {'ULCL', 'ULCU', 'U', 'L'}
        Type of confidence interval. If ``'ULCL'``, upper and lower bounds with
        conservative lower bound. If ``'ULCU'``, upper and lower bounds with
        conservative upper bound. If ``'U'``, upper bound only, lower bound
        fixed to 0.0. If ``'L'``, lower bound only, upper bound fixed to 1.0.

    Returns
    -------
    tuple
        Confidence interval as ``(lower_bound, upper_bound)``. Both bounds
        are ``nan`` when no interval could be produced.

    See Also
    --------
    lladser_pe

    Notes
    -----
    This function is a thin wrapper around the full CI estimator described in
    Theorem 2 (iii) in [1]_, meant to give one best CI estimate for a
    complete sample. The counts are expanded into a sample that is shuffled
    with ``np.random.shuffle`` and the last interval of the resulting series
    is returned, so the result depends on NumPy's global random state.

    References
    ----------
    .. [1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via
       Poissonization: Accurate Measurements of the Microbial Unknown" PLoS
       2011.

    """
    shuffled = _expand_counts(_validate_counts_vector(counts))
    np.random.shuffle(shuffled)

    try:
        intervals = list(_lladser_ci_series(shuffled, r, alpha, f, ci_type))
        return intervals[-1]
    except IndexError:
        # No interval available.
        return (np.nan, np.nan)
示例#20
0
    def test_validate_counts_vector_invalid_input(self):
        # Non-integer values: wrong dtype.
        self.assertRaises(TypeError, _validate_counts_vector,
                          [0, 2, 1.2, 3])

        # 2-D input: wrong number of dimensions.
        self.assertRaises(ValueError, _validate_counts_vector,
                          [[0, 2, 1, 3], [4, 5, 6, 7]])

        # Scalar input: wrong number of dimensions.
        self.assertRaises(ValueError, _validate_counts_vector, 1)

        # Negative counts are not allowed.
        self.assertRaises(ValueError, _validate_counts_vector,
                          [0, 0, 2, -1, 3])
示例#21
0
    def test_validate_counts_vector_invalid_input(self):
        # Non-integer values: wrong dtype.
        self.assertRaises(TypeError, _validate_counts_vector,
                          [0, 2, 1.2, 3])

        # 2-D input: wrong number of dimensions.
        self.assertRaises(ValueError, _validate_counts_vector,
                          [[0, 2, 1, 3], [4, 5, 6, 7]])

        # Scalar input: wrong number of dimensions.
        self.assertRaises(ValueError, _validate_counts_vector, 1)

        # Negative counts are not allowed.
        self.assertRaises(ValueError, _validate_counts_vector,
                          [0, 0, 2, -1, 3])
示例#22
0
def _setup_faith_pd(counts, otu_ids, tree, validate, single_sample):
    """Optionally validate the inputs, then vectorize counts against the tree.

    Parameters
    ----------
    counts : array_like
        Counts to vectorize. When `single_sample` is true this is validated
        as a single counts vector; otherwise only ``counts[0]`` is passed to
        the OTU id / tree validation (presumably `counts` then holds one
        vector per sample -- confirm against callers).
    otu_ids, tree
        Forwarded unchanged to ``_validate_otu_ids_and_tree`` and
        ``_vectorize_counts_and_tree``.
    validate : bool
        If false, no validation is performed.
    single_sample : bool
        Selects which validation path is taken (see `counts`).

    Returns
    -------
    tuple
        ``(counts_by_node, branch_lengths)`` as produced by
        ``_vectorize_counts_and_tree``; its second return value is discarded.

    """
    if validate:
        if single_sample:
            # Counts are only validated in single-sample mode; in the other
            # mode they have already been validated.
            counts = _validate_counts_vector(counts)
            counts_to_check = counts
        else:
            counts_to_check = counts[0]
        _validate_otu_ids_and_tree(counts_to_check, otu_ids, tree)

    vectorized = _vectorize_counts_and_tree(counts, otu_ids, tree)
    counts_by_node, _, branch_lengths = vectorized
    return counts_by_node, branch_lengths
示例#23
0
def mcintosh_d(counts):
    r"""Compute the McIntosh dominance index D.

    The index is defined as:

    .. math::

       D = \frac{N - U}{N - \sqrt{N}}

    where :math:`N` is the total number of individuals in the sample and
    :math:`U` is

    .. math::

       U = \sqrt{\sum{{n_i}^2}}

    with :math:`n_i` the number of individuals in the :math:`i^{\text{th}}`
    OTU.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        McIntosh dominance index D.

    See Also
    --------
    mcintosh_e

    Notes
    -----
    The index was proposed in [1]_. The implementation follows the
    description given in the SDR-IV online manual [2]_.

    References
    ----------
    .. [1] McIntosh, R. P. 1967 An index of diversity and the relation of
       certain concepts to diversity. Ecology 48, 1115-1126.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    sum_of_squares = (counts * counts).sum()
    u = np.sqrt(sum_of_squares)
    total = counts.sum()
    numerator = total - u
    denominator = total - np.sqrt(total)
    return numerator / denominator
示例#24
0
def mcintosh_d(counts):
    r"""Compute the McIntosh dominance index D.

    The index is defined as:

    .. math::

       D = \frac{N - U}{N - \sqrt{N}}

    where :math:`N` is the total number of individuals in the sample and
    :math:`U` is

    .. math::

       U = \sqrt{\sum{{n_i}^2}}

    with :math:`n_i` the number of individuals in the :math:`i^{\text{th}}`
    OTU.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        McIntosh dominance index D.

    See Also
    --------
    mcintosh_e

    Notes
    -----
    The index was proposed in [1]_. The implementation follows the
    description given in the SDR-IV online manual [2]_.

    References
    ----------
    .. [1] McIntosh, R. P. 1967 An index of diversity and the relation of
       certain concepts to diversity. Ecology 48, 1115-1126.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    sum_of_squares = (counts * counts).sum()
    u = np.sqrt(sum_of_squares)
    total = counts.sum()
    numerator = total - u
    denominator = total - np.sqrt(total)
    return numerator / denominator
示例#25
0
def kempton_taylor_q(counts, lower_quantile=0.25, upper_quantile=0.75):
    """Compute the Kempton-Taylor Q index of alpha diversity.

    Q estimates the slope of the cumulative abundance curve within the
    interquantile range. With the defaults the lower and upper quartiles are
    used, rounding inwards.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    lower_quantile : float, optional
        Lower bound of the interquantile range. Defaults to lower quartile.
    upper_quantile : float, optional
        Upper bound of the interquantile range. Defaults to upper quartile.

    Returns
    -------
    double
        Kempton-Taylor Q index of alpha diversity.

    Notes
    -----
    The index is defined in [1]_. The implementation follows the description
    given in the SDR-IV online manual [2]_.

    The results differ slightly from those given in Magurran 1998:
    specifically, we have 14 in the numerator rather than 15. Magurran
    recommends counting half of the OTUs with the same # counts as the point
    where the UQ falls and the point where the LQ falls, but the justification
    for this is unclear (e.g. if there were a very large # OTUs that just
    overlapped one of the quantiles, the results would be considerably off).
    The calculation is left as-is for now, but consider changing.

    References
    ----------
    .. [1] Kempton, R. A. and Taylor, L. R. (1976) Models and statistics for
       species diversity. Nature, 262, 818-820.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    n_entries = len(counts)
    # Round inwards: ceil for the lower index, truncate for the upper one.
    lower_idx = int(np.ceil(n_entries * lower_quantile))
    upper_idx = int(n_entries * upper_quantile)
    ranked = np.sort(counts)
    abundance_ratio = ranked[upper_idx] / ranked[lower_idx]
    return (upper_idx - lower_idx) / np.log(abundance_ratio)
示例#26
0
def kempton_taylor_q(counts, lower_quantile=0.25, upper_quantile=0.75):
    """Compute the Kempton-Taylor Q index of alpha diversity.

    Q estimates the slope of the cumulative abundance curve within the
    interquantile range. With the defaults the lower and upper quartiles are
    used, rounding inwards.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    lower_quantile : float, optional
        Lower bound of the interquantile range. Defaults to lower quartile.
    upper_quantile : float, optional
        Upper bound of the interquantile range. Defaults to upper quartile.

    Returns
    -------
    double
        Kempton-Taylor Q index of alpha diversity.

    Notes
    -----
    The index is defined in [1]_. The implementation follows the description
    given in the SDR-IV online manual [2]_.

    The results differ slightly from those given in Magurran 1998:
    specifically, we have 14 in the numerator rather than 15. Magurran
    recommends counting half of the OTUs with the same # counts as the point
    where the UQ falls and the point where the LQ falls, but the justification
    for this is unclear (e.g. if there were a very large # OTUs that just
    overlapped one of the quantiles, the results would be considerably off).
    The calculation is left as-is for now, but consider changing.

    References
    ----------
    .. [1] Kempton, R. A. and Taylor, L. R. (1976) Models and statistics for
       species diversity. Nature, 262, 818-820.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    n_entries = len(counts)
    # Round inwards: ceil for the lower index, truncate for the upper one.
    lower_idx = int(np.ceil(n_entries * lower_quantile))
    upper_idx = int(n_entries * upper_quantile)
    ranked = np.sort(counts)
    abundance_ratio = ranked[upper_idx] / ranked[lower_idx]
    return (upper_idx - lower_idx) / np.log(abundance_ratio)
示例#27
0
def observed_otus(counts):
    """Count the distinct OTUs, i.e. the entries with a non-zero count.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Distinct OTU count.

    """
    counts = _validate_counts_vector(counts)
    is_present = counts != 0
    return is_present.sum()
示例#28
0
def observed_otus(counts):
    """Count the distinct OTUs, i.e. the entries with a non-zero count.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Distinct OTU count.

    """
    counts = _validate_counts_vector(counts)
    is_present = counts != 0
    return is_present.sum()
示例#29
0
def singles(counts):
    """Count the singletons, i.e. the entries whose count is exactly one.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Singleton count.

    """
    counts = _validate_counts_vector(counts)
    is_singleton = counts == 1
    return is_singleton.sum()
示例#30
0
def singles(counts):
    """Count the singletons, i.e. the entries whose count is exactly one.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Singleton count.

    """
    counts = _validate_counts_vector(counts)
    is_singleton = counts == 1
    return is_singleton.sum()
示例#31
0
def mcintosh_e(counts):
    r"""Compute the McIntosh evenness measure E.

    The measure is defined as:

    .. math::

       E = \frac{\sqrt{\sum{n_i^2}}}{\sqrt{(N-S+1)^2 + S - 1}}

    where :math:`n_i` is the abundance of the :math:`i^{\text{th}}` OTU,
    :math:`N` is the total number of individuals, and :math:`S` is the number
    of OTUs in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        McIntosh evenness measure E.

    See Also
    --------
    mcintosh_d

    Notes
    -----
    This follows the description given in [1]_, **NOT** the one in the SDR-IV
    online manual, which is wrong.

    References
    ----------
    .. [1] Heip & Engels (1974) Comparing Species Diversity and Evenness
       Indices. p 560.

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    richness = observed_otus(counts)
    sum_of_squares = (counts * counts).sum()
    # Denominator term from the formula: (N - S + 1)^2 + S - 1.
    reference = (total - richness + 1) ** 2 + richness - 1
    return np.sqrt(sum_of_squares) / np.sqrt(reference)
示例#32
0
def strong(counts):
    r"""Compute Strong's dominance index (Dw).

    The index is defined as:

    .. math::

       D_w = max_i[(\frac{b_i}{N})-\frac{i}{S}]

    where :math:`b_i` is the running (cumulative) total of the
    :math:`i^{\text{th}}` OTU abundance values ranked from largest to smallest,
    :math:`N` is the total number of individuals in the sample, and
    :math:`S` is the number of OTUs in the sample. The bracketed expression is
    evaluated for every OTU and :math:`max_i` is the largest value obtained.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Strong's dominance index (Dw).

    Notes
    -----
    Strong's dominance index is defined in [1]_. This implementation follows
    the description given in the SDR-IV online manual [2]_.

    References
    ----------
    .. [1] Strong, W. L., 2002 Assessing species abundance uneveness within and
       between plant communities. Community Ecology, 3, 237-246.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    richness = observed_otus(counts)
    # One rank per entry of `counts` (1-based), zero entries included.
    ranks = np.arange(1, len(counts) + 1)
    # Cumulative abundance, walking from the most to the least abundant OTU.
    cumulative = np.cumsum(np.sort(counts)[::-1])
    return np.max(cumulative / total - (ranks / richness))
示例#33
0
def mcintosh_e(counts):
    r"""Compute the McIntosh evenness measure E.

    The measure is defined as:

    .. math::

       E = \frac{\sqrt{\sum{n_i^2}}}{\sqrt{(N-S+1)^2 + S - 1}}

    where :math:`n_i` is the abundance of the :math:`i^{\text{th}}` OTU,
    :math:`N` is the total number of individuals, and :math:`S` is the number
    of OTUs in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        McIntosh evenness measure E.

    See Also
    --------
    mcintosh_d

    Notes
    -----
    This follows the description given in [1]_, **NOT** the one in the SDR-IV
    online manual, which is wrong.

    References
    ----------
    .. [1] Heip & Engels (1974) Comparing Species Diversity and Evenness
       Indices. p 560.

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    richness = observed_otus(counts)
    sum_of_squares = (counts * counts).sum()
    # Denominator term from the formula: (N - S + 1)^2 + S - 1.
    reference = (total - richness + 1) ** 2 + richness - 1
    return np.sqrt(sum_of_squares) / np.sqrt(reference)
示例#34
0
def pielou_e(counts):
    r"""Compute Pielou's evenness index J'.

    Pielou's evenness is defined as:

    .. math::

       J' = \frac{(H)}{\ln(S)}

    where :math:`H` is the Shannon-Wiener entropy of counts (computed here
    with the natural logarithm) and :math:`S` is the number of OTUs in the
    sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Pielou's evenness.

    See Also
    --------
    shannon
    heip_e

    Notes
    -----
    This implementation follows the description in Wikipedia [1]_. The index
    was first proposed by E. C. Pielou [2]_ and is similar to Heip's
    evenness [3]_.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Species_evenness
    .. [2] Pielou, E. C., 1966. The measurement of diversity in different types
       of biological collections. Journal of Theoretical Biology, 13, 131-44.
    .. [3] Heip, C. 1974. A new index measuring evenness. J. Mar. Biol. Ass.
       UK., 54, 555-557.

    """
    counts = _validate_counts_vector(counts)
    entropy = shannon(counts, base=np.e)
    # ln(S): the normalising term in the denominator of the formula.
    log_richness = np.log(observed_otus(counts))
    return entropy / log_richness
示例#35
0
def lladser_pe(counts, r=10):
    """Compute a single point estimate of conditional uncovered probability.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    r : int, optional
        Number of new colors that are required for the next prediction.

    Returns
    -------
    double
        Single point estimate of the conditional uncovered probability. May be
        ``np.nan`` if a point estimate could not be computed.

    See Also
    --------
    lladser_ci

    Notes
    -----
    This is a thin wrapper around the full point estimator described in
    Theorem 2 (i) in [1]_, meant to be called for a single best estimate on a
    complete sample. The returned estimate of the uncovered probability is not
    guaranteed to be less than 1 if the coverage is too low.

    The expanded sample is shuffled with ``np.random.shuffle`` before the
    estimator runs, and only the last estimate produced is returned.

    References
    ----------
    .. [1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via
       Poissonization: Accurate Measurements of the Microbial Unknown" PLoS
       2011.

    """
    observations = _expand_counts(_validate_counts_vector(counts))
    np.random.shuffle(observations)

    try:
        estimates = list(_lladser_point_estimates(observations, r))
        # Keep only the final estimate (first field of the last item).
        return estimates[-1][0]
    except IndexError:
        # No estimate could be extracted.
        return np.nan
示例#36
0
def pielou_e(counts):
    r"""Compute Pielou's evenness index J'.

    Pielou's evenness is defined as:

    .. math::

       J' = \frac{(H)}{\ln(S)}

    where :math:`H` is the Shannon-Wiener entropy of counts (computed here
    with the natural logarithm) and :math:`S` is the number of OTUs in the
    sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Pielou's evenness.

    See Also
    --------
    shannon
    heip_e

    Notes
    -----
    This implementation follows the description in Wikipedia [1]_. The index
    was first proposed by E. C. Pielou [2]_ and is similar to Heip's
    evenness [3]_.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Species_evenness
    .. [2] Pielou, E. C., 1966. The measurement of diversity in different types
       of biological collections. Journal of Theoretical Biology, 13, 131-44.
    .. [3] Heip, C. 1974. A new index measuring evenness. J. Mar. Biol. Ass.
       UK., 54, 555-557.

    """
    counts = _validate_counts_vector(counts)
    entropy = shannon(counts, base=np.e)
    # ln(S): the normalising term in the denominator of the formula.
    log_richness = np.log(observed_otus(counts))
    return entropy / log_richness
示例#37
0
def strong(counts):
    r"""Compute Strong's dominance index (Dw).

    The index is defined as:

    .. math::

       D_w = max_i[(\frac{b_i}{N})-\frac{i}{S}]

    where :math:`b_i` is the running (cumulative) total of the
    :math:`i^{\text{th}}` OTU abundance values ranked from largest to smallest,
    :math:`N` is the total number of individuals in the sample, and
    :math:`S` is the number of OTUs in the sample. The bracketed expression is
    evaluated for every OTU and :math:`max_i` is the largest value obtained.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Strong's dominance index (Dw).

    Notes
    -----
    Strong's dominance index is defined in [1]_. This implementation follows
    the description given in the SDR-IV online manual [2]_.

    References
    ----------
    .. [1] Strong, W. L., 2002 Assessing species abundance uneveness within and
       between plant communities. Community Ecology, 3, 237-246.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    richness = observed_otus(counts)
    # One rank per entry of `counts` (1-based), zero entries included.
    ranks = np.arange(1, len(counts) + 1)
    # Cumulative abundance, walking from the most to the least abundant OTU.
    cumulative = np.cumsum(np.sort(counts)[::-1])
    return np.max(cumulative / total - (ranks / richness))
示例#38
0
def lladser_pe(counts, r=10):
    """Compute a single point estimate of conditional uncovered probability.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    r : int, optional
        Number of new colors that are required for the next prediction.

    Returns
    -------
    double
        Single point estimate of the conditional uncovered probability. May be
        ``np.nan`` if a point estimate could not be computed.

    See Also
    --------
    lladser_ci

    Notes
    -----
    This is a thin wrapper around the full point estimator described in
    Theorem 2 (i) in [1]_, meant to be called for a single best estimate on a
    complete sample. The returned estimate of the uncovered probability is not
    guaranteed to be less than 1 if the coverage is too low.

    The expanded sample is shuffled with ``np.random.shuffle`` before the
    estimator runs, and only the last estimate produced is returned.

    References
    ----------
    .. [1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via
       Poissonization: Accurate Measurements of the Microbial Unknown" PLoS
       2011.

    """
    observations = _expand_counts(_validate_counts_vector(counts))
    np.random.shuffle(observations)

    try:
        estimates = list(_lladser_point_estimates(observations, r))
        # Keep only the final estimate (first field of the last item).
        return estimates[-1][0]
    except IndexError:
        # No estimate could be extracted.
        return np.nan
示例#39
0
def dominance(counts):
    r"""Compute dominance.

    Dominance is defined as

    .. math::

       \sum{p_i^2}

    where :math:`p_i` is the proportion of the entire community that OTU
    :math:`i` represents.

    Dominance can also be defined as 1 - Simpson's index. It ranges between
    0 and 1.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Dominance.

    See Also
    --------
    simpson

    Notes
    -----
    This implementation follows the description given in [1]_.

    References
    ----------
    .. [1] http://folk.uio.no/ohammer/past/diversity.html

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    # Relative abundance of each OTU.
    proportions = counts / total
    squared = proportions * proportions
    return squared.sum()
示例#40
0
def dominance(counts):
    r"""Compute dominance.

    Dominance is defined as

    .. math::

       \sum{p_i^2}

    where :math:`p_i` is the proportion of the entire community that OTU
    :math:`i` represents.

    Dominance can also be defined as 1 - Simpson's index. It ranges between
    0 and 1.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Dominance.

    See Also
    --------
    simpson

    Notes
    -----
    This implementation follows the description given in [1]_.

    References
    ----------
    .. [1] http://folk.uio.no/ohammer/past/diversity.html

    """
    counts = _validate_counts_vector(counts)
    total = counts.sum()
    # Relative abundance of each OTU.
    proportions = counts / total
    squared = proportions * proportions
    return squared.sum()
示例#41
0
def heip_e(counts):
    r"""Compute Heip's evenness measure.

    Heip's evenness is defined as:

    .. math::

       \frac{(e^H-1)}{(S-1)}

    where :math:`H` is the Shannon-Wiener entropy of counts (using logarithm
    base :math:`e`) and :math:`S` is the number of OTUs in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Heip's evenness measure.

    See Also
    --------
    shannon
    pielou_e

    Notes
    -----
    This implementation follows the description in [1]_.

    References
    ----------
    .. [1] Heip, C. 1974. A new index measuring evenness. J. Mar. Biol. Ass.
       UK., 54, 555-557.

    """
    counts = _validate_counts_vector(counts)
    # Natural-log entropy, so that exp(H) matches the formula above.
    entropy = shannon(counts, base=np.e)
    richness = observed_otus(counts)
    return (np.exp(entropy) - 1) / (richness - 1)
示例#42
0
def heip_e(counts):
    r"""Compute Heip's evenness measure.

    Heip's evenness is defined as:

    .. math::

       \frac{(e^H-1)}{(S-1)}

    where :math:`H` is the Shannon-Wiener entropy of counts (using logarithm
    base :math:`e`) and :math:`S` is the number of OTUs in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Heip's evenness measure.

    See Also
    --------
    shannon
    pielou_e

    Notes
    -----
    This implementation follows the description in [1]_.

    References
    ----------
    .. [1] Heip, C. 1974. A new index measuring evenness. J. Mar. Biol. Ass.
       UK., 54, 555-557.

    """
    counts = _validate_counts_vector(counts)
    # Natural-log entropy, so that exp(H) matches the formula above.
    entropy = shannon(counts, base=np.e)
    richness = observed_otus(counts)
    return (np.exp(entropy) - 1) / (richness - 1)
示例#43
0
def shannon(counts, base=2):
    r"""Compute the Shannon entropy of counts, in bits by default.

    The Shannon-Wiener diversity index is defined as:

    .. math::

       H = -\sum_{i=1}^s\left(p_i\log_b p_i\right)

    where :math:`s` is the number of OTUs, :math:`p_i` is the proportion of
    the community represented by OTU :math:`i`, and :math:`b` is the logarithm
    base (`base`, 2 by default).

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    base : scalar, optional
        Logarithm base to use in the calculations.

    Returns
    -------
    double
        Shannon diversity index H.

    Notes
    -----
    This implementation follows the description given in the SDR-IV online
    manual [1]_ except that the default logarithm base used here is 2 instead
    of :math:`e`.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    proportions = counts / counts.sum()
    # Zero proportions are left out before taking logarithms.
    present = proportions[proportions.nonzero()]
    entropy_nats = -(present * np.log(present)).sum()
    # Change of base: convert from natural log to the requested base.
    return entropy_nats / np.log(base)
示例#44
0
def enspie(counts):
    r"""Compute the ENS_pie alpha diversity measure.

    ENS_pie is equivalent to ``1 / dominance``:

    .. math::

       ENS_{pie} = \frac{1}{\sum_{i=1}^s{p_i^2}}

    where :math:`s` is the number of OTUs and :math:`p_i` is the proportion of
    the community represented by OTU :math:`i`.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        ENS_pie alpha diversity measure.

    See Also
    --------
    dominance

    Notes
    -----
    ENS_pie is defined in [1]_.

    References
    ----------
    .. [1] Chase and Knight (2013). "Scale-dependent effect sizes of ecological
       drivers on biodiversity: why standardised sampling is not enough".
       Ecology Letters, Volume 16, Issue Supplement s1, pgs 17-26.

    """
    counts = _validate_counts_vector(counts)
    concentration = dominance(counts)
    return 1 / concentration
示例#45
0
def shannon(counts, base=2):
    r"""Compute the Shannon entropy of counts, in bits by default.

    The Shannon-Wiener diversity index is defined as:

    .. math::

       H = -\sum_{i=1}^s\left(p_i\log_b p_i\right)

    where :math:`s` is the number of OTUs, :math:`p_i` is the proportion of
    the community represented by OTU :math:`i`, and :math:`b` is the logarithm
    base (`base`, 2 by default).

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    base : scalar, optional
        Logarithm base to use in the calculations.

    Returns
    -------
    double
        Shannon diversity index H.

    Notes
    -----
    This implementation follows the description given in the SDR-IV online
    manual [1]_ except that the default logarithm base used here is 2 instead
    of :math:`e`.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    proportions = counts / counts.sum()
    # Zero proportions are left out before taking logarithms.
    present = proportions[proportions.nonzero()]
    entropy_nats = -(present * np.log(present)).sum()
    # Change of base: convert from natural log to the requested base.
    return entropy_nats / np.log(base)
示例#46
0
def enspie(counts):
    r"""Compute the ENS_pie alpha diversity measure.

    ENS_pie is equivalent to ``1 / dominance``:

    .. math::

       ENS_{pie} = \frac{1}{\sum_{i=1}^s{p_i^2}}

    where :math:`s` is the number of OTUs and :math:`p_i` is the proportion of
    the community represented by OTU :math:`i`.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        ENS_pie alpha diversity measure.

    See Also
    --------
    dominance

    Notes
    -----
    ENS_pie is defined in [1]_.

    References
    ----------
    .. [1] Chase and Knight (2013). "Scale-dependent effect sizes of ecological
       drivers on biodiversity: why standardised sampling is not enough".
       Ecology Letters, Volume 16, Issue Supplement s1, pgs 17-26.

    """
    counts = _validate_counts_vector(counts)
    concentration = dominance(counts)
    return 1 / concentration
示例#47
0
def simpson_e(counts):
    r"""Compute Simpson's evenness measure E.

    Simpson's E is defined as

    .. math::

       E=\frac{1 / D}{S_{obs}}

    where :math:`D` is dominance and :math:`S_{obs}` is the number of observed
    OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Simpson's evenness measure E.

    See Also
    --------
    dominance
    enspie
    simpson

    Notes
    -----
    This implementation follows the description given in [1]_.

    References
    ----------
    .. [1] http://www.tiem.utk.edu/~gross/bioed/bealsmodules/simpsonDI.html

    """
    counts = _validate_counts_vector(counts)
    # enspie supplies the 1 / D numerator of the formula.
    inverse_dominance = enspie(counts)
    richness = observed_otus(counts)
    return inverse_dominance / richness
示例#48
0
def simpson(counts):
    r"""Compute Simpson's index.

    Simpson's index is defined as ``1 - dominance``:

    .. math::

       1 - \sum{p_i^2}

    where :math:`p_i` is the proportion of the community represented by OTU
    :math:`i`.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Simpson's index.

    See Also
    --------
    dominance

    Notes
    -----
    The value returned is ``1 - dominance`` as described in [1]_. Other
    references (such as [2]_) define Simpson's index as ``1 / dominance``.

    References
    ----------
    .. [1] http://folk.uio.no/ohammer/past/diversity.html
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    concentration = dominance(counts)
    return 1 - concentration
示例#49
0
def simpson_e(counts):
    r"""Compute Simpson's evenness measure E.

    Simpson's E is defined as

    .. math::

       E=\frac{1 / D}{S_{obs}}

    where :math:`D` is dominance and :math:`S_{obs}` is the number of observed
    OTUs.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Simpson's evenness measure E.

    See Also
    --------
    dominance
    enspie
    simpson

    Notes
    -----
    This implementation follows the description given in [1]_.

    References
    ----------
    .. [1] http://www.tiem.utk.edu/~gross/bioed/bealsmodules/simpsonDI.html

    """
    counts = _validate_counts_vector(counts)
    # enspie supplies the 1 / D numerator of the formula.
    inverse_dominance = enspie(counts)
    richness = observed_otus(counts)
    return inverse_dominance / richness
示例#50
0
def simpson(counts):
    r"""Compute Simpson's index.

    Simpson's index is defined as ``1 - dominance``:

    .. math::

       1 - \sum{p_i^2}

    where :math:`p_i` is the proportion of the community represented by OTU
    :math:`i`.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Simpson's index.

    See Also
    --------
    dominance

    Notes
    -----
    The value returned is ``1 - dominance`` as described in [1]_. Other
    references (such as [2]_) define Simpson's index as ``1 / dominance``.

    References
    ----------
    .. [1] http://folk.uio.no/ohammer/past/diversity.html
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    concentration = dominance(counts)
    return 1 - concentration
示例#51
0
def berger_parker_d(counts):
    r"""Compute Berger-Parker dominance.

    Berger-Parker dominance is the fraction of the sample that belongs to the
    most abundant OTU:

    .. math::

       d = \frac{N_{max}}{N}

    where :math:`N_{max}` is the number of individuals in the most abundant
    OTU (or any of the most abundant OTUs in the case of ties), and :math:`N`
    is the total number of individuals in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Berger-Parker dominance.

    Notes
    -----
    Berger-Parker dominance is defined in [1]_. This implementation follows
    the description given in the SDR-IV online manual [2]_.

    References
    ----------
    .. [1] Berger & Parker (1970). SDR-IV online help.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    largest = counts.max()
    total = counts.sum()
    return largest / total
示例#52
0
def brillouin_d(counts):
    r"""Compute the Brillouin index of alpha diversity.

    The index is calculated as:

    .. math::

       HB = \frac{\ln N!-\sum^s_{i=1}{\ln n_i!}}{N}

    where :math:`N` is the total number of individuals in the sample,
    :math:`s` is the number of OTUs, and :math:`n_i` is the number of
    individuals in the :math:`i^{\text{th}}` OTU.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Brillouin index.

    Notes
    -----
    This implementation follows the description given in the SDR-IV online
    manual [1]_.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    # Only OTUs with a non-zero abundance take part in the calculation.
    observed = counts[counts.nonzero()]
    total = observed.sum()
    # gammaln(x + 1) equals ln(x!), avoiding explicit factorials.
    log_total_factorial = gammaln(total + 1)
    log_otu_factorials = gammaln(observed + 1).sum()
    return (log_total_factorial - log_otu_factorials) / total
示例#53
0
def brillouin_d(counts):
    r"""Compute the Brillouin index of alpha diversity.

    The index is calculated as:

    .. math::

       HB = \frac{\ln N!-\sum^s_{i=1}{\ln n_i!}}{N}

    where :math:`N` is the total number of individuals in the sample,
    :math:`s` is the number of OTUs, and :math:`n_i` is the number of
    individuals in the :math:`i^{\text{th}}` OTU.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Brillouin index.

    Notes
    -----
    This implementation follows the description given in the SDR-IV online
    manual [1]_.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    # Only OTUs with a non-zero abundance take part in the calculation.
    observed = counts[counts.nonzero()]
    total = observed.sum()
    # gammaln(x + 1) equals ln(x!), avoiding explicit factorials.
    log_total_factorial = gammaln(total + 1)
    log_otu_factorials = gammaln(observed + 1).sum()
    return (log_total_factorial - log_otu_factorials) / total
示例#54
0
def berger_parker_d(counts):
    r"""Compute Berger-Parker dominance.

    Berger-Parker dominance is the fraction of the sample that belongs to the
    most abundant OTU:

    .. math::

       d = \frac{N_{max}}{N}

    where :math:`N_{max}` is the number of individuals in the most abundant
    OTU (or any of the most abundant OTUs in the case of ties), and :math:`N`
    is the total number of individuals in the sample.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Berger-Parker dominance.

    Notes
    -----
    Berger-Parker dominance is defined in [1]_. This implementation follows
    the description given in the SDR-IV online manual [2]_.

    References
    ----------
    .. [1] Berger & Parker (1970). SDR-IV online help.
    .. [2] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate_counts_vector(counts)
    largest = counts.max()
    total = counts.sum()
    return largest / total
示例#55
0
def margalef(counts):
    r"""Compute Margalef's richness index.

    Margalef's D is defined as:

    .. math::

       D = \frac{(S - 1)}{\ln N}

    where :math:`S` is the number of OTUs and :math:`N` is the total number of
    individuals in the sample.

    Assumes log accumulation.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Margalef's richness index.

    Notes
    -----
    Based on the description in [1]_.

    References
    ----------
    .. [1] Magurran, A E 2004. Measuring biological diversity. Blackwell. pp.
       76-77.

    """
    counts = _validate_counts_vector(counts)
    richness = observed_otus(counts)
    total = counts.sum()
    return (richness - 1) / np.log(total)
示例#56
0
def margalef(counts):
    r"""Compute Margalef's richness index.

    Margalef's D is defined as:

    .. math::

       D = \frac{(S - 1)}{\ln N}

    where :math:`S` is the number of OTUs and :math:`N` is the total number of
    individuals in the sample.

    Assumes log accumulation.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Margalef's richness index.

    Notes
    -----
    Based on the description in [1]_.

    References
    ----------
    .. [1] Magurran, A E 2004. Measuring biological diversity. Blackwell. pp.
       76-77.

    """
    counts = _validate_counts_vector(counts)
    richness = observed_otus(counts)
    total = counts.sum()
    return (richness - 1) / np.log(total)
示例#57
0
def menhinick(counts):
    r"""Compute Menhinick's richness index.

    Menhinick's richness index is defined as:

    .. math::

       D_{Mn} = \frac{S}{\sqrt{N}}

    where :math:`S` is the number of OTUs and :math:`N` is the total number of
    individuals in the sample.

    Assumes square-root accumulation.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Menhinick's richness index.

    Notes
    -----
    Based on the description in [1]_.

    References
    ----------
    .. [1] Magurran, A E 2004. Measuring biological diversity. Blackwell. pp.
       76-77.

    """
    counts = _validate_counts_vector(counts)
    richness = observed_otus(counts)
    total = counts.sum()
    return richness / np.sqrt(total)
示例#58
0
def menhinick(counts):
    r"""Compute Menhinick's richness index.

    Menhinick's richness index is defined as:

    .. math::

       D_{Mn} = \frac{S}{\sqrt{N}}

    where :math:`S` is the number of OTUs and :math:`N` is the total number of
    individuals in the sample.

    Assumes square-root accumulation.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Menhinick's richness index.

    Notes
    -----
    Based on the description in [1]_.

    References
    ----------
    .. [1] Magurran, A E 2004. Measuring biological diversity. Blackwell. pp.
       76-77.

    """
    counts = _validate_counts_vector(counts)
    richness = observed_otus(counts)
    total = counts.sum()
    return richness / np.sqrt(total)
示例#59
0
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None):
    r"""Fit a Michaelis-Menten curve to the rarefaction curve of observed OTUs.

    The Michaelis-Menten equation is defined as:

    .. math::

       S=\frac{nS_{max}}{n+B}

    where :math:`n` is the number of individuals and :math:`S` is the number of
    OTUs. This function estimates the :math:`S_{max}` parameter.

    The curve is fitted to datapoints for :math:`n=1,2,...,N`, where :math:`N`
    is the total number of individuals (sum of abundances for all OTUs).
    :math:`S` is the number of OTUs represented in a random sample of :math:`n`
    individuals.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    num_repeats : int, optional
        The number of times to perform rarefaction (subsampling without
        replacement) at each value of :math:`n`.
    params_guess : tuple, optional
        Initial guess of :math:`S_{max}` and :math:`B`. If ``None``, default
        guess for :math:`S_{max}` is :math:`S` (as :math:`S_{max}` should
        be >= :math:`S`) and default guess for :math:`B` is ``round(N / 2)``.

    Returns
    -------
    S_max : double
        Estimate of the :math:`S_{max}` parameter in the Michaelis-Menten
        equation.

    See Also
    --------
    skbio.stats.subsample_counts

    Notes
    -----
    There is some controversy about how to do the fitting. The ML model given
    in [1]_ is based on the assumption that error is roughly proportional to
    magnitude of observation, reasonable for enzyme kinetics but not reasonable
    for rarefaction data. Here we just do a nonlinear curve fit for the
    parameters using least-squares.

    References
    ----------
    .. [1] Raaijmakers, J. G. W. 1987 Statistical analysis of the
       Michaelis-Menten equation. Biometrics 43, 793-803.

    """
    counts = _validate_counts_vector(counts)

    total = counts.sum()
    if params_guess is None:
        # Default start: S_max = observed richness, B = half the sample size.
        params_guess = (observed_otus(counts), int(round(total / 2)))

    # Rarefaction data: observed OTUs (S) at every sampling depth n = 1..N,
    # one row per repeat.
    depths = np.arange(1, total + 1)
    richness_by_repeat = np.empty((num_repeats, len(depths)), dtype=int)
    for repeat in range(num_repeats):
        richness_by_repeat[repeat] = np.asarray(
            [observed_otus(subsample_counts(counts, depth))
             for depth in depths],
            dtype=int)
    mean_richness = richness_by_repeat.mean(0)

    def sum_squared_error(params, n, observed):
        # params holds (S_max, B); n and observed are the fitted data vectors.
        predicted = params[0] * n / (params[1] + n)
        return ((predicted - observed) ** 2).sum()

    fitted = fmin_powell(sum_squared_error, params_guess, ftol=1e-5,
                         args=(depths, mean_richness), disp=False)
    # The first fitted parameter is S_max.
    return fitted[0]
示例#60
0
def ace(counts, rare_threshold=10):
    r"""Calculate the ACE metric (Abundance-based Coverage Estimator).

    The ACE metric is defined as:

    .. math::

       S_{ace}=S_{abund}+\frac{S_{rare}}{C_{ace}}+
       \frac{F_1}{C_{ace}}\gamma^2_{ace}

    where :math:`S_{abund}` is the number of abundant OTUs (with more than
    `rare_threshold` individuals) when all samples are pooled,
    :math:`S_{rare}` is the number of rare OTUs (with fewer than or equal to
    `rare_threshold` individuals) when all samples are pooled, :math:`C_{ace}`
    is the sample abundance coverage estimator, :math:`F_1` is the frequency of
    singletons, and :math:`\gamma^2_{ace}` is the estimated coefficient of
    variation for rare OTUs.

    The estimated coefficient of variation is defined as (assuming
    `rare_threshold` is 10, the default):

    .. math::

       \gamma^2_{ace}=max\left[\frac{S_{rare}}{C_{ace}}
       \frac{\sum^{10}_{i=1}{{i\left(i-1\right)}}F_i}
       {\left(N_{rare}\right)\left(N_{rare}-1\right)} -1,0\right]

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    rare_threshold : int, optional
        Threshold at which an OTU containing as many or fewer individuals will
        be considered rare.

    Returns
    -------
    double
        Computed ACE metric.

    Raises
    ------
    ValueError
        If every rare OTU is a singleton.

    Notes
    -----
    ACE was first introduced in [1]_ and [2]_. The implementation here is based
    on the description given in the EstimateS manual [3]_.

    If no rare OTUs exist, returns the number of abundant OTUs. The default
    value of 10 for `rare_threshold` is based on [4]_.

    If `counts` contains zeros, indicating OTUs which are known to exist in the
    environment but did not appear in the sample, they will be ignored for the
    purpose of calculating the number of rare OTUs.

    If `counts` is empty or contains only zeros, there is no frequency bin for
    singletons; the singleton count is then taken to be zero instead of raising
    an ``IndexError``.

    References
    ----------
    .. [1] Chao, A. & S.-M Lee. 1992 Estimating the number of classes via
       sample coverage. Journal of the American Statistical Association 87,
       210-217.
    .. [2] Chao, A., M.-C. Ma, & M. C. K. Yang. 1993. Stopping rules and
       estimation for recapture debugging with unequal failure rates.
       Biometrika 80, 193-201.
    .. [3] http://viceroy.eeb.uconn.edu/estimates/
    .. [4] Chao, A., W.-H. Hwang, Y.-C. Chen, and C.-Y. Kuo. 2000. Estimating
       the number of shared species in two communities. Statistica Sinica
       10:227-246.

    """
    counts = _validate_counts_vector(counts)

    # freq_counts[i] is the number of OTUs observed exactly i times.
    freq_counts = np.bincount(counts)
    s_rare = _otus_rare(freq_counts, rare_threshold)

    # np.bincount yields fewer than two bins when no count exceeds zero (or
    # when counts is empty), so guard the lookup of the singleton bin. The
    # local is deliberately not called `singles`, to avoid shadowing the
    # module-level function of that name.
    n_singles = freq_counts[1] if len(freq_counts) > 1 else 0

    # With only singletons among the rare OTUs the coverage estimate C_ace
    # below would be zero, making the metric undefined.
    if n_singles > 0 and n_singles == s_rare:
        raise ValueError("The only rare OTUs are singletons, so the ACE "
                         "metric is undefined. EstimateS suggests using "
                         "bias-corrected Chao1 instead.")

    s_abun = _otus_abundant(freq_counts, rare_threshold)
    if s_rare == 0:
        # Nothing to extrapolate from: the estimate is just the abundant OTUs.
        return s_abun

    n_rare = _number_rare(freq_counts, rare_threshold)
    c_ace = 1 - n_singles / n_rare

    # Estimated squared coefficient of variation of the rare OTUs, floored
    # at zero as in the definition above.
    top = s_rare * _number_rare(freq_counts, rare_threshold, gamma=True)
    bottom = c_ace * n_rare * (n_rare - 1)
    gamma_ace = max((top / bottom) - 1, 0)

    return s_abun + (s_rare / c_ace) + ((n_singles / c_ace) * gamma_ace)