def test_summary_head(self):
    A = np.array  # aliasing for the sake of pep8
    table = pd.DataFrame({
        's1': ilr_inv(A([1., 3.])),
        's2': ilr_inv(A([2., 2.])),
        's3': ilr_inv(A([1., 3.])),
        's4': ilr_inv(A([3., 4.])),
        's5': ilr_inv(A([1., 5.]))},
        index=['a', 'b', 'c']).T
    tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    metadata = pd.DataFrame({
        'lame': [1, 2, 1, 4, 1],
        'real': [1, 2, 3, 4, 5]
    }, index=['s1', 's2', 's3', 's4', 's5'])

    np.random.seed(0)
    self.maxDiff = None
    model = ols('real', table, metadata, tree)
    model.fit()

    fname = get_data_path('exp_ols_results2.txt')
    res = str(model.summary(ndim=1))
    with open(fname, 'r') as fh:
        exp = fh.read()
    self.assertEqual(res, exp)
def test_ilr_inv_basis_one_dimension_error(self):
    basis = clr(np.array([[0.80442968, 0.19557032]]))
    table = np.array([[np.log(1/10)*np.sqrt(1/2),
                       np.log(1.14141414 / 9.90909091)*np.sqrt(1/2),
                       np.log(1.28282828 / 9.81818182)*np.sqrt(1/2),
                       np.log(1.42424242 / 9.72727273)*np.sqrt(1/2),
                       np.log(1.56565657 / 9.63636364)*np.sqrt(1/2)]]).T
    with self.assertRaises(ValueError):
        ilr_inv(table, basis=basis)
def _regression(y, X, basis=None):
    """ Performs a simplicial ordinary least squares on a set of
    compositions and a response variable.

    Parameters
    ----------
    y : numpy.ndarray, float
        a matrix of proportions where rows correspond to samples and
        columns correspond to features.
    X : numpy.ndarray, float
        independent variable
    basis : numpy.ndarray, optional
        orthonormal basis used for the ilr transform.  If not specified,
        the default Gram-Schmidt basis is used.

    Returns
    -------
    predict: numpy.ndarray, float
        a predicted matrix of proportions where rows correspond to samples
        and columns correspond to features.
    b: numpy.ndarray, float
        a matrix of estimated coefficient compositions
    resid: numpy.ndarray, float
        a matrix of compositional residuals
    r2: float
        coefficient of determination
    """
    y = np.atleast_2d(y)
    X = np.atleast_2d(X)

    # Need to add constant for intercept
    r, c = X.shape

    y_ = ilr(y, basis=basis)
    # Now perform least squares to calculate unknown coefficients
    inv = np.linalg.pinv(np.dot(X.T, X))
    cross = np.dot(inv, X.T)
    b_ = np.dot(cross, y_)
    predict_ = np.dot(X, b_)
    resid = (y_ - predict_)
    sst = (y_ - y_.mean(axis=0))
    r2 = 1 - ((resid**2).sum() / (sst**2).sum())

    if len(b_.shape) == 1:
        b_ = np.atleast_2d(b_).T
    b = ilr_inv(b_)
    if len(predict_.shape) == 1:
        predict_ = np.atleast_2d(predict_).T
    predict = ilr_inv(predict_)
    if len(resid.shape) == 1:
        resid = np.atleast_2d(resid).T
    resid = ilr_inv(resid)
    return predict, b, resid, r2
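# Hedged usage sketch for `_regression` above: it assumes `_regression` is in
# scope (it is module-private, so the import location is an assumption) and
# that `ilr`/`ilr_inv` come from skbio.stats.composition as elsewhere in this
# module.  The data below are illustrative only.
import numpy as np
from skbio.stats.composition import closure

# five samples, three proportions, and a design matrix with an intercept
Y = closure(np.array([[1., 2., 3.],
                      [2., 2., 2.],
                      [3., 2., 1.],
                      [4., 2., 1.],
                      [5., 2., 1.]]))
X = np.column_stack([np.ones(5), np.arange(5.)])
predict, b, resid, r2 = _regression(Y, X)  # default Gram-Schmidt basis
print(r2)  # coefficient of determination in ilr space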
def setUp(self):
    A = np.array  # aliasing for the sake of pep8
    self.table = pd.DataFrame({
        's1': ilr_inv(A([1., 1.])),
        's2': ilr_inv(A([1., 2.])),
        's3': ilr_inv(A([1., 3.])),
        's4': ilr_inv(A([1., 4.])),
        's5': ilr_inv(A([1., 5.]))},
        index=['a', 'b', 'c']).T
    self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    self.unannotated_tree = TreeNode.read(['(c, (b,a));'])
    self.metadata = pd.DataFrame({
        'lame': [1, 1, 1, 1, 1],
        'real': [1, 2, 3, 4, 5]
    }, index=['s1', 's2', 's3', 's4', 's5'])
def compositional_noise(cov, nsamp, rng=None):
    """ This is multiplicative noise applied across the entire dataset.
    The noise is assumed to be Gaussian in the simplex.

    Parameters
    ----------
    cov: array_like
        Covariance matrix for the normal distribution in ilr space.
        This is assumed to be in the default gram-schmidt orthonormal basis.
    nsamp: int
        Number of samples to generate
    rng: np.random.RandomState
        Numpy random state.

    Returns
    -------
    np.array:
        A matrix of probabilities where there are `n` rows and `m` columns
        where `n` corresponds to the number of samples and `m` corresponds
        to the number of species.
    """
    if rng is None:
        rng = RandomState(0)
    dist = multivariate_normal.rvs(cov=cov, size=nsamp, random_state=rng)
    return ilr_inv(dist)
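# Hedged usage sketch for `compositional_noise` above, assuming the module
# imports `RandomState` from numpy.random, `multivariate_normal` from
# scipy.stats and `ilr_inv` from skbio.stats.composition.  Values are
# illustrative only.
import numpy as np
from numpy.random import RandomState

cov = np.diag([0.1, 0.1])  # 2-dimensional covariance in ilr space
noise = compositional_noise(cov, nsamp=4, rng=RandomState(42))
# `noise` has shape (4, 3): four samples over three "species", each row
# summing to 1; multiply it elementwise into a (4, 3) table of proportions
# to perturb that table.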
def test_regression_results_residuals_projection(self):
    tree = TreeNode.read([r'(c, (a, b)Y2)Y1;'])
    basis, _ = balance_basis(tree)
    exp_resid = pd.DataFrame(
        {
            's1': [-0.986842, -0.236842],
            's2': [-0.065789, -1.815789],
            's3': [1.473684, 0.473684],
            's4': [1.394737, -1.105263],
            's5': [-1.065789, 1.184211],
            's6': [-1.144737, -0.394737],
            's7': [0.394737, 1.894737]
        }, index=['Y1', 'Y2']).T
    exp_resid = pd.DataFrame(
        ilr_inv(exp_resid, basis),
        index=['s1', 's2', 's3', 's4', 's5', 's6', 's7'],
        columns=['c', 'a', 'b'])
    submodels = [self.model1, self.model2]
    res = submock(Y=self.balances, Xs=None)
    submock.submodels = submodels
    res.fit()
    res_resid = res.residuals(tree).sort_index()
    pdt.assert_frame_equal(res_resid, exp_resid,
                           check_exact=False,
                           check_less_precise=True)
def coefficients(self, tree=None):
    """ Returns coefficients from fit.

    Parameters
    ----------
    tree : skbio.TreeNode, optional
        The tree used to perform the ilr transformation.  If this
        is specified, then the coefficients will be represented as
        proportions.  Otherwise, if this is not specified, the
        coefficients will be represented as balances. (default: None).

    Returns
    -------
    pd.DataFrame
        A table of coefficients where rows are covariates, and the
        columns are balances.  If `tree` is specified, then the columns
        are proportions.
    """
    if not self._fitted:
        raise ValueError(('Model not fitted - coefficients not calculated. '
                          'See `fit()`'))
    coef = self._beta
    if tree is not None:
        basis, _ = balance_basis(tree)
        c = ilr_inv(coef.values, basis=basis)
        ids = [n.name for n in tree.tips()]
        return pd.DataFrame(c, columns=ids, index=coef.index)
    else:
        return coef
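# Hedged usage sketch for `coefficients(tree=...)` above, following the
# `ols(formula, table, metadata, tree)` fitting pattern used by the tests in
# this file (gneiss-style API); the table, metadata and tree are illustrative.
import numpy as np
import pandas as pd
from skbio import TreeNode
from skbio.stats.composition import ilr_inv

tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
table = pd.DataFrame({'s%d' % i: ilr_inv(np.array([1., float(i)]))
                      for i in range(1, 6)},
                     index=['a', 'b', 'c']).T
metadata = pd.DataFrame({'real': [1, 2, 3, 4, 5]},
                        index=['s1', 's2', 's3', 's4', 's5'])

model = ols('real', table, metadata, tree)  # assumes gneiss' `ols`
model.fit()
print(model.coefficients())           # rows: covariates, columns: balances
print(model.coefficients(tree=tree))  # columns become proportions c, b, a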
def coefficients(self, tree=None):
    """ Returns coefficients from fit.

    Parameters
    ----------
    tree : skbio.TreeNode, optional
        The tree used to perform the ilr transformation.  If this
        is specified, then the coefficients will be represented as
        proportions.  Otherwise, if this is not specified, the
        coefficients will be represented as balances. (default: None).

    Returns
    -------
    pd.DataFrame
        A table of coefficients where rows are covariates, and the
        columns are balances.  If `tree` is specified, then the columns
        are proportions.
    """
    coef = pd.DataFrame()
    for r in self.results:
        c = r.params
        c.name = r.model.endog_names
        coef = coef.append(c)

    if tree is not None:
        basis, _ = balance_basis(tree)
        c = ilr_inv(coef.values.T, basis=basis).T
        return pd.DataFrame(c, index=[n.name for n in tree.tips()],
                            columns=coef.columns)
    else:
        return coef.T
def partition_microbes(num_microbes, sigmaQ, microbe_in, state):
    """ Split up a single microbe's abundances into multiple strains.

    Parameters
    ----------
    num_microbes : int
        Number of strains to be represented
    sigmaQ : float
        The variance of the multivariate distribution
    microbe_in : np.array
        The input abundances for a single species
    state : numpy random state
        Random number generator

    Returns
    -------
    microbes_out : np.array
        Multiple strain abundances.
    """
    num_samples = len(microbe_in)
    a = state.multivariate_normal(
        mean=np.zeros(num_microbes - 1),
        cov=np.diag([sigmaQ] * (num_microbes - 1)),
        size=num_samples)
    microbe_partition = ilr_inv(a)
    microbes_out = np.multiply(microbe_partition,
                               microbe_in.reshape(-1, 1))
    return microbes_out
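# Hedged usage sketch for `partition_microbes` above; the abundances and
# random state are illustrative.
import numpy as np

microbe_in = np.array([100., 50., 10.])  # one species across three samples
strains = partition_microbes(num_microbes=4, sigmaQ=0.5,
                             microbe_in=microbe_in,
                             state=np.random.RandomState(0))
# `strains` has shape (3, 4): each row splits one sample's abundance across
# four strains, and each row still sums to the original abundance.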
def test_regression_results_coefficient_projection(self):
    exp_coef = pd.DataFrame(
        {
            'Intercept': ilr_inv(np.array([[1.447368, -0.052632]])),
            'X': ilr_inv(np.array([[0.539474, 1.289474]]))
        }, index=['Z1', 'Z2', 'Z3'])
    feature_names = ['Z1', 'Z2', 'Z3']
    basis = _gram_schmidt_basis(3)
    res = RegressionResults(self.results, basis=basis,
                            feature_names=feature_names)
    pdt.assert_frame_equal(res.coefficients(project=True), exp_coef,
                           check_exact=False,
                           check_less_precise=True)
def test_regression_results_residuals_projection(self):
    A = np.array  # aliasing np.array for the sake of pep8
    exp_resid = pd.DataFrame(
        {
            's1': ilr_inv(A([-0.986842, -0.236842])),
            's2': ilr_inv(A([-0.065789, -1.815789])),
            's3': ilr_inv(A([1.473684, 0.473684])),
            's4': ilr_inv(A([1.394737, -1.105263])),
            's5': ilr_inv(A([-1.065789, 1.184211])),
            's6': ilr_inv(A([-1.144737, -0.394737])),
            's7': ilr_inv(A([0.394737, 1.894737]))
        }, index=['a', 'b', 'c']).T
    # note that in the example, the basis is not strictly
    # equivalent to the tree
    basis = pd.DataFrame(clr_inv(_gram_schmidt_basis(3)),
                         index=['Y1', 'Y2'],
                         columns=['a', 'b', 'c'])
    submodels = [self.model1, self.model2]
    res = submock(submodels=submodels, basis=basis, tree=self.tree,
                  balances=self.balances)
    res.fit()
    pdt.assert_frame_equal(res.residuals(project=True), exp_resid,
                           check_exact=False,
                           check_less_precise=True)
def test_regression_results_predict_projection(self):
    basis = pd.DataFrame(clr_inv(_gram_schmidt_basis(3)),
                         index=['Y1', 'Y2'],
                         columns=['a', 'b', 'c'])
    submodels = [self.model1, self.model2]
    res = submock(submodels=submodels, basis=basis, tree=self.tree,
                  balances=self.balances)
    res.fit()
    res_predict = res.predict(self.data[['X']], project=True)

    A = np.array  # aliasing np.array for the sake of pep8
    exp_predict = pd.DataFrame(
        {
            's1': ilr_inv(A([1.986842, 1.236842])),
            's2': ilr_inv(A([3.065789, 3.815789])),
            's3': ilr_inv(A([2.526316, 2.526316])),
            's4': ilr_inv(A([3.605263, 5.105263])),
            's5': ilr_inv(A([3.065789, 3.815789])),
            's6': ilr_inv(A([4.144737, 6.394737])),
            's7': ilr_inv(A([3.605263, 5.105263]))
        }, index=['a', 'b', 'c']).T

    pdt.assert_frame_equal(res_predict, exp_predict)
def test_regression_results_coefficient_projection(self):
    exp_coef = pd.DataFrame(
        {'Intercept': ilr_inv(np.array([[1.447368, -0.052632]])),
         'X': ilr_inv(np.array([[0.539474, 1.289474]]))},
        index=['a', 'b', 'c'])
    # note that in the example, the basis is not strictly
    # equivalent to the tree
    basis = pd.DataFrame(clr_inv(_gram_schmidt_basis(3)),
                         index=['Y1', 'Y2'],
                         columns=['a', 'b', 'c'])
    submodels = [self.model1, self.model2]
    res = submock(submodels=submodels, basis=basis, tree=self.tree,
                  balances=self.balances)
    res.fit()
    pdt.assert_frame_equal(res.coefficients(project=True), exp_coef,
                           check_exact=False,
                           check_less_precise=True)
def test_mixedlm_balances(self):
    np.random.seed(6241)
    n = 1600
    exog = np.random.normal(size=(n, 2))
    groups = np.kron(np.arange(n // 16), np.ones(16))

    # Build up the random error vector
    errors = 0

    # The random effects
    exog_re = np.random.normal(size=(n, 2))
    slopes = np.random.normal(size=(n // 16, 2))
    slopes = np.kron(slopes, np.ones((16, 1))) * exog_re
    errors += slopes.sum(1)

    # First variance component
    errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4))

    # Second variance component
    errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2))

    # iid errors
    errors += np.random.normal(size=n)

    endog = exog.sum(1) + errors

    df = pd.DataFrame(index=range(n))
    df["y1"] = endog
    df["y2"] = endog + 2 * 2
    df["groups"] = groups
    df["x1"] = exog[:, 0]
    df["x2"] = exog[:, 1]

    tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    iv = ilr_inv(df[["y1", "y2"]].values)
    table = pd.DataFrame(iv, columns=['a', 'b', 'c'])
    metadata = df[['x1', 'x2', 'groups']]

    res = mixedlm("x1 + x2", table, metadata, tree, groups="groups")
    exp_pvalues = pd.DataFrame(
        [[4.923122e-236, 3.180390e-40, 3.972325e-35, 3.568599e-30],
         [9.953418e-02, 3.180390e-40, 3.972325e-35, 3.568599e-30]],
        index=['Y1', 'Y2'],
        columns=['Intercept', 'Intercept RE', 'x1', 'x2'])

    pdt.assert_frame_equal(res.pvalues, exp_pvalues,
                           check_less_precise=True)

    exp_coefficients = pd.DataFrame(
        [[4.211451, -0.305906, 1.022008, 0.924873],
         [0.211451, -0.305906, 1.022008, 0.924873]],
        columns=['Intercept', 'Intercept RE', 'x1', 'x2'],
        index=['Y1', 'Y2'])

    pdt.assert_frame_equal(res.coefficients(), exp_coefficients,
                           check_less_precise=True)
def test_ols_ilr_inv_test(self):
    model = ols('x1 + x2', self.Y, self.X)
    model.fit()
    basis, _ = balance_basis(self.tree)

    # test pvalues
    exp = pd.DataFrame({'y1': self.r1_.pvalues, 'y2': self.r2_.pvalues})
    pdt.assert_frame_equal(model.pvalues, exp)

    # test coefficients
    exp = pd.DataFrame({'y1': self.r1_.params, 'y2': self.r2_.params})
    exp = pd.DataFrame(ilr_inv(exp, basis),
                       columns=['c', 'b', 'a'],
                       index=self.X.columns)
    res = model.coefficients(tree=self.tree)
    pdt.assert_frame_equal(res, exp)

    # test residuals
    exp = pd.DataFrame({'y1': self.r1_.resid, 'y2': self.r2_.resid},
                       index=self.Y.index)
    exp = pd.DataFrame(ilr_inv(exp, basis),
                       index=self.Y.index,
                       columns=['c', 'b', 'a'])
    res = model.residuals(tree=self.tree)
    pdt.assert_frame_equal(res, exp)

    # test prediction
    exp = pd.DataFrame({'y1': self.r1_.predict(),
                        'y2': self.r2_.predict()},
                       index=self.Y.index)
    exp = pd.DataFrame(ilr_inv(exp, basis),
                       index=self.Y.index,
                       columns=['c', 'b', 'a'])
    res = model.predict(tree=self.tree)
    pdt.assert_frame_equal(res, exp)
def test_ilr_basis_isomorphism(self):
    # tests to make sure that the isomorphism holds
    # with the introduction of the basis.
    basis = np.array([[0.80442968, 0.19557032]])
    table = np.array([[np.log(1 / 10) * np.sqrt(1 / 2),
                       np.log(1.14141414 / 9.90909091) * np.sqrt(1 / 2),
                       np.log(1.28282828 / 9.81818182) * np.sqrt(1 / 2),
                       np.log(1.42424242 / 9.72727273) * np.sqrt(1 / 2),
                       np.log(1.56565657 / 9.63636364) * np.sqrt(1 / 2)]]).T
    res = ilr(ilr_inv(table, basis=basis), basis=basis)
    npt.assert_allclose(res, table.squeeze())

    table = np.array([[1., 10.],
                      [1.14141414, 9.90909091],
                      [1.28282828, 9.81818182],
                      [1.42424242, 9.72727273],
                      [1.56565657, 9.63636364]])

    res = ilr_inv(np.atleast_2d(ilr(table, basis=basis)).T, basis=basis)
    npt.assert_allclose(res, closure(table.squeeze()))
def predict(self, X=None, project=False, **kwargs):
    """ Performs a prediction based on model.

    Parameters
    ----------
    X : pd.DataFrame, optional
        Input table of covariates, where columns are covariates, and
        rows are samples.  If not specified, then the fitted values
        calculated from training the model will be returned.
    project : bool, optional
        Specifies if predictions should be projected back into
        the Aitchison simplex [1]_.  If false, the predictions will be
        represented as balances (default: False).
    **kwargs : dict
        Other arguments to be passed into the model prediction.

    Returns
    -------
    pd.DataFrame
        A table of predicted values where rows are samples, and the
        columns are either balances or proportions, depending on the
        value of `project`.

    References
    ----------
    .. [1] Aitchison, J. "A concise guide to compositional data analysis,
       CDA work." Girona 24 (2003): 73-81.
    """
    self._check_projection(project)

    prediction = pd.DataFrame()
    for m in self.results:
        # check if X is none.
        p = pd.Series(m.predict(X, **kwargs))
        p.name = m.model.endog_names
        if X is not None:
            p.index = X.index
        else:
            p.index = m.fittedvalues.index
        prediction = prediction.append(p)
    if project:
        # `check=False`, due to a problem with error handling
        # addressed here https://github.com/biocore/scikit-bio/pull/1396
        # This will need to be fixed here:
        # https://github.com/biocore/gneiss/issues/34
        proj_prediction = ilr_inv(prediction.values.T, basis=self.basis,
                                  check=False)
        return pd.DataFrame(proj_prediction,
                            columns=self.feature_names,
                            index=prediction.columns)
    return prediction.T
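# Hedged sketch of the `predict(..., project=True)` pattern exercised by the
# tests in this file: fit one statsmodels OLS per balance, wrap the fits in
# `RegressionResults`, and project predictions back onto proportions.  The
# import path of `_gram_schmidt_basis` and the data are assumptions.
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from skbio.stats.composition import _gram_schmidt_basis

data = pd.DataFrame({'X': [1., 2., 3., 4., 5.],
                     'Y1': [1.1, 2.0, 2.9, 4.2, 5.1],
                     'Y2': [0.9, 2.1, 3.1, 3.8, 5.0]},
                    index=['s1', 's2', 's3', 's4', 's5'])
fits = [smf.ols('Y1 ~ X', data=data).fit(),
        smf.ols('Y2 ~ X', data=data).fit()]
res = RegressionResults(fits, basis=_gram_schmidt_basis(3),
                        feature_names=['a', 'b', 'c'])
balances = res.predict(data[['X']])                   # columns Y1, Y2
proportions = res.predict(data[['X']], project=True)  # columns a, b, c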
def setUp(self):
    A = np.array  # aliasing for the sake of pep8
    self.table = pd.DataFrame({
        's1': ilr_inv(A([1., 1.])),
        's2': ilr_inv(A([1., 2.])),
        's3': ilr_inv(A([1., 3.])),
        's4': ilr_inv(A([1., 4.])),
        's5': ilr_inv(A([1., 5.]))},
        index=['a', 'b', 'c']).T
    self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    self.unannotated_tree = TreeNode.read(['(c, (b,a));'])
    self.metadata = pd.DataFrame({
        'lame': [1, 1, 1, 1, 1],
        'real': [1, 2, 3, 4, 5]
    }, index=['s1', 's2', 's3', 's4', 's5'])

    np.random.seed(0)
    n = 15
    a = np.array([1, 4.2, 5.3, -2.2, 8])
    x1 = np.linspace(.01, 0.1, n)
    x2 = np.logspace(0, 0.01, n)
    x3 = np.exp(np.linspace(0, 0.01, n))
    x4 = x1 ** 2

    self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})
    y = (a[0] + a[1]*x1 + a[2]*x2 + a[3]*x3 + a[4]*x4 +
         np.random.normal(size=n))
    sy = np.vstack((y, y/10)).T
    self.y = pd.DataFrame(ilr_inv(sy), columns=['a', 'b', 'c'])
    self.t2 = TreeNode.read([r"((a,b)n,c);"])
def coefficients(self, project=False):
    """ Returns coefficients from fit.

    Parameters
    ----------
    project : bool, optional
        Specifies if coefficients should be projected back into
        the Aitchison simplex [1]_.  If false, the coefficients will be
        represented as balances (default: False).

    Returns
    -------
    pd.DataFrame
        A table of values where columns are coefficients, and the index
        is either balances or proportions, depending on the value of
        `project`.

    Raises
    ------
    ValueError:
        Cannot perform projection into Aitchison simplex if `basis`
        is not specified.
    ValueError:
        Cannot perform projection into Aitchison simplex if
        `feature_names` is not specified.

    References
    ----------
    .. [1] Aitchison, J. "A concise guide to compositional data analysis,
       CDA work." Girona 24 (2003): 73-81.
    """
    self._check_projection(project)
    coef = pd.DataFrame()
    for r in self.results:
        c = r.params
        c.name = r.model.endog_names
        coef = coef.append(c)
    if project:
        # `check=False`, due to a problem with error handling
        # addressed here https://github.com/biocore/scikit-bio/pull/1396
        # This will need to be fixed here:
        # https://github.com/biocore/gneiss/issues/34
        c = ilr_inv(coef.values.T, basis=self.basis, check=False).T
        return pd.DataFrame(c, index=self.feature_names,
                            columns=coef.columns)
    else:
        return coef
def test_ilr_inv_basis(self):
    exp = closure(np.array([[1., 10.],
                            [1.14141414, 9.90909091],
                            [1.28282828, 9.81818182],
                            [1.42424242, 9.72727273],
                            [1.56565657, 9.63636364]]))
    basis = np.array([[0.80442968, 0.19557032]])
    table = np.array([[np.log(1/10)*np.sqrt(1/2),
                       np.log(1.14141414 / 9.90909091)*np.sqrt(1/2),
                       np.log(1.28282828 / 9.81818182)*np.sqrt(1/2),
                       np.log(1.42424242 / 9.72727273)*np.sqrt(1/2),
                       np.log(1.56565657 / 9.63636364)*np.sqrt(1/2)]]).T
    res = ilr_inv(table, basis=basis)
    npt.assert_allclose(res, exp)
def setUp(self): self.results = "results" if not os.path.exists(self.results): os.mkdir(self.results) self.balances = pd.DataFrame( { 'a': [-2, -1, 0, 1, 2], 'b': [-2, 0, 0, 0, 0] }, index=['a1', 'a2', 'a3', 'a4', 'a5']) self.tree = TreeNode.read([r'((k, q)d, ((x, y)a, z)b)c;']) self.taxonomy = pd.DataFrame( [['foo;barf;a;b;c;d;e', 1], ['foo;bark;f;g;h;i;j', 1], ['foo;bark;f;g;h;w;j', 1], ['nom;tu;k;l;m;n;o', 0.9], ['nom;tu;k;l;m;t;o', 0.9]], columns=['Taxon', 'Confidence'], index=['x', 'y', 'z', 'k', 'q']) self.balances = pd.DataFrame( [[1, 2, 3, 4, 5, 6, 7], [-3.1, -2.9, -3, 3, 2.9, 3.2, 3.1], [1, 1, 1, 1, 1, 1, 1], [3, 2, 1, 0, -1, -2, -3]], index=['d', 'a', 'b', 'c'], columns=['s1', 's2', 's3', 's4', 's5', 's6', 's7']).T basis, _ = balance_basis(self.tree) self.table = pd.DataFrame( ilr_inv(self.balances, basis), columns=['x', 'y', 'z', 'k', 'q'], index=['s1', 's2', 's3', 's4', 's5', 's6', 's7']) index = pd.Index(['s1', 's2', 's3', 's4', 's5', 's6', 's7'], name='id') self.categorical = CategoricalMetadataColumn( pd.Series(['a', 'a', 'a', 'b', 'b', 'b', 'b'], index=index, name='categorical')) self.multi_categorical = CategoricalMetadataColumn( pd.Series(['a', 'a', 'c', 'b', 'b', 'b', 'c'], index=index, name='multi_categorical')) self.partial_numerical_categorical = CategoricalMetadataColumn( pd.Series(['1', '1', '1', '2', '2', '2', 'a'], index=index, name='multi_categorical')) self.full_numerical_categorical = CategoricalMetadataColumn( pd.Series(['1', '1', '1.0', '2', '2', '2.0', '3'], index=index, name='numerical_categorical')) self.continuous = NumericMetadataColumn( pd.Series(np.arange(7), index=index, name='continuous'))
def residuals(self, project=False):
    """ Returns calculated residuals.

    Parameters
    ----------
    project : bool, optional
        Specifies if residuals should be projected back into
        the Aitchison simplex [1]_.  If false, the residuals will be
        represented as balances (default: False).

    Returns
    -------
    pd.DataFrame
        A table of values where rows are samples, and the columns
        are either balances or proportions, depending on the value of
        `project`.

    References
    ----------
    .. [1] Aitchison, J. "A concise guide to compositional data analysis,
       CDA work." Girona 24 (2003): 73-81.
    """
    self._check_projection(project)
    resid = pd.DataFrame()
    for r in self.results:
        err = r.resid
        err.name = r.model.endog_names
        resid = resid.append(err)
    if project:
        # `check=False`, due to a problem with error handling
        # addressed here https://github.com/biocore/scikit-bio/pull/1396
        # This will need to be fixed here:
        # https://github.com/biocore/gneiss/issues/34
        proj_resid = ilr_inv(resid.values.T, basis=self.basis,
                             check=False).T
        return pd.DataFrame(proj_resid,
                            index=self.feature_names,
                            columns=resid.columns).T
    else:
        return resid.T
def predict(self, X=None, tree=None, **kwargs):
    """ Performs a prediction based on model.

    Parameters
    ----------
    X : pd.DataFrame, optional
        Input table of covariates, where columns are covariates, and
        rows are samples.  If not specified, then the fitted values
        calculated from training the model will be returned.
    tree : skbio.TreeNode, optional
        The tree used to perform the ilr transformation.  If this
        is specified, then the prediction will be represented as
        proportions.  Otherwise, if this is not specified, the prediction
        will be represented as balances. (default: None).
    **kwargs : dict
        Other arguments to be passed into the model prediction.

    Returns
    -------
    pd.DataFrame
        A table of predicted values where rows are samples, and the
        columns are balances.  If `tree` is specified, then the columns
        are proportions.
    """
    prediction = pd.DataFrame()
    for m in self.results:
        # check if X is none.
        p = pd.Series(m.predict(X, **kwargs))
        p.name = m.model.endog_names
        if X is not None:
            p.index = X.index
        else:
            p.index = m.fittedvalues.index
        prediction = prediction.append(p)
    if tree is not None:
        basis, _ = balance_basis(tree)
        proj_prediction = ilr_inv(prediction.values.T, basis=basis)
        return pd.DataFrame(proj_prediction,
                            columns=[n.name for n in tree.tips()],
                            index=prediction.columns)
    else:
        return prediction.T
def residuals(self, tree=None):
    """ Returns calculated residuals from fit.

    Parameters
    ----------
    tree : skbio.TreeNode, optional
        The tree used to perform the ilr transformation.  If this
        is specified, then the residuals will be represented as
        proportions.  Otherwise, if this is not specified, the residuals
        will be represented as balances. (default: None).

    Returns
    -------
    pd.DataFrame
        A table of residuals where rows are samples, and the columns
        are balances.  If `tree` is specified, then the columns are
        proportions.

    References
    ----------
    .. [1] Aitchison, J. "A concise guide to compositional data analysis,
       CDA work." Girona 24 (2003): 73-81.
    """
    resid = pd.DataFrame()
    for r in self.results:
        err = r.resid
        err.name = r.model.endog_names
        resid = resid.append(err)

    if tree is not None:
        basis, _ = balance_basis(tree)
        proj_resid = ilr_inv(resid.values.T, basis=basis).T
        return pd.DataFrame(proj_resid,
                            index=[n.name for n in tree.tips()],
                            columns=resid.columns).T
    else:
        return resid.T
def predict(self, X=None, tree=None, **kwargs):
    """ Performs a prediction based on model.

    Parameters
    ----------
    X : pd.DataFrame, optional
        Input table of covariates, where columns are covariates, and
        rows are samples.  If not specified, then the fitted values
        calculated from training the model will be returned.
    tree : skbio.TreeNode, optional
        The tree used to perform the ilr transformation.  If this
        is specified, then the prediction will be represented as
        proportions.  Otherwise, if this is not specified, the prediction
        will be represented as balances. (default: None).
    **kwargs : dict
        Other arguments to be passed into the model prediction.

    Returns
    -------
    pd.DataFrame
        A table of predicted values where rows are samples, and the
        columns are balances.  If `tree` is specified, then the columns
        are proportions.
    """
    if not self._fitted:
        raise ValueError(('Model not fitted - coefficients not calculated. '
                          'See `fit()`'))
    if X is None:
        X = self.design_matrices

    prediction = X.dot(self._beta)
    if tree is not None:
        basis, _ = balance_basis(tree)
        proj_prediction = ilr_inv(prediction.values, basis=basis)
        ids = [n.name for n in tree.tips()]
        return pd.DataFrame(proj_prediction,
                            columns=ids,
                            index=prediction.index)
    else:
        return prediction
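# Hedged usage sketch for `predict(tree=...)` above (and the analogous
# `residuals(tree=...)`), following the `ols(formula, table, metadata, tree)`
# workflow used by the tests in this file; data are illustrative.
import numpy as np
import pandas as pd
from skbio import TreeNode
from skbio.stats.composition import ilr_inv

tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
table = pd.DataFrame({'s%d' % i: ilr_inv(np.array([1., float(i)]))
                      for i in range(1, 6)},
                     index=['a', 'b', 'c']).T
metadata = pd.DataFrame({'real': [1, 2, 3, 4, 5]},
                        index=['s1', 's2', 's3', 's4', 's5'])

model = ols('real', table, metadata, tree)     # assumes gneiss' `ols`
model.fit()
fitted_balances = model.predict()              # columns Y1, Y2
fitted_proportions = model.predict(tree=tree)  # columns c, b, a (tree tips)
resid_proportions = model.residuals(tree=tree)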
def test_ilr_inv(self):
    mat = closure(self.cdata7)
    npt.assert_array_almost_equal(ilr_inv(ilr(mat)), mat)

    npt.assert_allclose(ilr_inv(np.identity(3)), self.ortho1,
                        rtol=1e-04, atol=1e-06)

    with self.assertRaises(ValueError):
        ilr_inv(self.cdata1, basis=self.cdata1)

    # make sure that inplace modification is not occurring
    ilr_inv(self.cdata1)
    npt.assert_allclose(self.cdata1,
                        np.array([[2, 2, 6],
                                  [4, 4, 2]]))
def residuals(self, tree=None):
    """ Returns calculated residuals from fit.

    Parameters
    ----------
    tree : skbio.TreeNode, optional
        The tree used to perform the ilr transformation.  If this
        is specified, then the residuals will be represented as
        proportions.  Otherwise, if this is not specified, the residuals
        will be represented as balances. (default: None).

    Returns
    -------
    pd.DataFrame
        A table of residuals where rows are samples, and the columns
        are balances.  If `tree` is specified, then the columns are
        proportions.

    References
    ----------
    .. [1] Aitchison, J. "A concise guide to compositional data analysis,
       CDA work." Girona 24 (2003): 73-81.
    """
    if not self._fitted:
        raise ValueError(('Model not fitted - coefficients not calculated. '
                          'See `fit()`'))
    resid = self._resid
    if tree is not None:
        basis, _ = balance_basis(tree)
        proj_resid = ilr_inv(resid.values, basis=basis)
        ids = [n.name for n in tree.tips()]
        return pd.DataFrame(proj_resid,
                            columns=ids,
                            index=resid.index)
    else:
        return resid
def setUp(self):
    np.random.seed(6241)
    n = 1600
    exog = np.random.normal(size=(n, 2))
    groups = np.kron(np.arange(n // 16), np.ones(16))

    # Build up the random error vector
    errors = 0

    # The random effects
    exog_re = np.random.normal(size=(n, 2))
    slopes = np.random.normal(size=(n // 16, 2))
    slopes = np.kron(slopes, np.ones((16, 1))) * exog_re
    errors += slopes.sum(1)

    # First variance component
    errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4))

    # Second variance component
    errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2))

    # iid errors
    errors += np.random.normal(size=n)

    endog = exog.sum(1) + errors

    df = pd.DataFrame(index=range(n))
    df["y1"] = endog
    df["y2"] = endog + 2 * 2
    df["groups"] = groups
    df["x1"] = exog[:, 0]
    df["x2"] = exog[:, 1]

    self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;'])
    iv = ilr_inv(df[["y1", "y2"]].values)
    self.table = pd.DataFrame(iv, columns=['a', 'b', 'c'])
    self.metadata = df[['x1', 'x2', 'groups']]

    self.results = "results"
    os.mkdir(self.results)
def test_ols_empty_metadata_error(self):
    A = np.array  # aliasing for the sake of pep8
    table = pd.DataFrame({
        'k1': ilr_inv(A([1., 1.])),
        'k2': ilr_inv(A([1., 2.])),
        'k3': ilr_inv(A([1., 3.])),
        'k4': ilr_inv(A([1., 4.])),
        'k5': ilr_inv(A([1., 5.])),
        'k6': ilr_inv(A([1., 5.]))},
        index=['a', 'b', 'c']).T
    tree = TreeNode.read(['((c,d),(b,a)Y2)Y1;'])
    metadata = pd.DataFrame({
        'lame': [1, 1, 1, 1, 1],
        'real': [1, 2, 3, 4, 5]
    }, index=['s1', 's2', 's3', 's4', 's5'])
    with self.assertRaises(ValueError):
        ols('real + lame', table, metadata, tree)
def test_regression_results_residuals_projection(self):
    A = np.array  # aliasing np.array for the sake of pep8
    exp_resid = pd.DataFrame(
        {
            's1': ilr_inv(A([-0.986842, -0.236842])),
            's2': ilr_inv(A([-0.065789, -1.815789])),
            's3': ilr_inv(A([1.473684, 0.473684])),
            's4': ilr_inv(A([1.394737, -1.105263])),
            's5': ilr_inv(A([-1.065789, 1.184211])),
            's6': ilr_inv(A([-1.144737, -0.394737])),
            's7': ilr_inv(A([0.394737, 1.894737]))
        }, index=['Z1', 'Z2', 'Z3']).T
    feature_names = ['Z1', 'Z2', 'Z3']
    basis = _gram_schmidt_basis(3)
    res = RegressionResults(self.results, basis=basis,
                            feature_names=feature_names)
    pdt.assert_frame_equal(res.residuals(project=True), exp_resid,
                           check_exact=False,
                           check_less_precise=True)
def test_regression_results_predict_projection(self):
    feature_names = ['Z1', 'Z2', 'Z3']
    basis = _gram_schmidt_basis(3)
    model = RegressionResults(self.results, basis=basis,
                              feature_names=feature_names)
    res_predict = model.predict(self.data[['X']], project=True)

    A = np.array  # aliasing np.array for the sake of pep8
    exp_predict = pd.DataFrame(
        {
            's1': ilr_inv(A([1.986842, 1.236842])),
            's2': ilr_inv(A([3.065789, 3.815789])),
            's3': ilr_inv(A([2.526316, 2.526316])),
            's4': ilr_inv(A([3.605263, 5.105263])),
            's5': ilr_inv(A([3.065789, 3.815789])),
            's6': ilr_inv(A([4.144737, 6.394737])),
            's7': ilr_inv(A([3.605263, 5.105263]))
        }, index=feature_names).T

    pdt.assert_frame_equal(res_predict, exp_predict)