示例#1
0
    def from_random_effects(cls, y, x, z,
                            p_path=None,
                            overwrite=False,
                            max_condition_number=1e-10,
                            complexity_bound=8192):
        r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`.

        Examples
        --------
        >>> from hail.stats import LinearMixedModel
        >>> y = np.array([0.0, 1.0, 8.0, 9.0])
        >>> x = np.array([[1.0, 0.0],
        ...               [1.0, 2.0],
        ...               [1.0, 1.0],
        ...               [1.0, 4.0]])
        >>> z = np.array([[0.0, 0.0, 1.0],
        ...               [0.0, 1.0, 2.0],
        ...               [1.0, 2.0, 4.0],
        ...               [2.0, 4.0, 8.0]])
        >>> model, p = LinearMixedModel.from_random_effects(y, x, z)
        >>> model.fit()
        >>> model.h_sq
        0.38205307244271675

        Notes
        -----
        If :math:`n \leq m`, the returned model is full rank.

        If :math:`n > m`, the returned model is low rank. In this case only,
        eigenvalues less than or equal to `max_condition_number` times the top
        eigenvalue are dropped from :math:`S`, with the corresponding
        eigenvectors dropped from :math:`P`. This guards against precision
        loss on left eigenvectors computed via the right gramian :math:`Z^T Z`
        in :meth:`BlockMatrix.svd`.

        In either case, one can truncate to a rank :math:`r` model as follows.
        If `p` is an ndarray:

        >>> p_r = p[:r, :]     # doctest: +SKIP
        >>> s_r = model.s[:r]  # doctest: +SKIP
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)  # doctest: +SKIP

        If `p` is a block matrix:

        >>> p[:r, :].write(p_r_path)          # doctest: +SKIP
        >>> p_r = BlockMatrix.read(p_r_path)  # doctest: +SKIP
        >>> s_r = model.s[:r]                 # doctest: +SKIP
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x, p_r_path)  # doctest: +SKIP

        This method applies no standardization to `z`.

        Warning
        -------
        If `z` is a block matrix, then ideally `z` should be the result of
        directly reading from disk (and possibly a transpose). This is most
        critical if :math:`n > m`, because in this case multiplication by `z`
        will result in all preceding transformations being repeated
        ``n / block_size`` times, as explained in :class:`.BlockMatrix`.

        At least one dimension must be less than or equal to 46300.
        See the warning in :meth:`.BlockMatrix.svd` for performance
        considerations.

        Parameters
        ----------
        y: :class:`ndarray`
            :math:`n` vector of observations :math:`y`.
        x: :class:`ndarray`
            :math:`n \times p` matrix of fixed effects :math:`X`.
        z: :class:`ndarray` or :class:`BlockMatrix`
            :math:`n \times m` matrix of random effects :math:`Z`.
        p_path: :obj:`str`, optional
            Path at which to write :math:`P` as a block matrix.
            Required if `z` is a block matrix.
        overwrite: :obj:`bool`
            If ``True``, overwrite an existing file at `p_path`.
        max_condition_number: :obj:`float`
            Maximum condition number. Must be at least 1e-16.
        complexity_bound: :obj:`int`
            Complexity bound for :meth:`.BlockMatrix.svd` when `z` is a block
            matrix.

        Returns
        -------
        model: :class:`LinearMixedModel`
            Model constructed from :math:`y`, :math:`X`, and :math:`Z`.
        p: :class:`ndarray` or :class:`.BlockMatrix`
            Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.
            The type is block matrix if `z` is a block matrix and
            :meth:`.BlockMatrix.svd` of `z` returns :math:`U` as a block matrix.

        Raises
        ------
        ValueError
            If `z` is a block matrix and `p_path` is not given, if
            `max_condition_number` is below 1e-16, or if `y` or `x` does not
            have the same number of rows as `z`.
        """
        z_is_bm = isinstance(z, BlockMatrix)

        if z_is_bm and p_path is None:
            raise ValueError("from_random_effects: 'p_path' required when 'z' "
                             "is a block matrix.")

        if max_condition_number < 1e-16:
            raise ValueError("from_random_effects: 'max_condition_number' must "
                             f"be at least 1e-16, found {max_condition_number}")

        _check_dims(y, "y", 1)
        _check_dims(x, "x", 2)
        _check_dims(z, "z", 2)

        n, m = z.shape

        if y.shape[0] != n:
            raise ValueError("from_random_effects: 'y' and 'z' must have the "
                             "same number of rows")
        if x.shape[0] != n:
            raise ValueError("from_random_effects: 'x' and 'z' must have the "
                             "same number of rows")

        if z_is_bm:
            u, s0, _ = z.svd(complexity_bound=complexity_bound)
            p = u.T
            # svd may hand back U as either an ndarray or a block matrix.
            p_is_bm = isinstance(p, BlockMatrix)
        else:
            u, s0, _ = hl.linalg._svd(z, full_matrices=False)
            p = u.T
            p_is_bm = False

        # Squared singular values of Z are the eigenvalues kept in the model.
        s = s0 ** 2

        low_rank = n > m

        if low_rank:
            assert np.all(np.isfinite(s))
            # s is expected in descending order, so -s is ascending and r is
            # the first index with s[r] <= max_condition_number * s[0].
            r = np.searchsorted(-s, -max_condition_number * s[0])
            if r < m:
                info(f'from_random_effects: model rank reduced from {m} to {r} '
                     f'due to ill-condition.'
                     f'\n    Largest dropped eigenvalue was {s[r]}.')
            s = s[:r]
            p = p[:r, :]

        if p_path is not None:
            if p_is_bm:
                # Write then re-read so later products use the stored matrix.
                p.write(p_path, overwrite=overwrite)
                p = BlockMatrix.read(p_path)
            else:
                BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite)
        if p_is_bm:
            # Multiply against y as an (n, 1) column and flatten the product so
            # that py is the 1-dimensional ndarray LinearMixedModel requires.
            py = (p @ y.reshape(n, 1)).to_numpy().flatten()
            px = (p @ x).to_numpy()
        else:
            py, px = p @ y, p @ x

        if low_rank:
            model = LinearMixedModel(py, px, s, y, x, p_path)
        else:
            model = LinearMixedModel(py, px, s, p_path=p_path)

        return model, p
示例#2
0
    def __init__(self, py, px, s, y=None, x=None, p_path=None):
        """Validate the inputs and initialize an unfitted model.

        The model is low rank when both `y` and `x` are given and full rank
        when neither is given.

        Parameters
        ----------
        py: 1-dimensional array of size ``r``.
        px: 2-dimensional array with ``r`` rows and ``f`` columns.
        s: 1-dimensional array of size ``r``.
        y: 1-dimensional array of size ``n > r``, low-rank only.
        x: 2-dimensional array with ``n`` rows and ``f`` columns, low-rank only.
        p_path: optional path of a block matrix with ``r`` rows and ``n``
            columns; its shape is read and checked here.

        Raises
        ------
        ValueError
            If exactly one of `y` and `x` is given, or if any of the shapes
            above are inconsistent.
        """
        if y is None and x is None:
            low_rank = False
        elif y is not None and x is not None:
            low_rank = True
        else:
            raise ValueError('for low-rank, set both y and x; for full-rank, do not set y or x.')

        _check_dims(py, 'py', 1)
        _check_dims(px, 'px', 2)
        _check_dims(s, 's', 1)

        r = s.size
        f = px.shape[1]

        if py.size != r:
            raise ValueError("py and s must have the same size")
        if px.shape[0] != r:
            raise ValueError("px must have the same number of rows as the size of s")
        if low_rank:
            _check_dims(y, 'y', 1)
            _check_dims(x, 'x', 2)
            n = y.size
            if n <= r:
                raise ValueError("size of y must be larger than the size of s")
            if x.shape[0] != n:
                raise ValueError("x must have the same number of rows as the size of y")
            if x.shape[1] != f:
                raise ValueError("px and x must have the same number of columns")
        else:
            # Full rank: there are as many samples as eigenvalues.
            n = r

        if p_path is not None:
            # Only the shape of the stored matrix is inspected here.
            n_rows, n_cols = BlockMatrix.read(p_path).shape
            if n_cols != n:
                raise ValueError("LinearMixedModel: Number of columns in the block "
                                 f"matrix at 'p_path' ({n_cols}) must equal "
                                 f"the size of 'y' ({n})")
            if n_rows != r:
                raise ValueError("LinearMixedModel: Number of rows in the block "
                                 f"matrix at 'p_path' ({n_rows}) must equal "
                                 f"the size of 'py' ({r})")

        self.low_rank = low_rank
        self.n = n
        self.f = f
        self.r = r
        self.py = py
        self.px = px
        self.s = s
        self.y = y
        self.x = x
        self.p_path = p_path

        # Runs after n, f and r are stored and before any fit state is set up.
        self._check_dof()

        # Fit results; all remain None until the model is fitted.
        self.beta = None
        self.sigma_sq = None
        self.tau_sq = None
        self.gamma = None
        self.log_gamma = None
        self.h_sq = None
        self.h_sq_standard_error = None
        self.optimize_result = None

        self._fitted = False

        if low_rank:
            # Products of the untransformed y and x, computed once up front.
            self._yty = y @ y
            self._xty = x.T @ y
            self._xtx = x.T @ x

        # Working state for the model with f fixed-effect columns.
        self._dof = n - f
        self._d = None
        self._ydy = None
        self._xdy = None
        self._xdx = None

        # Working state sized for one additional fixed-effect column.
        self._dof_alt = n - (f + 1)
        self._d_alt = None
        self._ydy_alt = None
        self._xdy_alt = np.zeros(f + 1)
        self._xdx_alt = np.zeros((f + 1, f + 1))

        self._residual_sq = None

        self._scala_model = None
示例#3
0
    def from_kinship(cls, y, x, k, p_path=None, overwrite=False):
        r"""Initializes a model from :math:`y`, :math:`X`, and :math:`K`.

        Examples
        --------
        >>> from hail.stats import LinearMixedModel
        >>> y = np.array([0.0, 1.0, 8.0, 9.0])
        >>> x = np.array([[1.0, 0.0],
        ...               [1.0, 2.0],
        ...               [1.0, 1.0],
        ...               [1.0, 4.0]])
        >>> k = np.array([[ 1.        , -0.8727875 ,  0.96397335,  0.94512946],
        ...               [-0.8727875 ,  1.        , -0.93036112, -0.97320323],
        ...               [ 0.96397335, -0.93036112,  1.        ,  0.98294169],
        ...               [ 0.94512946, -0.97320323,  0.98294169,  1.        ]])
        >>> model, p = LinearMixedModel.from_kinship(y, x, k)
        >>> model.fit()
        >>> model.h_sq
        0.2525148830695317

        >>> model.s
        array([3.83501295, 0.13540343, 0.02454114, 0.00504248])

        Truncate to a rank :math:`r=2` model:

        >>> r = 2
        >>> s_r = model.s[:r]
        >>> p_r = p[:r, :]
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)
        >>> model_r.fit()
        >>> model_r.h_sq
        0.25193197591429695

        Notes
        -----
        This method eigendecomposes :math:`K = P^T S P` on the master and
        returns ``LinearMixedModel(p @ y, p @ x, s)`` and ``p``.

        The performance of eigendecomposition depends critically on the
        number of master cores and the NumPy / SciPy configuration, viewable
        with ``np.show_config()``. For Intel machines, we recommend installing
        the `MKL <https://anaconda.org/anaconda/mkl>`__ package for Anaconda, as
        is done by `cloudtools <https://github.com/Nealelab/cloudtools>`__.

        `k` must be positive semi-definite; symmetry is not checked as only the
        lower triangle is used.

        Parameters
        ----------
        y: :class:`ndarray`
            :math:`n` vector of observations.
        x: :class:`ndarray`
            :math:`n \times p` matrix of fixed effects.
        k: :class:`ndarray`
            :math:`n \times n` positive semi-definite kernel :math:`K`.
        p_path: :obj:`str`, optional
            Path at which to write :math:`P` as a block matrix.
        overwrite: :obj:`bool`
            If ``True``, overwrite an existing file at `p_path`.

        Returns
        -------
        model: :class:`LinearMixedModel`
            Model constructed from :math:`y`, :math:`X`, and :math:`K`.
        p: :class:`ndarray`
            Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.

        Raises
        ------
        ValueError
            If `k` is not square, if `y` or `x` does not have the same number
            of rows as `k`, or if the smallest eigenvalue of `k` is negative
            beyond a relative tolerance of 1e-12 of the largest.
        """
        _check_dims(y, "y", 1)
        _check_dims(x, "x", 2)
        _check_dims(k, "k", 2)

        n = k.shape[0]
        if k.shape[1] != n:
            raise ValueError("from_kinship: 'k' must be a square matrix")
        if y.shape[0] != n:
            raise ValueError("from_kinship: 'y' and 'k' must have the same "
                             "number of rows")
        if x.shape[0] != n:
            raise ValueError("from_kinship: 'x' and 'k' must have the same "
                             "number of rows")

        # Eigenvalues arrive in ascending order (they are flipped below), so
        # s[0] is the smallest and s[-1] the largest.
        s, u = hl.linalg._eigh(k)
        # Allow round-off: reject only when the smallest eigenvalue is negative
        # by more than 1e-12 relative to the largest.
        if s[0] < -1e-12 * s[-1]:
            raise ValueError("from_kinship: smallest eigenvalue of 'k' is "
                             f"negative: {s[0]}")

        # flip eigenvalues, and the matching eigenvector columns, to
        # descending order
        s = np.flip(s, axis=0)
        u = np.fliplr(u)
        p = u.T
        # Same test the LinearMixedModel constructor applies to p_path.
        if p_path is not None:
            BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite)

        model = LinearMixedModel(p @ y, p @ x, s, p_path=p_path)
        return model, p
示例#4
0
    def from_mixed_effects(cls, y, x, z, max_condition_number=1e-10):
        r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`.

        Examples
        --------
        >>> from hail.stats import LinearMixedModel
        >>> y = np.array([0.0, 1.0, 8.0, 9.0])
        >>> x = np.array([[1.0, 0.0],
        ...               [1.0, 2.0],
        ...               [1.0, 1.0],
        ...               [1.0, 4.0]])
        >>> z = np.array([[0.0, 0.0, 1.0],
        ...               [0.0, 1.0, 2.0],
        ...               [1.0, 2.0, 4.0],
        ...               [2.0, 4.0, 8.0]])
        >>> model, p = LinearMixedModel.from_mixed_effects(y, x, z)
        >>> model.fit()
        >>> model.h_sq
        0.38205307244271675

        Notes
        -----
        If :math:`n \leq m`, the returned model is full rank.

        If :math:`n > m`, the returned model is low rank. In this case only,
        eigenvalues less than or equal to `max_condition_number` times the top
        eigenvalue are dropped from :math:`S`, with the corresponding
        eigenvectors dropped from :math:`P`. This guards against precision
        loss on left eigenvectors computed via the right gramian :math:`Z^T Z`
        in :meth:`BlockMatrix.svd`.

        In either case, one can truncate to a rank :math:`r` model as follows:

        >>> s_r = model.s[:r]  # doctest: +SKIP
        >>> p_r = p[:r, :]  # doctest: +SKIP
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)  # doctest: +SKIP

        No standardization is applied to `z`.

        Warning
        -------
        If `z` is a block matrix, then ideally `z` should be the result of
        directly reading from disk (and possibly a transpose). This is most
        critical if :math:`n > m`, because in this case multiplication by `z`
        will result in all preceding transformations being repeated
        ``n / block_size`` times, as explained in :class:`BlockMatrix`.

        Parameters
        ----------
        y: :class:`ndarray`
            :math:`n` vector of observations :math:`y`.
        x: :class:`ndarray`
            :math:`n \times p` matrix of fixed effects :math:`X`.
        z: :class:`ndarray` or :class:`BlockMatrix`
            :math:`n \times m` matrix of random effects :math:`Z`.
        max_condition_number: :obj:`float`
            Maximum condition number. Must be at least 1e-16.

        Returns
        -------
        model: :class:`LinearMixedModel`
            Model constructed from :math:`y`, :math:`X`, and :math:`Z`.
        p: :class:`ndarray` or :class:`BlockMatrix`
            Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.
            It is whatever type the decomposition of `z` produced.

        Raises
        ------
        ValueError
            If `max_condition_number` is below 1e-16, or if `y` or `x` does
            not have the same number of rows as `z`.
        """
        if max_condition_number < 1e-16:
            raise ValueError(
                "from_mixed_effects: 'max_condition_number' must "
                f"be at least 1e-16, found {max_condition_number}")

        _check_dims(y, "y", 1)
        _check_dims(x, "x", 2)
        _check_dims(z, "z", 2)

        n, m = z.shape

        if y.shape[0] != n:
            raise ValueError("from_mixed_effects: 'y' and 'z' must have the "
                             "same number of rows")
        if x.shape[0] != n:
            raise ValueError("from_mixed_effects: 'x' and 'z' must have the "
                             "same number of rows")

        if isinstance(z, np.ndarray):
            u, s0, _ = hl.linalg._svd(z, full_matrices=False)
        else:
            u, s0, _ = z.svd()
        p = u.T

        # Branch on the type of p rather than z, so an ndarray U coming back
        # from z.svd() is handled as well.
        if isinstance(p, np.ndarray):
            py, px = p @ y, p @ x
        else:
            # Multiply against y as an (n, 1) column and flatten the product so
            # that py is the 1-dimensional ndarray LinearMixedModel requires.
            py = (p @ y.reshape(n, 1)).to_numpy().flatten()
            px = (p @ x).to_numpy()

        # Squared singular values of Z are the eigenvalues kept in the model.
        s = s0 ** 2

        full_rank = n <= m
        if full_rank:
            model = LinearMixedModel(py, px, s)
        else:
            assert np.all(np.isfinite(s))
            # s is expected in descending order, so -s is ascending and r is
            # the first index with s[r] <= max_condition_number * s[0].
            r = np.searchsorted(-s, -max_condition_number * s[0])
            if r < m:
                info(f'from_mixed_effects: model rank reduced from {m} to {r} '
                     f'due to ill-condition.'
                     f'\n    Largest dropped eigenvalue was {s[r]}.')
            s = s[:r]
            p = p[:r, :]
            # py and px were computed from the untruncated p, so drop the same
            # trailing rows; otherwise their sizes no longer match s.
            py = py[:r]
            px = px[:r, :]
            model = LinearMixedModel(py, px, s, y, x)
        return model, p
示例#5
0
    def from_kinship(cls, y, x, k):
        r"""Initializes a model from :math:`y`, :math:`X`, and :math:`K`.

        Examples
        --------
        >>> from hail.stats import LinearMixedModel
        >>> y = np.array([0.0, 1.0, 8.0, 9.0])
        >>> x = np.array([[1.0, 0.0],
        ...               [1.0, 2.0],
        ...               [1.0, 1.0],
        ...               [1.0, 4.0]])
        >>> k = np.array([[ 1.        , -0.8727875 ,  0.96397335,  0.94512946],
        ...               [-0.8727875 ,  1.        , -0.93036112, -0.97320323],
        ...               [ 0.96397335, -0.93036112,  1.        ,  0.98294169],
        ...               [ 0.94512946, -0.97320323,  0.98294169,  1.        ]])
        >>> model, p = LinearMixedModel.from_kinship(y, x, k)
        >>> model.fit()
        >>> model.h_sq
        0.2525148830695317

        >>> model.s
        array([3.83501295, 0.13540343, 0.02454114, 0.00504248])

        Truncate to a rank :math:`r=2` model:

        >>> r = 2
        >>> s_r = model.s[:r]
        >>> p_r = p[:r, :]
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)
        >>> model_r.fit()
        >>> model_r.h_sq
        0.25193197591429695

        Notes
        -----
        This method eigendecomposes :math:`K = P^T S P` and returns
        ``LinearMixedModel(p @ y, p @ x, s)`` and ``p``.

        Only the lower triangle of `k` is used; symmetry is not checked.

        Parameters
        ----------
        y: :class:`ndarray`
            :math:`n` vector of observations.
        x: :class:`ndarray`
            :math:`n \times p` matrix of fixed effects.
        k: :class:`ndarray`
            :math:`n \times n` positive semi-definite kernel :math:`K`.

        Returns
        -------
        model: :class:`LinearMixedModel`
            Model constructed from :math:`y`, :math:`X`, and :math:`K`.
        p: :class:`ndarray`
            Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.

        Raises
        ------
        ValueError
            If `k` is not square, if `y` or `x` does not have the same number
            of rows as `k`, or if the smallest eigenvalue of `k` is negative
            beyond a relative tolerance of 1e-12 of the largest.
        """
        _check_dims(y, "y", 1)
        _check_dims(x, "x", 2)
        _check_dims(k, "k", 2)

        n = k.shape[0]
        if k.shape[1] != n:
            raise ValueError("from_kinship: 'k' must be a square matrix")
        if y.shape[0] != n:
            raise ValueError("from_kinship: 'y' and 'k' must have the same "
                             "number of rows")
        if x.shape[0] != n:
            raise ValueError("from_kinship: 'x' and 'k' must have the same "
                             "number of rows")

        # Eigenvalues arrive in ascending order (they are flipped below), so
        # s[0] is the smallest and s[-1] the largest.
        s, u = hl.linalg._eigh(k)
        # Allow round-off: reject only when the smallest eigenvalue is negative
        # by more than 1e-12 relative to the largest.
        if s[0] < -1e-12 * s[-1]:
            raise ValueError("from_kinship: smallest eigenvalue of 'k' is "
                             f"negative: {s[0]}")

        # flip eigenvalues, and the matching eigenvector columns, to
        # descending order
        s = np.flip(s, axis=0)
        u = np.fliplr(u)
        p = u.T

        model = LinearMixedModel(p @ y, p @ x, s)
        return model, p
示例#6
0
    def from_random_effects(cls, y, x, z,
                            p_path=None,
                            overwrite=False,
                            max_condition_number=1e-10,
                            complexity_bound=8192):
        r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`.

        Examples
        --------
        >>> from hail.stats import LinearMixedModel
        >>> y = np.array([0.0, 1.0, 8.0, 9.0])
        >>> x = np.array([[1.0, 0.0],
        ...               [1.0, 2.0],
        ...               [1.0, 1.0],
        ...               [1.0, 4.0]])
        >>> z = np.array([[0.0, 0.0, 1.0],
        ...               [0.0, 1.0, 2.0],
        ...               [1.0, 2.0, 4.0],
        ...               [2.0, 4.0, 8.0]])
        >>> model, p = LinearMixedModel.from_random_effects(y, x, z)
        >>> model.fit()
        >>> model.h_sq
        0.38205307244271675

        Notes
        -----
        If :math:`n \leq m`, the returned model is full rank.

        If :math:`n > m`, the returned model is low rank. In this case only,
        eigenvalues less than or equal to `max_condition_number` times the top
        eigenvalue are dropped from :math:`S`, with the corresponding
        eigenvectors dropped from :math:`P`. This guards against precision
        loss on left eigenvectors computed via the right gramian :math:`Z^T Z`
        in :meth:`BlockMatrix.svd`.

        In either case, one can truncate to a rank :math:`r` model as follows.
        If `p` is an ndarray:

        >>> p_r = p[:r, :]     # doctest: +SKIP
        >>> s_r = model.s[:r]  # doctest: +SKIP
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)  # doctest: +SKIP

        If `p` is a block matrix:

        >>> p[:r, :].write(p_r_path)          # doctest: +SKIP
        >>> p_r = BlockMatrix.read(p_r_path)  # doctest: +SKIP
        >>> s_r = model.s[:r]                 # doctest: +SKIP
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x, p_r_path)  # doctest: +SKIP

        This method applies no standardization to `z`.

        Warning
        -------
        If `z` is a block matrix, then ideally `z` should be the result of
        directly reading from disk (and possibly a transpose). This is most
        critical if :math:`n > m`, because in this case multiplication by `z`
        will result in all preceding transformations being repeated
        ``n / block_size`` times, as explained in :class:`.BlockMatrix`.

        At least one dimension must be less than or equal to 46300.
        See the warning in :meth:`.BlockMatrix.svd` for performance
        considerations.

        Parameters
        ----------
        y: :class:`ndarray`
            :math:`n` vector of observations :math:`y`.
        x: :class:`ndarray`
            :math:`n \times p` matrix of fixed effects :math:`X`.
        z: :class:`ndarray` or :class:`BlockMatrix`
            :math:`n \times m` matrix of random effects :math:`Z`.
        p_path: :obj:`str`, optional
            Path at which to write :math:`P` as a block matrix.
            Required if `z` is a block matrix.
        overwrite: :obj:`bool`
            If ``True``, overwrite an existing file at `p_path`.
        max_condition_number: :obj:`float`
            Maximum condition number. Must be at least 1e-16.
        complexity_bound: :obj:`int`
            Complexity bound for :meth:`.BlockMatrix.svd` when `z` is a block
            matrix.

        Returns
        -------
        model: :class:`LinearMixedModel`
            Model constructed from :math:`y`, :math:`X`, and :math:`Z`.
        p: :class:`ndarray` or :class:`.BlockMatrix`
            Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.
            The type is block matrix if `z` is a block matrix and
            :meth:`.BlockMatrix.svd` of `z` returns :math:`U` as a block matrix.

        Raises
        ------
        ValueError
            If `z` is a block matrix and `p_path` is not given, if
            `max_condition_number` is below 1e-16, or if `y` or `x` does not
            have the same number of rows as `z`.
        """
        z_is_bm = isinstance(z, BlockMatrix)

        if z_is_bm and p_path is None:
            raise ValueError("from_random_effects: 'p_path' required when 'z' "
                             "is a block matrix.")

        if max_condition_number < 1e-16:
            raise ValueError("from_random_effects: 'max_condition_number' must "
                             f"be at least 1e-16, found {max_condition_number}")

        _check_dims(y, "y", 1)
        _check_dims(x, "x", 2)
        _check_dims(z, "z", 2)

        n, m = z.shape

        if y.shape[0] != n:
            raise ValueError("from_random_effects: 'y' and 'z' must have the "
                             "same number of rows")
        if x.shape[0] != n:
            raise ValueError("from_random_effects: 'x' and 'z' must have the "
                             "same number of rows")

        if z_is_bm:
            u, s0, _ = z.svd(complexity_bound=complexity_bound)
            p = u.T
            # svd may hand back U as either an ndarray or a block matrix.
            p_is_bm = isinstance(p, BlockMatrix)
        else:
            u, s0, _ = hl.linalg._svd(z, full_matrices=False)
            p = u.T
            p_is_bm = False

        # Squared singular values of Z are the eigenvalues kept in the model.
        s = s0 ** 2

        low_rank = n > m

        if low_rank:
            assert np.all(np.isfinite(s))
            # s is expected in descending order, so -s is ascending and r is
            # the first index with s[r] <= max_condition_number * s[0].
            r = np.searchsorted(-s, -max_condition_number * s[0])
            if r < m:
                info(f'from_random_effects: model rank reduced from {m} to {r} '
                     f'due to ill-condition.'
                     f'\n    Largest dropped eigenvalue was {s[r]}.')
            s = s[:r]
            p = p[:r, :]

        if p_path is not None:
            if p_is_bm:
                # Write then re-read so later products use the stored matrix.
                p.write(p_path, overwrite=overwrite)
                p = BlockMatrix.read(p_path)
            else:
                BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite)
        if p_is_bm:
            # y is multiplied as an (n, 1) column and the product flattened so
            # that py is the 1-dimensional ndarray LinearMixedModel requires.
            py, px = (p @ y.reshape(n, 1)).to_numpy().flatten(), (p @ x).to_numpy()
        else:
            py, px = p @ y, p @ x

        if low_rank:
            model = LinearMixedModel(py, px, s, y, x, p_path)
        else:
            model = LinearMixedModel(py, px, s, p_path=p_path)

        return model, p
示例#7
0
    def from_kinship(cls, y, x, k, p_path=None, overwrite=False):
        r"""Initializes a model from :math:`y`, :math:`X`, and :math:`K`.

        Examples
        --------
        >>> from hail.stats import LinearMixedModel
        >>> y = np.array([0.0, 1.0, 8.0, 9.0])
        >>> x = np.array([[1.0, 0.0],
        ...               [1.0, 2.0],
        ...               [1.0, 1.0],
        ...               [1.0, 4.0]])
        >>> k = np.array([[ 1.        , -0.8727875 ,  0.96397335,  0.94512946],
        ...               [-0.8727875 ,  1.        , -0.93036112, -0.97320323],
        ...               [ 0.96397335, -0.93036112,  1.        ,  0.98294169],
        ...               [ 0.94512946, -0.97320323,  0.98294169,  1.        ]])
        >>> model, p = LinearMixedModel.from_kinship(y, x, k)
        >>> model.fit()
        >>> model.h_sq
        0.2525148830695317

        >>> model.s
        array([3.83501295, 0.13540343, 0.02454114, 0.00504248])

        Truncate to a rank :math:`r=2` model:

        >>> r = 2
        >>> s_r = model.s[:r]
        >>> p_r = p[:r, :]
        >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)
        >>> model_r.fit()
        >>> model_r.h_sq
        0.25193197591429695

        Notes
        -----
        This method eigendecomposes :math:`K = P^T S P` on the master and
        returns ``LinearMixedModel(p @ y, p @ x, s)`` and ``p``.

        The performance of eigendecomposition depends critically on the
        number of master cores and the NumPy / SciPy configuration, viewable
        with ``np.show_config()``. For Intel machines, we recommend installing
        the `MKL <https://anaconda.org/anaconda/mkl>`__ package for Anaconda, as
        is done by `cloudtools <https://github.com/Nealelab/cloudtools>`__.

        `k` must be positive semi-definite; symmetry is not checked as only the
        lower triangle is used.

        Parameters
        ----------
        y: :class:`ndarray`
            :math:`n` vector of observations.
        x: :class:`ndarray`
            :math:`n \times p` matrix of fixed effects.
        k: :class:`ndarray`
            :math:`n \times n` positive semi-definite kernel :math:`K`.
        p_path: :obj:`str`, optional
            Path at which to write :math:`P` as a block matrix.
        overwrite: :obj:`bool`
            If ``True``, overwrite an existing file at `p_path`.

        Returns
        -------
        model: :class:`LinearMixedModel`
            Model constructed from :math:`y`, :math:`X`, and :math:`K`.
        p: :class:`ndarray`
            Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.

        Raises
        ------
        ValueError
            If `k` is not square, if `y` or `x` does not have the same number
            of rows as `k`, or if the smallest eigenvalue of `k` is negative
            beyond a relative tolerance of 1e-12 of the largest.
        """
        _check_dims(y, "y", 1)
        _check_dims(x, "x", 2)
        _check_dims(k, "k", 2)

        n = k.shape[0]
        if k.shape[1] != n:
            raise ValueError("from_kinship: 'k' must be a square matrix")
        if y.shape[0] != n:
            raise ValueError("from_kinship: 'y' and 'k' must have the same "
                             "number of rows")
        if x.shape[0] != n:
            raise ValueError("from_kinship: 'x' and 'k' must have the same "
                             "number of rows")

        # Eigenvalues arrive in ascending order (they are flipped below), so
        # s[0] is the smallest and s[-1] the largest.
        s, u = hl.linalg._eigh(k)
        # Allow round-off: reject only when the smallest eigenvalue is negative
        # by more than 1e-12 relative to the largest.
        if s[0] < -1e-12 * s[-1]:
            raise ValueError("from_kinship: smallest eigenvalue of 'k' is "
                             f"negative: {s[0]}")

        # flip eigenvalues, and the matching eigenvector columns, to
        # descending order
        s = np.flip(s, axis=0)
        u = np.fliplr(u)
        p = u.T
        # Same test the LinearMixedModel constructor applies to p_path.
        if p_path is not None:
            BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite)

        model = LinearMixedModel(p @ y, p @ x, s, p_path=p_path)
        return model, p
示例#8
0
    def __init__(self, py, px, s, y=None, x=None, p_path=None):
        """Validate the inputs and initialize an unfitted model.

        The model is low rank when both `y` and `x` are given and full rank
        when neither is given.

        Parameters
        ----------
        py: 1-dimensional array of size ``r``.
        px: 2-dimensional array with ``r`` rows and ``f`` columns.
        s: 1-dimensional array of size ``r``.
        y: 1-dimensional array of size ``n > r``, low-rank only.
        x: 2-dimensional array with ``n`` rows and ``f`` columns, low-rank only.
        p_path: optional path of a block matrix with ``r`` rows and ``n``
            columns; its shape is read and checked here.

        Raises
        ------
        ValueError
            If exactly one of `y` and `x` is given, or if any of the shapes
            above are inconsistent.
        """
        if y is None and x is None:
            low_rank = False
        elif y is not None and x is not None:
            low_rank = True
        else:
            raise ValueError('for low-rank, set both y and x; for full-rank, do not set y or x.')

        _check_dims(py, 'py', 1)
        _check_dims(px, 'px', 2)
        _check_dims(s, 's', 1)

        r = s.size
        f = px.shape[1]

        if py.size != r:
            raise ValueError("py and s must have the same size")
        if px.shape[0] != r:
            raise ValueError("px must have the same number of rows as the size of s")
        if low_rank:
            _check_dims(y, 'y', 1)
            _check_dims(x, 'x', 2)
            n = y.size
            if n <= r:
                raise ValueError("size of y must be larger than the size of s")
            if x.shape[0] != n:
                raise ValueError("x must have the same number of rows as the size of y")
            if x.shape[1] != f:
                raise ValueError("px and x must have the same number of columns")
        else:
            # Full rank: there are as many samples as eigenvalues.
            n = r

        if p_path is not None:
            # Only the shape of the stored matrix is inspected here.
            n_rows, n_cols = BlockMatrix.read(p_path).shape
            if n_cols != n:
                raise ValueError("LinearMixedModel: Number of columns in the block "
                                 f"matrix at 'p_path' ({n_cols}) must equal "
                                 f"the size of 'y' ({n})")
            if n_rows != r:
                raise ValueError("LinearMixedModel: Number of rows in the block "
                                 f"matrix at 'p_path' ({n_rows}) must equal "
                                 f"the size of 'py' ({r})")

        self.low_rank = low_rank
        self.n = n
        self.f = f
        self.r = r
        self.py = py
        self.px = px
        self.s = s
        self.y = y
        self.x = x
        self.p_path = p_path

        # Runs after n, f and r are stored and before any fit state is set up.
        self._check_dof()

        # Fit results; all remain None until the model is fitted.
        self.beta = None
        self.sigma_sq = None
        self.tau_sq = None
        self.gamma = None
        self.log_gamma = None
        self.h_sq = None
        self.h_sq_standard_error = None
        self.optimize_result = None

        self._fitted = False

        if low_rank:
            # Products of the untransformed y and x, computed once up front.
            self._yty = y @ y
            self._xty = x.T @ y
            self._xtx = x.T @ x

        # Working state for the model with f fixed-effect columns.
        self._dof = n - f
        self._d = None
        self._ydy = None
        self._xdy = None
        self._xdx = None

        # Working state sized for one additional fixed-effect column.
        self._dof_alt = n - (f + 1)
        self._d_alt = None
        self._ydy_alt = None
        self._xdy_alt = np.zeros(f + 1)
        self._xdx_alt = np.zeros((f + 1, f + 1))

        self._residual_sq = None

        self._scala_model = None