Пример #1
0
    def add_epistasis(self):
        """
        Build the epistatic sites for this model and attach them as
        ``self.epistasis``.

        The sites are generated by ``encoding_to_sites`` from ``self.order``
        and ``self.encoding_table``. They are handed to a
        ``DistributionSimulation`` that is bound to this instance (``gpm=self``)
        and created with ``values=0``.
        """
        # Sites describing the epistatic interactions (the columns of the
        # X matrix, per the original author's note).
        interaction_sites = encoding_to_sites(self.order, self.encoding_table)

        # Attach the object that maps those sites to epistatic values.
        self.epistasis = DistributionSimulation(
            sites=interaction_sites,
            gpm=self,
            values=0,
        )
Пример #2
0
    def add_gpm(self, gpm):
        """
        Attach a GenotypePhenotypeMap object to the epistasis model.

        The map is stored on ``self._gpm``, the ``Xbuilt`` cache is emptied,
        the X matrix columns are regenerated from ``self.order`` and
        ``self.gpm.encoding_table``, and a new ``EpistasisMap`` is created for
        those columns.

        Returns
        -------
        self
            the model itself, so that calls can be chained
        """
        self._gpm = gpm

        # Whatever was cached in Xbuilt is discarded when a map is attached.
        self.Xbuilt = {}

        # Sites that define the columns of the X matrix.
        self.Xcolumns = encoding_to_sites(self.order,
                                          self.gpm.encoding_table)

        # Epistasis dataframe keyed on those same columns.
        self.epistasis = EpistasisMap(gpm=gpm, sites=self.Xcolumns)

        return self
Пример #3
0
def _genotypes_to_X(genotypes, gpm, order=1, model_type='global'):
    """
    Construct the X (model) matrix for a collection of genotypes.

    Parameters
    ----------
    genotypes : list-like
        genotypes to build rows for. Must be unique and must all be
        accepted by ``gpm.genotype_is_in``.
    gpm : gpmap.GenotypePhenotypeMap
        genotype phenotype map whose encoding table is used to convert the
        genotypes to binary
    order : int
        order of epistasis used when generating the sites of the X matrix.
    model_type : str
        should be 'global' or 'local', indicating what reference state to use
        for the epistasis model. Passed through to ``get_model_matrix``.

    Returns
    -------
    X : np.ndarray
        binary array indicating which epistatic coefficients should be applied
        to which genotype.

    Raises
    ------
    ValueError
        if ``genotypes`` contains duplicates, or if any genotype is reported
        as not being in ``gpm``.
    """

    # Reject duplicated genotypes up front.
    n_genotypes = len(genotypes)
    if len(set(genotypes)) != n_genotypes:
        raise ValueError(
            "genotypes must be unique when constructing an X matrix\n"
        )

    # Every genotype has to live inside the space described by the map.
    membership = gpm.genotype_is_in(genotypes)
    if n_genotypes == 1:
        # Wrap the single-genotype result so it can be counted like the
        # multi-genotype case below.
        membership = [membership]
    if len(membership) > np.sum(membership):
        raise ValueError(
            "all genotypes for constructing an X matrix must be in the\n"
            "attached gpmap\n."
        )

    # Sites for the requested order, plus the binary form of each genotype.
    sites = encoding_to_sites(order, gpm.encoding_table)
    binary = gpmap.utils.genotypes_to_binary(genotypes, gpm.encoding_table)

    return get_model_matrix(binary, sites, model_type=model_type)
Пример #4
0
def test_get_model_matrix(test_data):
    """
    Check that ``get_model_matrix`` gives the same X matrix with
    ``use_cython=True`` and ``use_cython=False``.

    For every dataset in ``test_data``, both model types and every epistatic
    order from 1 up to the genotype length are exercised.
    """

    for dataset in test_data:
        gpm = gpmap.GenotypePhenotypeMap(genotype=dataset["genotype"])

        for model_type in ("global", "local"):
            for order in range(1, gpm.length + 1):

                # Sites for this order of epistasis
                sites = mapping.encoding_to_sites(order, gpm.encoding_table)

                # Build with cython first and keep a copy as the reference.
                cython_X = np.copy(m.get_model_matrix(gpm.binary,
                                                      sites,
                                                      model_type=model_type,
                                                      use_cython=True))

                # Build again in pure python.
                python_X = m.get_model_matrix(gpm.binary,
                                              sites,
                                              model_type=model_type,
                                              use_cython=False)

                # Make sure python and cython give same answer
                assert np.array_equal(cython_X, python_X)
Пример #5
0
    def add_gpm(self,
                gpm,
                genotype_column="genotype",
                phenotype_column=None,
                uncertainty_column=None):
        """
        Add a GenotypePhenotypeMap object to the epistasis model.

        The map and the three column names are checked before any attribute
        of the model is assigned, so a call that is rejected during this
        validation leaves a previously attached map and its column names in
        place.

        Parameters
        ----------
        gpm : gpmap.GenotypePhenotypeMap
            genotype phenotype map with genotypes and phenotypes
        genotype_column : str
            name of the genotype column in the gpm
        phenotype_column : str
            name of the phenotype column in the gpm. If None, take the first
            numeric column in ``gpm.data`` that is not listed in
            ``gpmap.reserved_data_columns``
        uncertainty_column : str
            name of column with phenotype uncertainty in gpm. If None, a
            column `epi_zero_uncertainty` is written into ``gpm.data`` holding
            1e-6 times the smallest absolute phenotype value

        Returns
        -------
        self
            the model itself, so that calls can be chained

        Raises
        ------
        TypeError
            if ``gpm`` is not a gpmap.GenotypePhenotypeMap or
            ``genotype_column`` is not a string
        KeyError
            if a requested column is missing from ``gpm.data``
        ValueError
            if no phenotype column can be found, if the phenotype or
            uncertainty column is not numeric, or if the same column is given
            for both phenotype and uncertainty
        """

        def _require_numeric(column, role):
            # Check that `column` exists in gpm.data and has a numeric dtype;
            # `role` is the argument name used in the error message.
            try:
                series = gpm.data.loc[:, column]
            except KeyError as exc:
                err = f"gpm does not have the specified {role}\n"
                err += f"'{column}'\n"
                raise KeyError(err) from exc
            if not np.issubdtype(series.dtype, np.number):
                err = f"'{column}' must be numeric\n"
                raise ValueError(err)

        # Make sure gpm is a GenotypePhenotypeMap
        if not isinstance(gpm, gpmap.GenotypePhenotypeMap):
            err = "gpm must be a gpmap.GenotypePhenotypeMap instance\n"
            raise TypeError(err)

        # Make sure the genotype column name is a string and that the map
        # actually has that column.
        if not isinstance(genotype_column, str):
            err = f"invalid genotype_column {genotype_column}. Should be a\n"
            err += "column name (string)\n"
            raise TypeError(err)

        try:
            gpm.data.loc[:, genotype_column]
        except KeyError as exc:
            err = "gpm does not have the specified genotype_column\n"
            err += f"'{genotype_column}'\n"
            raise KeyError(err) from exc

        # If the phenotype_column is not specified, grab the first numeric
        # non-reserved column
        if phenotype_column is None:
            for c in gpm.data.columns:
                if c in gpmap.reserved_data_columns:
                    continue
                if np.issubdtype(gpm.data.loc[:, c].dtype, np.number):
                    phenotype_column = c
                    break

        # If no phenotype column was found
        if phenotype_column is None:
            err = "No phenotype column was specified and none was found in\n"
            err += "the GenotypePhenotypeMap.\n"
            raise ValueError(err)

        _require_numeric(phenotype_column, "phenotype_column")

        # If uncertainty_column is not specified, make a placeholder
        # uncertainty column: 1e-6 times the smallest absolute phenotype.
        # Note that this writes a new column into the caller's gpm.
        if uncertainty_column is None:
            uncertainty_column = "epi_zero_uncertainty"
            v = np.min(np.abs(gpm.data.loc[:, phenotype_column])) * 1e-6
            gpm.data.loc[:, "epi_zero_uncertainty"] = v
        elif uncertainty_column == phenotype_column:
            err = "phenotype_column and uncertainty_column cannot be the same\n"
            raise ValueError(err)

        _require_numeric(uncertainty_column, "uncertainty_column")

        # Everything checked out: commit the map and column names together.
        self._gpm = gpm
        self._genotype_column = genotype_column
        self._phenotype_column = phenotype_column
        self._uncertainty_column = uncertainty_column

        # Construct columns for X matrix
        self.Xcolumns = encoding_to_sites(self.order, self.gpm.encoding_table)

        # Map those columns to epistasis dataframe.
        self.epistasis = EpistasisMap(sites=self.Xcolumns, gpm=gpm)

        # Wipe out previous X (or create empty previous X) because we just
        # added a new gpmap
        self._previous_X = None

        return self