def add_epistasis(self):
    """
    Create the epistasis map for this object and store it on `self.epistasis`.

    The interaction sites are generated from `self.order` and
    `self.encoding_table`, and the map is constructed with `values=0`.
    """
    # Every epistatic interaction up to self.order; each one corresponds to
    # a column of the X matrix.
    interaction_sites = encoding_to_sites(self.order, self.encoding_table)

    # Attach the interactions to the epistasis dataframe. Note that this
    # object itself is handed over as the `gpm` argument.
    self.epistasis = DistributionSimulation(
        gpm=self,
        sites=interaction_sites,
        values=0,
    )
def add_gpm(self, gpm):
    """
    Attach a GenotypePhenotypeMap to the epistasis model.

    Stores `gpm` on the model, empties the `Xbuilt` dictionary, regenerates
    the X-matrix columns for `self.order` and builds a new EpistasisMap
    from those columns.

    Parameters
    ----------
    gpm : GenotypePhenotypeMap
        map to attach; its encoding table is used to build the columns.

    Returns
    -------
    self
        the model itself, so calls can be chained.
    """
    self._gpm = gpm

    # Anything stored in Xbuilt belonged to the previous map; start over.
    self.Xbuilt = {}

    # Columns of the X matrix: one per epistatic interaction.
    model_order = self.order
    encoding_table = self.gpm.encoding_table
    self.Xcolumns = encoding_to_sites(model_order, encoding_table)

    # Tie those columns to the epistasis dataframe.
    self.epistasis = EpistasisMap(gpm=gpm, sites=self.Xcolumns)

    return self
def _genotypes_to_X(genotypes, gpm, order=1, model_type='global'):
    """
    Build an X matrix for a list of genotypes.

    Parameters
    ----------
    genotypes : list-like
        list of genotypes matching genotypes seen in gpm
    gpm : gpmap.GenotypePhenotypeMap
        genotype phenotype map that has an encoding table for converting the
        genotypes to binary
    order : int
        order of epistasis for generating the X matrix.
    model_type : str
        should be 'global' or 'local', indicating what reference state to use
        for the epistasis model.

    Returns
    -------
    X : np.ndarray
        binary array indicating which epistatic coefficients should be applied
        to which genotype.

    Raises
    ------
    ValueError
        if `genotypes` contains duplicates, or if `gpm.genotype_is_in`
        reports that any genotype is not in the map.
    """

    # Make sure genotypes are unique
    if len(set(genotypes)) != len(genotypes):
        err = "genotypes must be unique when constructing an X matrix\n"
        raise ValueError(err)

    # Make sure genotypes are in the volume described by the map
    is_in = gpm.genotype_is_in(genotypes)
    if len(genotypes) == 1:
        # NOTE(review): presumably genotype_is_in can hand back a bare value
        # for a single genotype; wrapping keeps the check below uniform.
        is_in = [is_in]
    if not np.all(is_in):
        err = "all genotypes for constructing an X matrix must be in the\n"
        err += "attached gpmap.\n"
        raise ValueError(err)

    # Build a sites list and convert the genotypes to their binary encoding.
    sites = encoding_to_sites(order, gpm.encoding_table)
    binary = gpmap.utils.genotypes_to_binary(genotypes, gpm.encoding_table)

    # X matrix
    X = get_model_matrix(binary, sites, model_type=model_type)

    return X
def test_get_model_matrix(test_data):
    """
    Check that the cython and pure-python code paths of `get_model_matrix`
    return equal matrices for every dataset, model type and epistatic order.
    """
    for entry in test_data:
        gpm = gpmap.GenotypePhenotypeMap(genotype=entry["genotype"])

        for model_type in ("global", "local"):

            # Orders run from 1 up to the genotype length.
            for order in range(1, gpm.length + 1):

                # Sites (interaction columns) for this order
                sites = mapping.encoding_to_sites(order, gpm.encoding_table)

                # Cython result first; copy it so the later call cannot
                # alter what we compare against.
                cython_X = np.copy(m.get_model_matrix(gpm.binary,
                                                      sites,
                                                      model_type=model_type,
                                                      use_cython=True))

                python_X = m.get_model_matrix(gpm.binary,
                                              sites,
                                              model_type=model_type,
                                              use_cython=False)

                # Make sure python and cython give same answer
                assert np.array_equal(cython_X, python_X)
def add_gpm(self,
            gpm,
            genotype_column="genotype",
            phenotype_column=None,
            uncertainty_column=None):
    """
    Add a GenotypePhenotypeMap object to the epistasis model.

    The map and all column names are validated before any attribute of the
    model is assigned, so a call that fails validation does not leave the
    model pointing at a partially accepted map.

    Parameters
    ----------
    gpm : gpmap.GenotypePhenotypeMap
        genotype phenotype map with genotypes and phenotypes
    genotype_column : str
        name of the genotype column in the gpm
    phenotype_column : str
        name of the phenotype column in the gpm. If None, take the first
        numeric column in gpm.data that is not listed in
        gpmap.reserved_data_columns.
    uncertainty_column : str
        name of column with phenotype uncertainty in gpm. If None, write a
        column `epi_zero_uncertainty` into gpm.data with the value
        1e-6*np.min(np.abs(phenotype)) and use that column.

    Returns
    -------
    self
        the model itself, so calls can be chained.

    Raises
    ------
    TypeError
        if gpm is not a gpmap.GenotypePhenotypeMap or genotype_column is not
        a string.
    KeyError
        if a specified column is not in gpm.data.
    ValueError
        if no phenotype column can be found, if the phenotype or uncertainty
        column is not numeric, or if both names refer to the same column.
    """

    def _get_column(name, role):
        # Return column `name` of gpm.data, or raise a KeyError naming `role`.
        try:
            return gpm.data.loc[:, name]
        except KeyError as exc:
            err = f"gpm does not have the specified {role}\n"
            err += f"'{name}'\n"
            raise KeyError(err) from exc

    def _require_numeric(name, role):
        # Make sure column `name` exists in gpm.data and has a numeric dtype.
        if not np.issubdtype(_get_column(name, role).dtype, np.number):
            err = f"'{name}' must be numeric\n"
            raise ValueError(err)

    # Make sure gpm is a GenotypePhenotypeMap
    if not isinstance(gpm, gpmap.GenotypePhenotypeMap):
        err = "gpm must be a gpmap.GenotypePhenotypeMap instance\n"
        raise TypeError(err)

    # Make sure the genotype-phenotype map has the specified genotype column.
    if not isinstance(genotype_column, str):
        err = f"invalid genotype_column {genotype_column}. Should be a\n"
        err += "column name (string)\n"
        raise TypeError(err)
    _get_column(genotype_column, "genotype_column")

    # If the phenotype_column is not specified, grab the first numeric
    # non-reserved column
    if phenotype_column is None:
        for c in gpm.data.columns:
            if c in gpmap.reserved_data_columns:
                continue
            if np.issubdtype(gpm.data.loc[:, c].dtype, np.number):
                phenotype_column = c
                break

    # If no phenotype column was found
    if phenotype_column is None:
        err = "No phenotype column was specified and none was found in\n"
        err += "the GenotypePhenotypeMap.\n"
        raise ValueError(err)

    # Make sure the genotype-phenotype map has the specified phenotype
    # column and that this column is numeric.
    _require_numeric(phenotype_column, "phenotype_column")

    # If uncertainty_column is not specified, make a new fake uncertainty
    # column holding a tiny value scaled to the smallest absolute phenotype.
    if uncertainty_column is None:
        uncertainty_column = "epi_zero_uncertainty"
        v = np.min(np.abs(gpm.data.loc[:, phenotype_column])) * 1e-6
        gpm.data.loc[:, "epi_zero_uncertainty"] = v
    elif uncertainty_column == phenotype_column:
        err = "phenotype_column and uncertainty_column cannot be the same\n"
        raise ValueError(err)

    # Make sure the genotype-phenotype map has the specified uncertainty
    # column and that this column is numeric.
    _require_numeric(uncertainty_column, "uncertainty_column")

    # Everything checked out: commit the map and column names to the model.
    self._gpm = gpm
    self._genotype_column = genotype_column
    self._phenotype_column = phenotype_column
    self._uncertainty_column = uncertainty_column

    # Construct columns for X matrix
    self.Xcolumns = encoding_to_sites(self.order, self.gpm.encoding_table)

    # Map those columns to epistasis dataframe.
    self.epistasis = EpistasisMap(sites=self.Xcolumns, gpm=gpm)

    # Wipe out previous X (or create empty previous X) because we just
    # added a new gpmap
    self._previous_X = None

    return self