def __init__(self, length, order, coeff_range=(-1, 1), distribution=None,
             model_type='local', neg_coeffs=True):
    """Simulate a binary genotype-phenotype map from a random NK table.

    Parameters
    ----------
    length : int
        number of sites; the wildtype is "0" * length and the fully
        mutated genotype is "1" * length.
    order : int
        size of the NK interaction window. The table gets one entry for
        every binary string of this length.
    coeff_range : 2-tuple
        low and high bounds of the uniform distribution that table values
        are drawn from.
    distribution, model_type, neg_coeffs
        accepted for interface compatibility; not read by this constructor.
    """
    wildtype = "0" * length
    mutations = binary_mutations_map(wildtype, "1" * length)

    # Initialize a genotype-phenotype map
    super(NkSimulation, self).__init__(
        wildtype,
        mutations,
        log_transform=False,
    )
    self.epistasis = EpistasisMap(self)

    # Construct the NK epistasis table: one key per binary word of
    # `order` characters.
    self.epistasis._order = order
    keys = np.array(["".join(bits) for bits in
                     it.product('01', repeat=self.epistasis.order)])

    # One vectorized draw replaces the per-key loop; values come from the
    # same global NumPy random stream.
    vals = np.random.uniform(coeff_range[0], coeff_range[1], size=len(keys))

    self.epistasis.keys = keys
    self.epistasis.values = vals

    # Build the genotype-phenotype map.
    self.build()
def X_constructor(self, genotypes=None, coeff_labels=None, mutations=None,
                  **kwargs):
    """A helper method that constructs an X matrix for this model.

    Attaches an `EpistasisMap` object to the `epistasis` attribute of the
    model when one needs to be created: always when `coeff_labels` is
    given, otherwise only if the model has no `epistasis` attribute yet.

    The simplest way to construct X is to give a set of binary genotypes
    and epistatic labels. If not given, will try to infer these features
    from an attached genotype-phenotype map.

    Parameters
    ----------
    genotypes : list
        list of genotypes. Defaults to `self.gpm.binary.genotypes`.
    coeff_labels : list
        list of lists. Each sublist contains site-indices that represent
        participants in that epistatic interaction.
    mutations : dict
        mutations dictionary mapping sites to alphabet at the site. Only
        consulted when a new epistasis map is built from the model order.
    **kwargs
        accepted and ignored.

    Returns
    -------
    The value returned by `generate_dv_matrix` for the genotypes and
    `self.epistasis.labels`.

    Raises
    ------
    AttributeError
        if `genotypes` is None and no genotype-phenotype map is attached.
    """
    # First check genotypes are available
    if genotypes is None:
        try:
            genotypes = self.gpm.binary.genotypes
        except AttributeError as err:
            raise AttributeError(
                "genotypes must be given, because no GenotypePhenotypeMap is attached to this model."
            ) from err

    # Build epistasis map
    if coeff_labels is None:
        # Only build a map when one was not already created.
        if not hasattr(self, "epistasis"):
            # Mutations dictionary given? if not, try to infer one.
            if mutations is None:
                try:
                    mutations = self.gpm.mutations
                except AttributeError:
                    mutations = extract_mutations_from_genotypes(genotypes)

            # Construct epistasis mapping
            self.epistasis = EpistasisMap.from_mutations(
                mutations, self.order, model_type=self.model_type)
    else:
        self.epistasis = EpistasisMap.from_labels(
            coeff_labels, model_type=self.model_type)

    # Construct the X matrix (convert to binary if necessary).
    try:
        return generate_dv_matrix(genotypes, self.epistasis.labels,
                                  model_type=self.model_type)
    except Exception:
        # The first attempt presumably fails for non-binary genotypes
        # (TODO confirm which exception generate_dv_matrix raises so this
        # can be narrowed further). Translate through the attached map and
        # retry. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        mapping = self.gpm.map("complete_genotypes",
                               "binary.complete_genotypes")
        binaries = [mapping[g] for g in genotypes]
        return generate_dv_matrix(binaries, self.epistasis.labels,
                                  model_type=self.model_type)
def __init__(self, wildtype, genotypes, phenotypes,
             stdeviations=None,
             log_transform=False,
             mutations=None,
             n_replicates=1,
             logbase=np.log10):
    """Set up the genotype-phenotype map and attach an epistasis map.

    When `mutations` is not supplied, a binary mutations map is derived
    from the wildtype and the genotype returned by `farthest_genotype`.
    All remaining arguments are forwarded unchanged to the parent
    constructor.
    """
    # Fall back to a binary mapping when no specific mutations are named.
    if mutations is None:
        mutations = binary_mutations_map(
            wildtype, farthest_genotype(wildtype, genotypes))

    parent_options = dict(
        stdeviations=stdeviations,
        log_transform=log_transform,
        mutations=mutations,
        n_replicates=n_replicates,
        logbase=logbase,
    )
    super(BaseModel, self).__init__(wildtype, genotypes, phenotypes,
                                    **parent_options)

    # Epistasis map bound to this model.
    self.epistasis = EpistasisMap(self)

    # Plotting helper is optional: a Warning raised while creating it is
    # ignored and the model is simply left without a `plot` attribute.
    try:
        self.plot = EpistasisPlotting(self)
    except Warning:
        pass
def __init__(self, wildtype, mutations, log_transform=False,
             logbase=np.log10, **kwargs):
    """Create the genotype space with placeholder phenotypes.

    Genotypes are enumerated from `wildtype` and `mutations` via
    `utils.mutations_to_genotypes`; every phenotype starts at 1.0. Extra
    keyword arguments are passed through to the parent constructor.
    """
    all_genotypes = np.array(
        utils.mutations_to_genotypes(wildtype, mutations))
    placeholder_phenotypes = np.ones(len(all_genotypes))

    # Hand everything to the parent genotype-phenotype map.
    super(BaseSimulation, self).__init__(
        wildtype,
        all_genotypes,
        placeholder_phenotypes,
        log_transform=log_transform,
        logbase=logbase,
        mutations=mutations,
        **kwargs
    )

    # Start with an empty epistasis map.
    self.epistasis = EpistasisMap()
def add_gpm(self, gpm):
    """Attach a GenotypePhenotypeMap to this epistasis model.

    Stores `gpm`, clears the `Xbuilt` cache, derives the X-matrix columns
    from the model order and the map's encoding table, and creates a fresh
    `EpistasisMap` for those columns.

    Returns
    -------
    self
    """
    self._gpm = gpm

    # Any previously built X matrices no longer apply.
    self.Xbuilt = dict()

    # Columns of the X matrix for this order / encoding.
    encoding = self.gpm.encoding_table
    self.Xcolumns = encoding_to_sites(self.order, encoding)

    # Epistasis map keyed on those columns.
    self.epistasis = EpistasisMap(sites=self.Xcolumns, gpm=gpm)
    return self
def _fit_additive(self, X=None, y=None):
    """Create and fit the first-order (additive) linear model.

    A new `EpistasisLinearRegression` of order 1 is stored on
    `self.Additive`, given this model's genotype-phenotype map, assigned a
    fresh `EpistasisMap` over its X columns, and fitted with `X` and `y`.

    Returns
    -------
    self
    """
    additive = EpistasisLinearRegression(order=1,
                                         model_type=self.model_type)
    self.Additive = additive
    additive.add_gpm(self.gpm)

    # Replace the epistasis map with one built only from the X columns.
    additive.epistasis = EpistasisMap(sites=additive.Xcolumns)

    # Fit the additive model.
    additive.fit(X=X, y=y)
    return self
def _fit_additive(self, X=None, y=None, **kwargs):
    """Fit the attached additive model on the first-order part of X.

    Parameters
    ----------
    X : numpy.ndarray, pandas.DataFrame, or other
        design matrix. Arrays and DataFrames are truncated to their first
        `self.Additive.epistasis.n` columns; any other value (including
        None) is forwarded to the additive model untouched.
    y : array-like or None
        forwarded to `self.Additive.fit`.
    **kwargs
        accepted and ignored.

    Returns
    -------
    self
    """
    # Fresh epistasis map for the additive model.
    self.Additive.epistasis = EpistasisMap(
        sites=self.Additive.Xcolumns,
    )

    # Use a first order matrix only. A DataFrame cannot be sliced with
    # `X[:, :n]`, so it must be sliced positionally through `.iloc`.
    if isinstance(X, pd.DataFrame):
        Xadd = X.iloc[:, :self.Additive.epistasis.n]
    elif isinstance(X, np.ndarray):
        Xadd = X[:, :self.Additive.epistasis.n]
    else:
        Xadd = X

    # Fit the additive model and copy its coefficients into the map.
    self.Additive.fit(X=Xadd, y=y)
    self.Additive.epistasis.values = self.Additive.coef_
    return self
def __init__(self, wildtype, mutations, order, coeff_range=(-1, 1),
             model_type='local'):
    """Simulate a map whose coefficients are `base` raised to random powers.

    The parent constructor is called with `log_transform=True`. An
    epistasis map of the requested `order` is attached, its values are set
    to `self.base ** u` with `u` drawn uniformly from `coeff_range`, and
    the phenotypes are built.
    """
    super(MultiplicativeSimulation, self).__init__(
        wildtype, mutations, log_transform=True)
    self.model_type = model_type

    # Empty epistasis map, sized by the requested order.
    self.epistasis = EpistasisMap(self)
    self.epistasis.order = order

    # One uniform exponent per epistatic key.
    base = self.base
    exponents = np.random.uniform(
        coeff_range[0], coeff_range[1], size=len(self.epistasis.keys))
    self.epistasis.values = base ** exponents

    # Build the phenotypes from the epistatic interactions.
    self.build()
class NkSimulation(BaseSimulation):
    """Generate genotype-phenotype map from NK fitness models.

    Each site contributes a value looked up in a random table keyed by the
    states of the site and its neighbors; a phenotype is the sum of the
    contributions of all sites. Neighborhoods wrap around the ends of the
    genotype.
    """

    def __init__(self, length, order, coeff_range=(-1, 1), distribution=None,
                 model_type='local', neg_coeffs=True):
        """Build a binary genotype space and a random NK table.

        Parameters
        ----------
        length : int
            number of sites; wildtype is "0" * length.
        order : int
            size of the interaction window (number of sites per table key).
        coeff_range : 2-tuple
            low and high bounds of the uniform distribution the table
            values are drawn from.
        distribution, model_type, neg_coeffs
            accepted for interface compatibility; not read here.
        """
        wildtype = "0" * length
        mutations = binary_mutations_map(wildtype, "1" * length)

        # Initialize a genotype-phenotype map
        super(NkSimulation, self).__init__(
            wildtype,
            mutations,
            log_transform=False,
        )
        self.epistasis = EpistasisMap(self)

        # Construct the NK epistasis table: one key per binary word of
        # `order` characters, each with a uniformly drawn value.
        self.epistasis._order = order
        keys = np.array(["".join(bits) for bits in
                         it.product('01', repeat=self.epistasis.order)])
        vals = np.random.uniform(coeff_range[0], coeff_range[1],
                                 size=len(keys))
        self.epistasis.keys = keys
        self.epistasis.values = vals

        # Build the genotype-phenotype map.
        self.build()

    @classmethod
    def quick_start(cls, length, order, **kwargs):
        """Construct the genotype-phenotype map.

        Thin alternate constructor: equivalent to
        ``cls(length, order, **kwargs)``.
        """
        return cls(length, order, **kwargs)

    def build(self):
        """Build phenotypes from the NK table.

        For every site j the window covers `pre_neighbor` sites before j,
        site j itself, and `neighbor` sites after it (`order` sites in
        total). Window positions are taken modulo the genotype length, so
        the window always holds exactly `order` characters in cyclic
        order, however far it reaches past either end.
        """
        nk_table = self.epistasis.map("keys", "values")

        # Even orders cannot be centered: take one fewer site on the left.
        order = self.epistasis.order
        neighbor = order // 2
        pre_neighbor = neighbor - 1 if order % 2 == 0 else neighbor
        offsets = range(-pre_neighbor, neighbor + 1)

        # Use NK table to build phenotypes
        length = self.length
        phenotypes = np.zeros(self.n, dtype=float)
        for i, genotype in enumerate(self.genotypes):
            phenotypes[i] = sum(
                nk_table["".join(genotype[(j + k) % length]
                                 for k in offsets)]
                for j in range(length)
            )
        self.phenotypes = phenotypes
class BaseSimulation(GenotypePhenotypeMap):
    """ Base class for simulating genotype-phenotype maps built from epistatic
    interactions.

    Subclasses must implement `build`, which turns the current epistatic
    coefficients into phenotypes.

    Parameters
    ----------
    wildtype : str
        wildtype sequence.
    mutations : dict
        dictionary mapping each site the possible mutations
    """

    def __init__(self, wildtype, mutations, log_transform=False,
                 logbase=np.log10, **kwargs):
        # Enumerate the genotype space; phenotypes are placeholders (ones).
        genotypes = np.array(
            utils.mutations_to_genotypes(wildtype, mutations))
        phenotypes = np.ones(len(genotypes))

        # Initialize a genotype-phenotype map
        super(BaseSimulation, self).__init__(
            wildtype,
            genotypes,
            phenotypes,
            log_transform=log_transform,
            logbase=logbase,
            mutations=mutations,
            **kwargs
        )

        # Attach an (empty) epistasis model.
        self.epistasis = EpistasisMap()

    @assert_epistasis
    def set_coefs_order(self, order):
        """Set coefs from an epistatic order."""
        self.epistasis._from_mutations(self.mutations, order)

    @assert_epistasis
    def set_coefs_labels(self, labels):
        """Set coefs from list of coefs labels."""
        self.epistasis.labels = labels

    @assert_epistasis
    def set_coefs(self, labels, values):
        """Set the epistatic coefs and rebuild the phenotypes.

        Parameters
        ----------
        labels : List
            List of epistatic coefficient labels.
        values : List
            list of floats representing to epistatic coefficients.
        """
        self.epistasis.labels = labels
        self.epistasis.values = values
        self.build()

    @assert_epistasis
    def set_coefs_values(self, values):
        """Set coefficient values and rebuild the phenotypes."""
        self.epistasis.values = values
        self.build()

    @assert_epistasis
    def set_coefs_random(self, coef_range):
        """Set coefs to values drawn from a random, uniform distribution
        between coef_range, then rebuild the phenotypes.

        Parameters
        ----------
        coef_range : 2-tuple
            low and high bounds for coeff values.
        """
        # Add values to epistatic interactions
        self.epistasis.values = np.random.uniform(
            coef_range[0], coef_range[1], size=len(self.epistasis.labels))
        self.build()

    @classmethod
    def from_length(cls, length, **kwargs):
        """Construct a simulation over all binary sequences of a given length.

        Parameters
        ----------
        length : int
            length of the genotypes
        **kwargs
            forwarded to the class constructor.

        Returns
        -------
        GenotypePhenotypeMap
        """
        wildtype = "0" * length
        mutations = utils.binary_mutations_map(wildtype, "1" * length)
        return cls(wildtype, mutations, **kwargs)

    @classmethod
    def from_coefs(cls, wildtype, mutations, labels, coefs,
                   model_type="local", **kwargs):
        """Construct a genotype-phenotype map from epistatic coefficients.

        Parameters
        ----------
        wildtype : str
            wildtype sequence
        mutations : dict
            dictionary mapping each site to their possible mutations.
        labels : list
            epistatic coefficient labels, one per coefficient.
        coefs : list or array
            epistatic coefficients
        model_type : str
            epistatic model to use in composition matrix.
            (`'global'` or `'local'`)

        Returns
        -------
        GenotypePhenotypeMap

        Raises
        ------
        ValueError
            if `labels` and `coefs` differ in length.
        """
        # Check the inputs against each other before building anything. The
        # previous check referenced undefined names and could never pass.
        if len(coefs) != len(labels):
            raise ValueError(
                "Number of coefs does not match the number of labels given.")
        sim = cls(wildtype, mutations, model_type=model_type, **kwargs)
        sim.set_coefs(labels, coefs)
        return sim

    def build(self, values=None, **kwargs):
        """ Method for construction phenotypes from model.

        Abstract here; every concrete simulation must override it.
        """
        raise NotImplementedError("""Must be implemented in subclass. """)

    def set_stdeviations(self, sigma):
        """Add standard deviations to the simulated phenotypes, which can then be
        used for sampling error in the genotype-phenotype map.

        Parameters
        ----------
        sigma : float or array-like
            Adds standard deviations to the phenotypes. If float, all
            phenotypes are given the same stdeviations. Else, array must be
            same length as phenotypes and will be assigned to each phenotype.
        """
        stdeviations = np.ones(len(self.phenotypes)) * sigma
        self.stdeviations = stdeviations
def add_gpm(self,
            gpm,
            genotype_column="genotype",
            phenotype_column=None,
            uncertainty_column=None):
    """
    Add a GenotypePhenotypeMap object to the epistasis model.

    Parameters
    ----------
    gpm : gpmap.GenotypePhenotypeMap
        genotype phenotype map with genotypes and phenotypes
    genotype_column : str
        name of the genotype column in the gpm
    phenotype_column : str
        name of the phenotype column in the gpm. If None, take the first
        numeric column in the gpm that is not listed in
        `gpmap.reserved_data_columns`.
    uncertainty_column : str
        name of column with phenotype uncertainty in gpm. If None, a column
        named `epi_zero_uncertainty` is written into `gpm.data` holding
        1e-6 * min(abs(phenotype)) for every row.

    Returns
    -------
    self

    Raises
    ------
    TypeError
        if `gpm` is not a GenotypePhenotypeMap or `genotype_column` is not
        a string.
    KeyError
        if a requested column is missing from `gpm.data`.
    ValueError
        if no phenotype column can be found, a phenotype/uncertainty column
        is not numeric, or both names refer to the same column.
    """
    # Make sure gpm is a GenotypePhenotypeMap and append it
    if not isinstance(gpm, gpmap.GenotypePhenotypeMap):
        err = "gpm must be a gpmap.GenotypePhenotypeMap instance\n"
        raise TypeError(err)
    self._gpm = gpm

    # Make sure attached genotype-phenotype map has the specified genotype
    # column. isinstance (not an exact type match) so str subclasses pass.
    if not isinstance(genotype_column, str):
        err = f"invalid genotype_column {genotype_column}. Should be a\n"
        err += "column name (string)\n"
        raise TypeError(err)

    try:
        self._gpm.data.loc[:, genotype_column]
    except KeyError as exc:
        err = "gpm does not have the specified genotype_column\n"
        err += f"'{genotype_column}'\n"
        raise KeyError(err) from exc

    self._genotype_column = genotype_column

    # If the phenotype_column is not specified, grab the first numeric
    # non-reserved column
    if phenotype_column is None:
        for c in self._gpm.data.columns:
            if c not in gpmap.reserved_data_columns:
                if np.issubdtype(self._gpm.data.loc[:, c].dtype, np.number):
                    phenotype_column = c
                    break

    # If no phenotype column was found
    if phenotype_column is None:
        err = "No phenotype column was specified and none was found in\n"
        err += "the GenotypePhenotypeMap.\n"
        raise ValueError(err)

    # Make sure attached genotype-phenotype map has the specified phenotype
    # column and that this column is numeric.
    try:
        self._gpm.data.loc[:, phenotype_column]
    except KeyError as exc:
        err = "gpm does not have the specified phenotype_column\n"
        err += f"'{phenotype_column}'\n"
        raise KeyError(err) from exc

    if not np.issubdtype(self._gpm.data.loc[:, phenotype_column].dtype,
                         np.number):
        err = f"'{phenotype_column}' must be numeric\n"
        raise ValueError(err)

    self._phenotype_column = phenotype_column

    # If uncertainty_column is not specified, make a new fake uncertainty
    # column holding a tiny value: 1e-6 times the smallest absolute
    # phenotype. Note that this writes into the caller's gpm.
    if uncertainty_column is None:
        uncertainty_column = "epi_zero_uncertainty"
        v = np.min(np.abs(self._gpm.data.loc[:, phenotype_column])) * 1e-6
        self._gpm.data.loc[:, "epi_zero_uncertainty"] = v
    else:
        if uncertainty_column == self._phenotype_column:
            err = "phenotype_column and uncertainty_column cannot be the same\n"
            raise ValueError(err)

    # Make sure attached genotype-phenotype map has the specified uncertainty
    # column and that this column is numeric.
    try:
        self._gpm.data.loc[:, uncertainty_column]
    except KeyError as exc:
        err = "gpm does not have the specified uncertainty_column\n"
        err += f"'{uncertainty_column}'\n"
        raise KeyError(err) from exc

    if not np.issubdtype(self._gpm.data.loc[:, uncertainty_column].dtype,
                         np.number):
        err = f"'{uncertainty_column}' must be numeric\n"
        raise ValueError(err)

    self._uncertainty_column = uncertainty_column

    # Construct columns for X matrix
    self.Xcolumns = encoding_to_sites(self.order, self.gpm.encoding_table)

    # Map those columns to epistasis dataframe.
    self.epistasis = EpistasisMap(sites=self.Xcolumns, gpm=gpm)

    # Wipe out previous X (or create empty previous X) because we just
    # added a new gpmap
    self._previous_X = None

    return self