def get_forward_neighbors(source, current, mutations):
    """List single-mutation neighbors of ``current`` that move away from ``source``.

    A neighbor differs from ``current`` at exactly one site and is kept only
    when its Hamming distance to ``source`` is strictly larger than the
    distance between ``source`` and ``current``.

    Parameters
    ----------
    source : str
        Source genotype which determines the direction to be moving away.
    current : str
        Reference genotype whose neighbors are enumerated.
    mutations : dict
        Sites (keys, used as indices into ``current``) mapped to an alphabet
        list in genotype space (values). Sites mapped to ``None`` are skipped.

    Returns
    -------
    neighbors : list
        List of neighbor genotypes, each joined back into a string.

    Raises
    ------
    ValueError
        If the letter of ``current`` at a site is not present in that site's
        alphabet (raised by ``list.remove``).
    """
    sites = list(current)
    hd = hamming_distance(source, current)
    neighbors = []
    for i, alphabet in mutations.items():
        if alphabet is None:
            continue
        # Work on a copy so the caller's alphabet list is never modified.
        candidates = alphabet[:]
        candidates.remove(sites[i])
        for letter in candidates:
            g = sites[:]
            g[i] = letter
            # Keep only substitutions that increase the distance to source.
            if hamming_distance(source, g) > hd:
                neighbors.append("".join(g))
    return neighbors
def test_hamming_distance():
    """Check ``utils.hamming_distance`` on known pairs and on invalid input."""
    known_cases = [
        ("THIS IS A TEST", "HWIS IT A TEBT", 4),
        ("ROCKING", "ROCKING", 0),
    ]
    for first, second, expected in known_cases:
        assert utils.hamming_distance(first, second) == expected

    # Sequences of unequal length and a non-sequence argument must both
    # be rejected with ValueError.
    for bad_args in (("TEST", "NOT"), (0, "NOT")):
        with pytest.raises(ValueError):
            utils.hamming_distance(*bad_args)
def mean_path_divergence(G, paths):
    """Calculate the divergence of a paths ensemble according to
    Lobkovsky, 2011 [1].

    For every unordered pair of paths, the Hamming distances between each
    node of one path and each node of the other are summed in both
    directions, divided by the combined length of the two paths, and weighted
    by the product of the two paths' probabilities.

    Parameters
    ----------
    G : GenotypePhenotypeGraph object.
        Any GenotypePhenotypeGraph object or objects of classes that inherit
        from one, like GenotypePhenotypeMSM. Each node must expose a
        ``"binary"`` attribute through ``G.node[...]``.
    paths : dict.
        Dictionary of paths (keys) and probabilities (values).
        Example: {(0,1,3): 0.9, (0,2,3): 0.1}

    Returns
    -------
    divergence : float.
        A measure of divergence published as equation (2) in [1].
        ``0`` when fewer than two paths are given.

    References
    ----------
    [1] A. E. Lobkovsky, Y. I. Wolf, and E. V. Koonin. Predictability of
        evolutionary trajectories in fitness landscapes. PLoS Comput. Biol.,
        7:e1002302, 2011.
    """
    divergence = 0
    # Iterate over all possible pairwise combinations of paths (dict keys).
    for path_a, path_b in itertools.combinations(paths, 2):
        combined_length = len(path_a) + len(path_b)
        pair_hdist = 0
        # Accumulate node-to-node distances in both directions (a -> b, b -> a).
        for path, other_path in ((path_a, path_b), (path_b, path_a)):
            for node in path:
                for other_node in other_path:
                    # NOTE(review): if G is a networkx graph, `G.node` is not
                    # available in networkx >= 2.4 (`G.nodes` replaces it) --
                    # confirm against the supported version.
                    pair_hdist += hamming_distance(
                        G.node[node]["binary"],
                        G.node[other_node]["binary"],
                    )
        # Distance between the two paths.
        pair_dist = pair_hdist / combined_length
        # Weight by the probabilities of the two paths in THIS pair.
        divergence += pair_dist * paths[path_a] * paths[path_b]
    return divergence
def forward_paths(paths, msm, source, target):
    """Return the paths whose step count equals the source-target Hamming distance.

    Parameters
    ----------
    paths : iterable
        Candidate paths; each must support ``len``.
    msm : object
        Object exposing ``msm.gpm.data.binary``, indexable by the entries of
        ``source`` and ``target``.
    source : sequence
        Source node(s). Only ``source[0]`` is used to compute the distance.
    target : sequence
        Target node(s). Only ``target[0]`` is used to compute the distance.

    Returns
    -------
    fp : list
        Paths for which ``len(path) - 1`` equals the Hamming distance between
        the binary representations of ``source[0]`` and ``target[0]``.
    """
    min_dist = hamming_distance(
        msm.gpm.data.binary[source[0]],
        msm.gpm.data.binary[target[0]],
    )
    # A path with N nodes takes N - 1 steps.
    return [path for path in paths if len(path) - 1 == min_dist]
def hamming(self):
    """Hamming distance of every genotype from the wildtype.

    The integer array is built on first access and cached on
    ``self._hamming``; later calls return the cached array.
    """
    if not hasattr(self, "_hamming"):
        # Not computed yet: fill one entry per genotype.
        distances = np.empty(self.n, dtype=int)
        for idx, genotype in enumerate(self.genotypes):
            distances[idx] = utils.hamming_distance(self.wildtype, genotype)
        self._hamming = distances
    return self._hamming
def hamming(self):
    """Hamming distances of every genotype from each peak.

    The result is an integer array with one row per peak and one column per
    genotype. It is built on first access and cached on ``self._hamming``.
    """
    if not hasattr(self, "_hamming"):
        # Not computed yet: fill the (peaks x genotypes) table.
        table = np.empty([len(self.peaks), len(self.genotypes)], dtype=int)
        for row, peak in enumerate(self.peaks):
            for col, genotype in enumerate(self.genotypes):
                table[row, col] = utils.hamming_distance(peak, genotype)
        self._hamming = table
    return self._hamming
def adaptive_walk(lattice, n_mutations):
    """Given a lattice object, adaptive walk to a sequence n_mutations away.

    At each step a random not-yet-mutated site and a random amino acid are
    proposed. The proposal is accepted only if it increases the fraction
    folded and keeps the same native conformation as the starting lattice.

    Only works for <10 conformations in the landscapes!!!

    Parameters
    ----------
    lattice : LatticeThermodynamics
        Starting lattice (subclass instances are accepted).
    n_mutations : int
        Number of accepted mutations to accumulate.

    Returns
    -------
    LatticeThermodynamics
        Lattice of the last accepted mutant. When ``n_mutations <= 0`` no
        mutation is attempted and ``lattice`` itself is returned.

    Raises
    ------
    TypeError
        If ``lattice`` is not a ``LatticeThermodynamics`` object.
    Exception
        If ``lattice`` has more than 10 conformations, or if the walk cannot
        reach ``n_mutations`` (100 rejected proposals, or no unmutated site
        left to propose).
    """
    # Sanity check
    if not isinstance(lattice, LatticeThermodynamics):
        raise TypeError("lattice must be a LatticeThermodynamics object")
    if len(lattice.conf_list) > 10:
        raise Exception(
            "too many conformations to compute in a reasonable time.")

    wildtype = lattice.sequence
    mutant = list(wildtype)
    indices = list(range(len(wildtype)))
    fracfolded = lattice.fracfolded
    # Track the last accepted lattice explicitly so the return value is
    # always defined, even when the loop body never runs.
    accepted = lattice
    hamming = 0
    failed = 0
    # `indices` guard: each site is mutated at most once, so stop when none
    # are left instead of letting random.choice fail on an empty list.
    while hamming < n_mutations and failed < 100 and indices:
        # Select a site to mutate
        index = random.choice(indices)
        # Choose a mutation
        mutation = random.choice(AMINO_ACIDS)
        trial = mutant[:]
        trial[index] = mutation
        # New lattice
        mlattice = LatticeThermodynamics(
            "".join(trial),
            lattice.conf_list,
            lattice.temperature,
            interaction_energies=lattice.interaction_energies)
        if (mlattice.fracfolded > fracfolded
                and mlattice.native_conf == lattice.native_conf):
            indices.remove(index)
            mutant[index] = mutation
            hamming = hamming_distance(wildtype, mutant)
            fracfolded = mlattice.fracfolded
            accepted = mlattice
        else:
            failed += 1

    if hamming < n_mutations:
        raise Exception("No adaptive paths n_mutations away.")
    return accepted
def peaks(self):
    """Find n peaks that meet the max_dist/min_dist requirement.

    Returns the cached ``self._peaks`` when it is already populated.
    Otherwise starts from ``[self.b_state, self.a_state]`` and keeps drawing
    random genotypes, accepting one only when its Hamming distance to every
    peak chosen so far lies within ``[self.min_dist, self.max_dist]``, until
    ``self.peak_n`` peaks have been collected.
    """
    if self._peaks:
        return self._peaks

    self._peaks = [self.b_state, self.a_state]
    while len(self._peaks) < self.peak_n:
        # Propose a new peak.
        candidate = random.choice(self.genotypes)
        for existing in self._peaks:
            separation = utils.hamming_distance(existing, candidate)
            # Reject as soon as one existing peak is too close or too far.
            if separation < self.min_dist or separation > self.max_dist:
                break
        else:
            self._peaks.append(candidate)
    return self._peaks
def adaptive_walk2(seq, n_mutations, temp=1.0, target=None):
    """Greedy adaptive walk away from ``seq``.

    At every step each amino acid is tried at each site of the current
    mutant, each variant is scored with ``fitness.Fitness``, and the
    substitution with the highest score is applied. The walk stops once the
    mutant is ``n_mutations`` away from ``seq`` (Hamming distance) or after
    100 steps.

    Parameters
    ----------
    seq : str
        Starting (wildtype) sequence.
    n_mutations : int
        Hamming distance from ``seq`` to reach.
    temp : float
        Passed as the first argument to ``Fitness``.
    target : optional
        Passed to ``Fitness`` as ``targets``.

    Returns
    -------
    path : list
        One snapshot (list of residues) of the mutant per step taken.

    Raises
    ------
    Exception
        If the walk ends without reaching ``n_mutations``.
    """
    length = len(seq)
    c = Conformations(length, database)
    dGdependence = "fracfolded"
    wildtype = seq
    mutant = list(wildtype)
    hamming = 0
    attempts = 0
    path = []
    fitness = Fitness(temp, c, dGdependence=dGdependence, targets=target)
    # Loop-invariant grid: one row per site, one column per amino acid.
    # NOTE(review): assumes AMINO_ACIDS is a sequence of single residues so
    # the grid is 2-D -- confirm.
    AA_grid = np.array([AMINO_ACIDS] * length)
    while hamming < n_mutations and attempts < 100:
        # Calculate stability of all amino acids at all sites
        dG = np.zeros(AA_grid.shape, dtype=float)
        for (i, j), AA in np.ndenumerate(AA_grid):
            seq1 = mutant[:]
            seq1[i] = AA
            # NOTE(review): assumes fitness.Fitness returns a scalar score
            # for the given sequence -- confirm.
            dG[i, j] = fitness.Fitness(seq1)
        # Locate the best-scoring substitution (first one on ties).
        x, y = np.where(dG == dG.max())
        best_AA = AA_grid[x[0], y[0]]
        mutant[x[0]] = best_AA
        # Store a copy; `mutant` keeps being modified in later steps.
        path.append(mutant[:])
        hamming = hamming_distance(wildtype, mutant)
        attempts += 1
    if hamming < n_mutations:
        raise Exception("No adaptive paths n_mutations away.")
    return path