def _longest_common_substring(query, target):
    """Return the longest contiguous piece of ``query`` that occurs in ``target``.

    Both arguments are assumed to be ``str`` (substring membership is used).
    On ties the piece that starts earliest in ``query`` wins; an empty string
    is returned when the two share no character.
    """
    best = ""
    query_length = len(query)
    for start in range(query_length):
        # Only a match strictly longer than the current best is interesting,
        # and if the (len(best) + 1)-long prefix starting here is absent from
        # ``target`` then no longer prefix can be present either.
        end = start + len(best) + 1
        while end <= query_length and query[start:end] in target:
            best = query[start:end]
            end += 1
    return best


def going_beyond():
    """Find the longest stretch shared by nitrogenase and any metagenome entry.

    The nitrogenase sequence and the metagenome are obtained from the
    project's ``load`` module.  Each metagenome entry is indexed as
    ``entry[1]`` to get its sequence.

    Returns:
        The longest substring of the nitrogenase sequence that also occurs in
        some metagenome sequence.  On ties the first one encountered is kept
        (earliest entry, then earliest start in nitrogenase); ``""`` is
        returned when nothing matches.

    NOTE(review): assumes both loaders return plain ``str`` sequences --
    confirm against ``load``.
    """
    from load import load_metagenome, load_nitrogenase_seq

    nitrogenase = load_nitrogenase_seq()
    metagenome = load_metagenome()

    longest_snippet = ""
    for entry in metagenome:
        candidate = _longest_common_substring(nitrogenase, entry[1])
        # Strict comparison keeps the first winner when lengths tie.
        if len(candidate) > len(longest_snippet):
            longest_snippet = candidate
    return longest_snippet
def substring_checkc():
    """Load the sequences and print every metagenome entry.

    No matching is performed here: the nitrogenase sequence is loaded but not
    used, each metagenome entry is printed as-is, and the function returns
    ``None``.
    """
    # Result is unused; the call is kept so the loader still runs as before.
    load_nitrogenase_seq()
    # Per the original author's note, the metagenome is a list of tuples
    # holding the name of a sequence followed by the sequence itself.
    for entry in load_metagenome():
        print(entry)
def nitrogenase_substring():
    """Return the name of the metagenome entry that best matches nitrogenase.

    Every metagenome entry is read as ``entry[0]`` (name) and ``entry[1]``
    (sequence).  Each sequence is scored with
    ``longest_substring(sequence, nit_seq)``, and the name belonging to the
    maximum score is returned; the first entry wins a tie.

    The original author reported a run time of over an hour and had not
    verified the result.

    NOTE(review): ``max`` compares the raw return values of
    ``longest_substring``.  If that helper returns the substring itself
    rather than its length, the comparison is lexicographic, not by length --
    confirm against the helper.
    """
    import load

    nit_seq = load.load_nitrogenase_seq()
    scored = [
        (entry[0], longest_substring(entry[1], nit_seq))
        for entry in load.load_metagenome()
    ]
    best_name, _ = max(scored, key=lambda pair: pair[1])
    return best_name
import random from amino_acids import aa, codons, aa_table # you may find these useful #Importing the metagenome from load import load_metagenome metagenome = load_metagenome() # metagenome = 'ATGGGAAAACTCCGGCAGATCGCTTTCTACGGCAAGGGCGGGATCGGCAAGTCGACGACCTCGCAGAACACCCTCGCGGCACTGGTCGAGATGGGTCAGAAGATCCTCATCGTCGGCTGCGATCCCAAGGCCGACTCGACCCGCCTGATCCTGAACACCAAGCTGCAGGACACCGTGCTTCACCTCGCCGCCGAAGCGGGCTCCGTCGAGGATCTCGAACTCGAGGATGTGGTCAAGATCGGCTACAAGGGCATCAAATGCACCGAAGCCGGCGGGCCGGAGCCGGGCGTGGGCTGCGCGGGCCGCGGCGTCATCACCGCCATCAACTTCCTGGAAGAGAACGGCGCCTATGACGACGTCGACTACGTCTCCTACGACGTGCTGGGCGACGTGGTCTGCGGCGGCTTCGCCATGCCGATCCGCGAGAACAAGGCGCAGGAAATCTACATCGTCATGTCGGGCGAGATGATGGCGCTCTATGCGGCCAACAACATCGCCAAGGGCATCCTGAAATACGCGAACTCGGGCGGCGTGCGCCTCGGCGGCCTGATCTGCAACGAGCGCAAGACCGACCGCGAGCTGGAACTGGCCGAGGCCCTCGCCGCGCGTCTGGGCTGCAAGATGATCCACTTCGTTCCGCGCGACAATATCGTGCAGCACGCCGAGCTCCGCCGCGAGACGGTCATCCAGTATGCGCCCGAGAGCAAGCAGGCGCAGGAATATCGCGAACTGGCCCGCAAGATCCACGAGAACTCGGGCAAGGGCGTGATCCCGACCCCGATCACCATGGAAGAGCTGGAAGAGATGCTGATGGATTTCGGCATCATGCAGTCCGAGGAAGACCGGCTCGCCGCCATCGCCGCCGCCGAGGCCTGA' #Loading the nitrogenase from load import load_seq from load import load_nitrogenase_seq nitrogenase = load_nitrogenase_seq() def get_complement(nucleotide): """ Returns the complementary nucleotide nucleotide: a nucleotide (A, C, G, or T) represented as a string returns: the complementary nucleotide >>> get_complement('A') 'T' >>> get_complement('C') 'G' >>> get_complement('G') 'C' >>> get_complement('T') 'A' >>> get_complement('L')
from load import load_nitrogenase_seq
from load import load_metagenome


def longestSubstrings(meta, nitrogen, min_length=20):
    """Report metagenome entries sharing a long substring with ``nitrogen``.

    Args:
        meta: iterable of entries where ``entry[0]`` is a name and
            ``entry[1]`` is the sequence string to search in.
        nitrogen: the query sequence string.
        min_length: shortest shared substring worth reporting (default 20).

    Returns:
        A list of ``[name, length, substring]`` items, one for every entry
        whose longest shared substring has at least ``min_length`` characters.
        When several substrings tie, the one starting earliest in
        ``nitrogen`` is reported.
    """
    ans = []
    total = len(nitrogen)
    for m in meta:
        name, sequence = m[0], m[1]
        longest = ''
        for x in range(total):
            # Anything not longer than the current best cannot improve it, and
            # if this prefix is missing no longer prefix from x can be present.
            span = len(longest) + 1
            # "<=" lets a match run up to and including the last character.
            while x + span <= total and nitrogen[x:x + span] in sequence:
                longest = nitrogen[x:x + span]
                span += 1
        if len(longest) >= min_length:
            ans.append([name, len(longest), longest])
    return ans


if __name__ == '__main__':
    nitrogen = load_nitrogenase_seq().replace('\n', '')
    meta = load_metagenome()
    print(longestSubstrings(meta, nitrogen))
import pickle import sys from distance import levenshtein from gene_finder import * from load import load_metagenome metagenome = load_metagenome() # pauls_seq = 'GCCCGGACATTCTACATCTCCGCGAAAACACACACTTTTTCGTCTCCGGCGAAGCTTGGCACGCTCGTTGCAAAACAGGGATCAGCAAGGCGAGGGATGGTTGGCCGAGCAGTTACTGCAAAGGGCAACGTCCGCATCTGAGCCGTGCGACGGTTTTGAACGGAAGAAGGCTGCGCCTCGGCGCAAATCGATCAAGCGGCATTAGGTCAACGGAGAGAAAACATGGCACTTCGGCAAATCGCATTCTACGGCAAGGGCGGCATCGGCAAGTCGACCACCTCGCAGAACACCCTCGCGGCGCTGGTTGAGATGGGTCAGAAGATCCTGATCGTCGGCTGCGACCCCAAGGCGGACTCCACCCGTCTGATCCTCAACACCAAGATGCAGGACACGGTGCTGAGCCTCGCCGCGGAAGCGGGTTCGGTGGAAGACCTCGAACTCGAAGACGTGATGAAGATCGGCTACAAGGGCATCAAGTGCACCGAAGCCGGTGGCCCGGAGCCGGGCGTCGGCTGCGCCGGCCGCGGCGTTATCACCGCGATCAACTTCCTCGAAGAAAACGGCGCCTATGAAGACGTCGACTACGTCTCCTACGACGTGCTCGGCGACGTGGTGTGCGGCGGCTTCGCGATGCCGATCCGTGAAAACAAGGCGCAGGAAATCTACATCGTCATGTCCGGCGAGATGATGGCGCTGTATGCCGCCAACAACATCTCCAAGGGCATTCTGAAGTACGCTTCGTCGGGCGGCGTCCGTCTCGGCGGCCTGATCTGCAACGAGCGCCAGACCGACCGCGAGCTCGACCTCGCCGAAGCGCTGGCCAAGAAGCTGAACTCGAAGCTGATCCACTTCGTGCCGCGCGACAATATCGTGCAGCACGCCGAGCTGCGCCGCCAGACCGTGATCCAGTACGCGCCCGACAGCCAGCAGGCTAAGGAATATCGCGCCCTGGCCAACAAGGTCCATGCCAACTGCGGCAACGGCACCATCCCGACCCCGATCACCATGGAAGAGCTGGAAGAGATGCTGCTCGACTTCGGCATCATGAAGACCGAGGAGCAGCAGCTCGCCGAGCTCGCCGCCAAGGAAGCCGCCAAGGCGGCCGCGTCCGCCTGATCGCATCAGCCAGGCCGGTCGCCTAGCGCGACCGGCCGCCATCCCGGCGGCCCCAGACACGAGGAACAACGATGAGCACCGCAGTCGCAGAATCCCCCGCGGACATCAAGGAACGTAACAAGAAGCTGATCGGCGAAGTCCTGGAGGCCTATCCGGACAAGTCGGCCAAGCGTCGCGCCAAGCATCTCAACACGTACGACGCCGAGAAGGCGGAGTGCTCGGTCAAGTCCAACATCAAGTCGATCCCGGGCGTGATGACGATCCGCGGTTGCGCCTACGCCGGCTCGAAGGGCGTGGTGTGGGGCCCGATCAAGGACATGGTCCACATCAGCCACGGCCCGGTCGGCTGCGGCCAGTATTCGTGGGGTTCGCGCCGCAACTATTACAAGGGAACCACCGGCGTCGACACTTTCGGCACGATGCAGTTCACCTCCGACTTCCAGGAGAAGGACATCGTTTTCGGCGGTGACAAGAAGCTCGGCAAGATCATCGACGAGATCCAGGAGCTGTTCCCGCTCTCCAAGGGCATCTCGGTGCAGTCGGAATGCCCGATCGGTCTGATCGGCGACGACATCGAGGCGGTCTCCAAGGCCAAGTCGAAGCAGTATGACGGCAAGCCGATCATCCCGGTCCGCTGCGAAGGCTTCCGCGGCGTGTCGCAGTCGCTCGGCCACCACATCGCCAACGACGTGATCCGTGACTGGGTGTTCGACAAGGCCGC
CGAGAAGAACGCCGGCTTCCAGTCGACCCCCTACGACGTCGCGATCATCGGCGACTACAACATCGGCGGCGATGCCTGGGCCTCGCGCATCCTGCTCGAGGAAATGGGCCTCCGCGTGATCGCGCAGTGGTCCGGCGACGGCACCATCGCGGAGCTGGAGAACACCCCGAAGGCGAAGCTGAACATCCTGCACTGCTACCGCTCGATGAACTACATCACGCGGCACATGGAAGAGAAGTTCGGTATTCCGTGGGTTGAATACAACTTCTTCGGCCCGTCCAAGATCGA' #Loading the nitrogenase from load import load_nitrogenase_seq nitrogenase = load_nitrogenase_seq() sys.setrecursionlimit(20000) if __name__ == "__main__": i = 0 holder_dna = [] for a in metagenome[1:5]: dna = a[1] snippet = find_all_ORFs_both_strands(dna) for item in snippet: if len(item[0]) > .8 * len(nitrogenase): holder_dna.append(item) data_output = [] lengths = [len(item[0]) for item in holder_dna]
from load import load_nitrogenase_seq
from load import load_metagenome


def longestSubstrings(meta, nitrogen, min_length=20):
    """Collect, per metagenome entry, its longest substring shared with ``nitrogen``.

    Args:
        meta: iterable of entries; ``entry[0]`` is the entry name and
            ``entry[1]`` the sequence string that is searched.
        nitrogen: query sequence string.
        min_length: minimum shared length for an entry to be reported
            (default 20).

    Returns:
        A list of ``[name, length, substring]`` lists for the entries whose
        longest shared substring is at least ``min_length`` long.  Ties are
        resolved in favour of the earliest start position in ``nitrogen``.
    """
    ans = []
    query_length = len(nitrogen)
    for m in meta:
        name, sequence = m[0], m[1]
        longest = ''
        for x in range(query_length):
            # Start just past the current best: shorter candidates cannot win,
            # and a missing prefix rules out every longer prefix from x.
            span = len(longest) + 1
            # Inclusive bound so a match may end on the final character.
            while x + span <= query_length and nitrogen[x:x + span] in sequence:
                longest = nitrogen[x:x + span]
                span += 1
        if len(longest) >= min_length:
            ans.append([name, len(longest), longest])
    return ans


if __name__ == '__main__':
    nitrogen = load_nitrogenase_seq().replace('\n', '')
    meta = load_metagenome()
    print(longestSubstrings(meta, nitrogen))
from load import load_nitrogenase_seq
from load import load_metagenome
#import pypy
from gene_finder import *

nitrogenase = str(load_nitrogenase_seq())
metagenome = load_metagenome()
one_metagenome = metagenome[0]  #a tuple


def longestSubstringMetagenomeSnippet(enzyme, metagenome_snippet):
    """Return the longest contiguous run shared by the two sequences.

    Uses the standard dynamic-programming recurrence: the cell for
    (x, y) holds the length of the common run ending at ``enzyme[x - 1]``
    and ``metagenome_snippet[y - 1]``.  Only the previous row is ever read,
    so two rolling rows are kept instead of the full table.

    Args:
        enzyme: the sequence the result is sliced from.
        metagenome_snippet: the sequence it is compared against.

    Returns:
        The slice of ``enzyme`` forming the longest common run.  When several
        runs tie, the one ending earliest in ``enzyme`` is returned; an empty
        slice is returned when nothing matches.
    """
    width = 1 + len(metagenome_snippet)
    previous = [0] * width
    longest, x_longest = 0, 0
    for x, enzyme_char in enumerate(enzyme, start=1):
        current = [0] * width  # mismatching cells stay 0
        for y, snippet_char in enumerate(metagenome_snippet, start=1):
            if enzyme_char == snippet_char:
                # Extend the run that ended on the upper-left diagonal.
                run = previous[y - 1] + 1
                current[y] = run
                if run > longest:
                    longest = run
                    x_longest = x
        previous = current
    return enzyme[x_longest - longest: x_longest]


def longestSubstringAllMetagenomeSnippets(enzyme, metagenome):
    i = 0
    ORF_lengths = []
    genome_names = []  #I think that's what you call a part of a metagenome?
    for name, metagenome_snippet in metagenome:
        ORF = longestSubstringMetagenomeSnippet(enzyme, metagenome_snippet)