def _extend_longest_match(query, target, best):
    """Return the longest substring of ``query`` found in ``target``.

    ``best`` is the longest match known so far (possibly from earlier
    targets). It is returned unchanged unless a strictly longer substring of
    ``query`` occurs in ``target``; ties therefore keep the earlier match.

    Assumes ``target`` supports substring membership (``in``), as ``str``
    does.
    """
    best_len = len(best)
    query_len = len(query)
    for start in range(query_len):
        # Only a strictly longer match can replace ``best``, so probe length
        # ``best_len + 1`` directly: if that prefix is absent from ``target``,
        # no longer substring starting at ``start`` can be present either.
        while (start + best_len < query_len
               and query[start:start + best_len + 1] in target):
            best_len += 1
            best = query[start:start + best_len]
    return best


def going_beyond():
    """Return the longest nitrogenase substring found in any metagenome entry.

    The nitrogenase sequence and the metagenome are obtained from the
    ``load`` module. Each metagenome entry is indexed as ``entry[1]`` to get
    its sequence. Every start position in the nitrogenase sequence is
    considered against every entry; earlier versions skipped ahead in the
    metagenome sequence after a partial match, which could jump over the
    start of a longer match.

    Returns:
        The longest common substring. When several have the same length, the
        first one found is kept (entries in order, then start position in the
        nitrogenase sequence). Returns ``""`` when nothing matches or the
        metagenome is empty.
    """
    from load import load_nitrogenase_seq
    from load import load_metagenome

    nitrogenase = load_nitrogenase_seq()
    metagenome = load_metagenome()

    longest_snippet = ""
    for entry in metagenome:
        longest_snippet = _extend_longest_match(
            nitrogenase, entry[1], longest_snippet)
    return longest_snippet
def substring_checkc():
    """Load both data sets and print every metagenome entry.

    This is an inspection aid: no comparison is performed and nothing is
    returned.
    """
    # The return value is not used; only the call itself is retained.
    load_nitrogenase_seq()
    # Author's note: the metagenome is a list of tuples, each holding the
    # name of a sequence followed by the sequence itself.
    for entry in load_metagenome():
        print(entry)
def nitrogenase_substring():
    """Return the name of the metagenome entry that scores highest.

    Each metagenome entry is scored by calling ``longest_substring`` with the
    entry's sequence (``entry[1]``) and the nitrogenase sequence; the name
    (``entry[0]``) of the entry with the greatest score is returned.

    The original author reported a run of over an hour and had not verified
    the result, so treat the output as unvalidated.

    Raises:
        ValueError: from ``max`` when the metagenome has no entries.
    """
    import load

    nitrogenase = load.load_nitrogenase_seq()
    scored = [
        (entry[0], longest_substring(entry[1], nitrogenase))
        for entry in load.load_metagenome()
    ]
    # NOTE(review): entries are ranked by whatever ``longest_substring``
    # returns. If that is a string rather than a length, the comparison is
    # lexicographic, not by length -- confirm against its definition.
    best_name, _ = max(scored, key=lambda pair: pair[1])
    return best_name
# -*- coding: utf-8 -*- """ Last updated: April 11, 2016 Find Nitrogenase @author: Erica Lee, Rebecca Gettys, Liv Kelley """ import random from amino_acids import aa, codons, aa_table # you may find these useful #Importing the metagenome from load import load_metagenome metagenome = load_metagenome() # metagenome = 'ATGGGAAAACTCCGGCAGATCGCTTTCTACGGCAAGGGCGGGATCGGCAAGTCGACGACCTCGCAGAACACCCTCGCGGCACTGGTCGAGATGGGTCAGAAGATCCTCATCGTCGGCTGCGATCCCAAGGCCGACTCGACCCGCCTGATCCTGAACACCAAGCTGCAGGACACCGTGCTTCACCTCGCCGCCGAAGCGGGCTCCGTCGAGGATCTCGAACTCGAGGATGTGGTCAAGATCGGCTACAAGGGCATCAAATGCACCGAAGCCGGCGGGCCGGAGCCGGGCGTGGGCTGCGCGGGCCGCGGCGTCATCACCGCCATCAACTTCCTGGAAGAGAACGGCGCCTATGACGACGTCGACTACGTCTCCTACGACGTGCTGGGCGACGTGGTCTGCGGCGGCTTCGCCATGCCGATCCGCGAGAACAAGGCGCAGGAAATCTACATCGTCATGTCGGGCGAGATGATGGCGCTCTATGCGGCCAACAACATCGCCAAGGGCATCCTGAAATACGCGAACTCGGGCGGCGTGCGCCTCGGCGGCCTGATCTGCAACGAGCGCAAGACCGACCGCGAGCTGGAACTGGCCGAGGCCCTCGCCGCGCGTCTGGGCTGCAAGATGATCCACTTCGTTCCGCGCGACAATATCGTGCAGCACGCCGAGCTCCGCCGCGAGACGGTCATCCAGTATGCGCCCGAGAGCAAGCAGGCGCAGGAATATCGCGAACTGGCCCGCAAGATCCACGAGAACTCGGGCAAGGGCGTGATCCCGACCCCGATCACCATGGAAGAGCTGGAAGAGATGCTGATGGATTTCGGCATCATGCAGTCCGAGGAAGACCGGCTCGCCGCCATCGCCGCCGCCGAGGCCTGA' #Loading the nitrogenase from load import load_seq from load import load_nitrogenase_seq nitrogenase = load_nitrogenase_seq() def get_complement(nucleotide): """ Returns the complementary nucleotide nucleotide: a nucleotide (A, C, G, or T) represented as a string returns: the complementary nucleotide >>> get_complement('A') 'T'
from load import load_nitrogenase_seq
from load import load_metagenome


def longestSubstrings(meta, nitrogen, min_length=20):
    """Find, per metagenome entry, the longest substring shared with ``nitrogen``.

    Args:
        meta: iterable of entries indexed as ``entry[0]`` (name) and
            ``entry[1]`` (sequence). The sequence must support substring
            membership (``in``), as ``str`` does.
        nitrogen: the query sequence whose substrings are searched for.
        min_length: only entries whose longest shared substring has at least
            this many characters are reported. Defaults to 20.

    Returns:
        A list of ``[name, length, substring]`` lists, one per qualifying
        entry, in the order the entries were given. When an entry has several
        longest shared substrings, the one starting earliest in ``nitrogen``
        is reported.
    """
    ans = []
    total = len(nitrogen)
    for m in meta:
        sequence = m[1]
        length = 0
        longest = ''
        for x in range(total):
            # Only a strictly longer match matters, so probe ``length + 1``
            # directly. The bounds check comes first (it is cheap) and allows
            # the slice to reach the final character of ``nitrogen``; the
            # previous ``x + span < len(nitrogen)`` test excluded it.
            while (x + length < total
                   and nitrogen[x:x + length + 1] in sequence):
                length += 1
                longest = nitrogen[x:x + length]
        if length >= min_length:
            ans.append([m[0], length, longest])
    return ans


if __name__ == '__main__':
    nitrogen = load_nitrogenase_seq().replace('\n', '')
    meta = load_metagenome()
    print(longestSubstrings(meta, nitrogen))
def loading_dna():
    """Call ``load_metagenome`` and discard the result; returns ``None``."""
    # Author's note: the loader returns a list of tuples, each holding the
    # name of a sequence followed by the sequence itself.
    _records = load_metagenome()
    return None