Пример #1
0
def going_beyond():
    """Return the longest nitrogenase substring found in any metagenome entry.

    Loads the nitrogenase sequence and the metagenome through the ``load``
    module, then compares the nitrogenase sequence against element ``[1]`` of
    every metagenome entry.

    Returns:
        The longest string that occurs both in the nitrogenase sequence and
        in at least one metagenome sequence, or ``""`` when nothing matches.
        On ties, the match from the earliest metagenome entry wins, then the
        one starting earliest in the nitrogenase sequence.

    NOTE(review): the sequences are compared exactly as loaded. If the
    loader leaves newlines in the nitrogenase sequence, matches cannot span
    them -- confirm against ``load.load_nitrogenase_seq``.
    """
    from load import load_nitrogenase_seq
    from load import load_metagenome

    nitrogenase = load_nitrogenase_seq()
    metagenome = load_metagenome()

    longest_snippet = ""
    for entry in metagenome:
        sequence = entry[1]
        # previous[j + 1] holds the length of the common run that ends at
        # nitrogenase[i - 1] and sequence[j]. Every alignment is examined,
        # so no candidate start position is skipped (the earlier
        # "j += 1 + length" shortcut could jump over the start of a longer
        # match).
        previous = [0] * (len(sequence) + 1)
        for i, base in enumerate(nitrogenase):
            current = [0] * (len(sequence) + 1)
            for j, other in enumerate(sequence):
                if base == other:
                    run = previous[j] + 1
                    current[j + 1] = run
                    # Strict comparison keeps the first match of a given
                    # length, as the original scan order did.
                    if run > len(longest_snippet):
                        longest_snippet = nitrogenase[i + 1 - run:i + 1]
            previous = current

    return longest_snippet
Пример #2
0
def substring_checkc():
    """Print every entry of the loaded metagenome, one per line.

    The nitrogenase sequence is loaded as well but is not used afterwards.
    Nothing is returned.
    """
    nitrogenase = load_nitrogenase_seq()  # loaded but not used below
    # Per the original author's note, the metagenome is a list of tuples:
    # the name of a sequence followed by the sequence itself.
    entries = load_metagenome()

    for entry in entries:
        print(entry)
Пример #3
0
def nitrogenase_substring():
    """Return the name of the metagenome entry that best matches nitrogenase.

    Each metagenome entry is scored by calling
    ``longest_substring(entry[1], nitrogenase)``; the first element of the
    entry with the greatest score is returned. ``max`` raises ``ValueError``
    when the metagenome is empty.

    NOTE(review): the original author reported a run time of over an hour
    and did not verify the result. If ``longest_substring`` returns a string
    rather than a length, ``max`` compares the strings lexicographically,
    not by length -- confirm against that helper.
    """
    import load

    nitrogenase = load.load_nitrogenase_seq()
    scored = [
        (entry[0], longest_substring(entry[1], nitrogenase))
        for entry in load.load_metagenome()
    ]
    best = max(scored, key=lambda pair: pair[1])
    return best[0]
Пример #4
0
# -*- coding: utf-8 -*-
"""
Last updated: April 11, 2016
Find Nitrogenase
@author: Erica Lee, Rebecca Gettys, Liv Kelley
"""

import random
from amino_acids import aa, codons, aa_table   # you may find these useful

# Load the metagenome once, at import time, into the module-level name
# `metagenome`.
# NOTE(review): presumably a list of (name, sequence) tuples -- confirm
# against load.load_metagenome.

from load import load_metagenome
metagenome = load_metagenome()
# metagenome = 'ATGGGAAAACTCCGGCAGATCGCTTTCTACGGCAAGGGCGGGATCGGCAAGTCGACGACCTCGCAGAACACCCTCGCGGCACTGGTCGAGATGGGTCAGAAGATCCTCATCGTCGGCTGCGATCCCAAGGCCGACTCGACCCGCCTGATCCTGAACACCAAGCTGCAGGACACCGTGCTTCACCTCGCCGCCGAAGCGGGCTCCGTCGAGGATCTCGAACTCGAGGATGTGGTCAAGATCGGCTACAAGGGCATCAAATGCACCGAAGCCGGCGGGCCGGAGCCGGGCGTGGGCTGCGCGGGCCGCGGCGTCATCACCGCCATCAACTTCCTGGAAGAGAACGGCGCCTATGACGACGTCGACTACGTCTCCTACGACGTGCTGGGCGACGTGGTCTGCGGCGGCTTCGCCATGCCGATCCGCGAGAACAAGGCGCAGGAAATCTACATCGTCATGTCGGGCGAGATGATGGCGCTCTATGCGGCCAACAACATCGCCAAGGGCATCCTGAAATACGCGAACTCGGGCGGCGTGCGCCTCGGCGGCCTGATCTGCAACGAGCGCAAGACCGACCGCGAGCTGGAACTGGCCGAGGCCCTCGCCGCGCGTCTGGGCTGCAAGATGATCCACTTCGTTCCGCGCGACAATATCGTGCAGCACGCCGAGCTCCGCCGCGAGACGGTCATCCAGTATGCGCCCGAGAGCAAGCAGGCGCAGGAATATCGCGAACTGGCCCGCAAGATCCACGAGAACTCGGGCAAGGGCGTGATCCCGACCCCGATCACCATGGAAGAGCTGGAAGAGATGCTGATGGATTTCGGCATCATGCAGTCCGAGGAAGACCGGCTCGCCGCCATCGCCGCCGCCGAGGCCTGA'



# Load the nitrogenase sequence once, at import time, into the module-level
# name `nitrogenase`. `load_seq` is imported here as well; its use is not
# visible in this part of the file.

from load import load_seq
from load import load_nitrogenase_seq
nitrogenase = load_nitrogenase_seq()


def get_complement(nucleotide):
    """ Returns the complementary nucleotide
        nucleotide: a nucleotide (A, C, G, or T) represented as a string
        returns: the complementary nucleotide
    >>> get_complement('A')
    'T'
Пример #5
0
from load import load_nitrogenase_seq
from load import load_metagenome


def longestSubstrings(meta, nitrogen, min_length=20):
    """Find, per metagenome entry, its longest substring shared with `nitrogen`.

    Args:
        meta: iterable of entries where ``entry[0]`` is a name and
            ``entry[1]`` is the sequence string to search in.
        nitrogen: the reference sequence whose substrings are looked up.
        min_length: smallest shared-substring length worth reporting.
            Defaults to 20, the threshold that used to be hard-coded.

    Returns:
        A list of ``[name, length, substring]`` lists, one for every entry
        whose longest shared substring has at least ``min_length``
        characters, in the order the entries appear in ``meta``. On ties
        the substring starting earliest in ``nitrogen`` is reported.
    """
    ans = []
    total = len(nitrogen)
    for m in meta:
        name, sequence = m[0], m[1]
        longest = ''
        for start in range(total):
            # Only a candidate strictly longer than the current best can
            # improve it, and if that candidate occurs in `sequence` then
            # all of its shorter prefixes do too, so probing begins there.
            end = start + len(longest) + 1
            # `end <= total` (not `<`) lets a match reach the final
            # character of `nitrogen`.
            while end <= total and nitrogen[start:end] in sequence:
                longest = nitrogen[start:end]
                end += 1
        if len(longest) >= min_length:
            ans.append([name, len(longest), longest])
    return ans


if __name__ == '__main__':
    # Script entry point: strip line breaks from the nitrogenase sequence,
    # load the metagenome, and print the result of longestSubstrings.
    raw_sequence = load_nitrogenase_seq()
    nitrogen = raw_sequence.replace('\n', '')
    meta = load_metagenome()
    matches = longestSubstrings(meta, nitrogen)
    print(matches)
Пример #6
0
def loading_dna():
    """Load the metagenome and discard it; nothing is returned."""
    # Per the original author's note, the loaded value is a list of tuples:
    # the name of a sequence followed by the sequence itself.
    load_metagenome()