Пример #1
0
from __future__ import print_function
import os
from functools import reduce

from revp import read_fasta


def longest_common_substring(s1, s2):
    '''Return the longest contiguous run shared by ``s1`` and ``s2``.

    Dynamic-programming solution, see
    http://en.wikipedia.org/wiki/Longest_common_substring_problem#Pseudocode
    for more details.

    When several common substrings share the maximal length, the one found
    first while scanning ``s1`` left to right (and ``s2`` left to right for
    each position of ``s1``) is returned.  When the inputs have nothing in
    common (including when either is empty) an empty slice of ``s1`` is
    returned, so the function can safely be chained with ``reduce``.
    '''
    # Start from an empty slice so the result is defined even if no
    # character ever matches (previously this raised UnboundLocalError).
    longest = s1[:0]
    best_len = 0

    # prev_row[j] is the length of the common suffix ending at s1[i - 1]
    # and s2[j].  Only the previous row is ever consulted, so the full
    # (i, j) table does not need to be kept in memory.
    prev_row = {}

    for i, c1 in enumerate(s1):
        cur_row = {}
        for j, c2 in enumerate(s2):
            if c1 != c2:
                continue
            run = prev_row.get(j - 1, 0) + 1
            cur_row[j] = run
            # Strict comparison keeps the first maximal match found.
            if run > best_len:
                best_len = run
                longest = s1[i - run + 1:i + 1]
        prev_row = cur_row
    return longest


if __name__ == "__main__":
    dataset_path = os.path.join('data', 'rosalind_lcsm.txt')
    with open(dataset_path) as dataset:
        seqs = read_fasta(dataset)
        # Duplicated sequences add nothing to the search, so collapse them
        # before folding the pairwise function over the whole collection.
        unique_seqs = set(seqs.values())
        shared = reduce(longest_common_substring, unique_seqs)
        print(shared)
Пример #2
0
#!/usr/bin/env python

from __future__ import print_function
import os
from math import factorial

from revp import read_fasta


def perfect_matchings(seq):
    '''Return ``(count of 'G')! * (count of 'A')!`` for ``seq``.

    NOTE(review): presumably the caller guarantees that 'A'/'U' and
    'G'/'C' occur equally often; nothing here checks it.
    '''
    guanines = seq.count('G')
    adenines = seq.count('A')
    return factorial(adenines) * factorial(guanines)


if __name__ == "__main__":
    dataset_path = os.path.join('data', 'rosalind_pmch.txt')
    with open(dataset_path) as dataset:
        records = read_fasta(dataset)
        # popitem() yields a (key, value) pair; only the value is used.
        seq = records.popitem()[1]
    matchings = perfect_matchings(seq)
    print(matchings)
Пример #3
0
#!/usr/bin/env python

from __future__ import print_function
import os
from math import factorial

from revp import read_fasta


def maximum_matchings(seq):
    '''Return the product of the partial-permutation counts for G/C and A/U.

    For each pair of symbols, with ``lo`` and ``hi`` the smaller and larger
    of their two counts in ``seq``, the factor is ``hi! / (hi - lo)!``.
    '''
    def _arrangements(first, second):
        # hi! // (hi - lo)! is exact: the divisor always divides hi!.
        lo, hi = sorted([seq.count(first), seq.count(second)])
        return factorial(hi) // factorial(hi - lo)

    gc_ways = _arrangements('G', 'C')
    au_ways = _arrangements('A', 'U')
    return gc_ways * au_ways


if __name__ == "__main__":
    dataset_path = os.path.join('data', 'rosalind_mmch.txt')
    with open(dataset_path) as dataset:
        records = read_fasta(dataset)
        # popitem() yields a (key, value) pair; only the value is used.
        seq = records.popitem()[1]
    matchings = maximum_matchings(seq)
    print(matchings)
Пример #4
0

# Base URL for protein records; the script appends '<id>.fasta' to it when
# downloading each entry.
UNIPROT_URL = 'http://www.uniprot.org/uniprot/'


def find_N_glycosylation_motif(protein):
    '''Return the positions in ``protein`` where the motif N{P}[ST]{P} starts.

    The positions are the values produced by ``substring_find(protein, 'N')``
    that pass the checks below.
    NOTE(review): the indexing implies those values are 1-based (the 'N'
    sits at ``protein[pos - 1]``) -- confirm against ``substring_find``.
    '''
    hits = []
    for pos in substring_find(protein, 'N'):
        # A full four-residue window must be available from the 'N' on.
        window = protein[pos - 1:pos + 3]
        if len(window) != 4:
            continue
        # Residue right after 'N' must not be proline.
        if protein[pos] == 'P':
            continue
        # Next residue must be serine or threonine.
        if protein[pos + 1] not in ('S', 'T'):
            continue
        # Last residue of the window must not be proline either.
        if protein[pos + 2] == 'P':
            continue
        hits.append(pos)
    return hits


if __name__ == "__main__":
    with open(os.path.join('data', 'rosalind_mprt.txt')) as dataset:
        # One id per line.  Blank lines (e.g. a trailing newline at the end
        # of the file) are skipped so no request is made for an empty id.
        uniprot_ids = [uid for uid in (r.rstrip() for r in dataset) if uid]

    # Download every record and keep only its sequence, keyed by id.
    proteins = {}
    for uniprot_id in uniprot_ids:
        prot = urlopen('{0}{1}.fasta'.format(UNIPROT_URL, uniprot_id))
        try:
            proteins[uniprot_id] = read_fasta(prot).popitem()[1]
        finally:
            # Release the connection even if parsing the response fails.
            prot.close()

    # Print the id followed by the space-separated motif positions, but
    # only for proteins that contain at least one motif.
    for protein in proteins:
        pos = find_N_glycosylation_motif(proteins[protein])
        if pos:
            print(protein)
            print(*pos)