Пример #1
0
from sequence import rev_complement
from fastamasta import FastaReader
from functional import partition

if __name__ == "__main__":
    # Work on the sequence (index 1) of the first record in the file.
    records = list(FastaReader("data/44.fas"))
    dna = records[0][1]

    # Gather (1-based piece index, piece length) for every piece that is
    # equal to its own reverse complement, trying lengths 4 through 12.
    # NOTE(review): partition() is assumed to yield the length-`length`
    # pieces of `dna` in order -- confirm against the functional module.
    hits = []
    for length in range(4, 13):
        pieces = [''.join(chunk) for chunk in partition(dna, length, list)]
        hits.extend(
            (index, length)
            for index, piece in enumerate(pieces, 1)
            if piece == rev_complement(piece)
        )

    # One "index length" pair per line.
    for index, length in hits:
        print("%s %s" % (index, length))
Пример #2
0
from fastamasta import FastaReader
from functional import partition

if __name__ == "__main__":
    # Keep only the sequence part (index 1) of every record.
    data = [datum[1] for datum in FastaReader("data/21.fas")]

    # A substring shared by all sequences must occur in the shortest one, so
    # candidates are drawn from it only. min() returns the first of several
    # equally short sequences and works for sequences of any length (a
    # fixed-size placeholder would break once every sequence exceeds it).
    # Raises ValueError when the file holds no records.
    shortest = min(data, key=len)

    data.remove(shortest)
    # NOTE(review): this dumps the remaining sequences and looks like
    # leftover debugging output -- confirm whether it is still wanted.
    print(data)

    # Try candidate lengths from the full shortest sequence down to 2 and
    # print every candidate contained in all remaining sequences.
    # NOTE(review): partition() is assumed to yield the length-`size` pieces
    # of `shortest` as str -- confirm against the functional module.
    for size in range(len(shortest), 1, -1):
        for part in partition(shortest, size, str):
            if all(part in datum for datum in data):
                print(part)
Пример #3
0
from aaiter import find_orfs
from fastamasta import FastaReader
from sequence import translate, rev_complement

if __name__ == "__main__":
    # Read the first record and derive its reverse complement.
    reader = FastaReader("data/12.dat")
    forward = reader.readnext()[1]
    backward = rev_complement(forward)

    # Locate ORFs on both strands; each ORF is used as a pair of slice bounds
    # (item[0], item[1]) into the strand it was found on.
    strands = (
        (forward, find_orfs(forward)),
        (backward, find_orfs(backward)),
    )

    # A set keeps each distinct translation only once.
    proteins = set()
    for strand, spans in strands:
        proteins.update(
            translate(strand[span[0]:span[1]]) for span in spans
        )

    for protein in proteins:
        print(protein)
Пример #4
0
from fastamasta import FastaReader
from functional import frequencies

if __name__ == '__main__':
    # Scan every record and remember the one with the highest GC fraction.
    fr = FastaReader('data/6.fas')
    # (record[0] of the best record so far, its GC fraction in [0, 1])
    hi_gc_content = ('', 0.0)

    while True:
        current = fr.readnext()

        # readnext() signals the end of the file with None.
        if current is None:
            break

        sequence = current[1]
        if not sequence:
            # An empty sequence has no GC fraction; skipping it avoids a
            # division by zero.
            continue

        sym_count = frequencies(sequence)
        # .get() tolerates sequences that contain no 'C' or no 'G' at all.
        gc_total = sym_count.get('C', 0) + sym_count.get('G', 0)
        curr_gc_content = float(gc_total) / float(len(sequence))

        # Strict comparison: on a tie the earlier record is kept.
        if curr_gc_content > hi_gc_content[1]:
            hi_gc_content = (current[0], curr_gc_content)

    # Report inside the guard so importing this module neither prints nor
    # fails on an undefined name. The fraction is shown as a percentage
    # rounded to six decimal places.
    print(hi_gc_content[0])
    print(round(hi_gc_content[1] * 100, 6))
Пример #5
0
from Bio import pairwise2
from Bio.SubsMat.MatrixInfo import blosum62
from fastamasta import FastaReader

if __name__ == "__main__":
    # Sequence part (index 1) of every record; only the first two are used.
    sequences = [record[1] for record in FastaReader("data/35.fas")]

    # Global alignment scored with BLOSUM62, gap-open penalty -11 and
    # gap-extension penalty -1.
    alignments = pairwise2.align.globalds(
        sequences[0], sequences[1], blosum62, -11, -1)

    # Only the first reported alignment is printed: its score as an integer,
    # followed by the two aligned sequences.
    best = next(iter(alignments), None)
    if best is not None:
        print(int(best[2]))
        print(best[0])
        print(best[1])
Пример #6
0
from fastamasta import FastaReader

if __name__ == "__main__":
    text = FastaReader("data/22.fas").readnext()[1]

    # border[k] is the length of the longest proper prefix of text[:k + 1]
    # that is also a suffix of it (the KMP failure array).
    border = [0] * len(text)
    for pos in range(1, len(text)):
        matched = border[pos - 1]
        # Fall back to ever shorter borders until one can be extended by
        # text[pos] or none is left.
        while matched > 0 and text[pos] != text[matched]:
            matched = border[matched - 1]
        border[pos] = matched + 1 if text[pos] == text[matched] else matched

    # Space-separated values on one line; nothing is printed for an empty
    # sequence.
    if border:
        print(' '.join(str(value) for value in border))


Пример #7
0
from functional import partition, frequencies
from fastamasta import FastaReader

def alphabet(order, length, string='', arr=None):
    """Enumerate every word made of ``length`` symbols drawn from ``order``.

    The leftmost symbol varies slowest and each position runs through
    ``order`` in the order given, so a sorted ``order`` yields sorted words.

    Args:
        order: Iterable of string symbols to build words from.
        length: Number of symbols still to be appended to ``string``.
        string: Prefix placed in front of every generated word.
        arr: Optional list the words are appended to; a fresh list is
            created when it is omitted.

    Returns:
        The list holding the generated words (``arr`` itself when given).
    """
    words = [] if arr is None else arr

    # Nothing left to append: the prefix is a complete word.
    if length == 0:
        words.append(string)
        return words

    # Extend the prefix by each symbol in turn and fill in the remainder.
    for symbol in order:
        alphabet(order, length - 1, string + symbol, words)
    return words

if __name__ == "__main__":
    # NOTE(review): the sequence is read from data/14.dat. An earlier
    # revision also opened data/13.dat without ever reading from it; that
    # unused handle is gone -- confirm 14.dat is the intended input.
    reader = FastaReader('data/14.dat')
    data = reader.readnext()

    # Every 4-letter word over A, C, G, T, in the order given by `order`.
    order, size = ['A', 'C', 'G', 'T'], 4
    alph = alphabet(order, size)

    # Occurrence count of each piece of the sequence (record index 1).
    # NOTE(review): partition() is assumed to yield the length-`size` pieces
    # of the sequence as str -- confirm against the functional module.
    fourmer_count = frequencies(partition(data[1], size, str))

    # One count per word, each followed by a single space; words that never
    # occur are reported as 0.
    print(''.join(str(fourmer_count.get(word, 0)) + ' ' for word in alph))
Пример #8
0
from fastamasta import FastaReader
from sequence import edit_dist

if __name__ == "__main__":
    # Compare the sequences (index 1) of the first two records in the file.
    data = list(FastaReader("data/30.fas"))
    a, b = data[0][1], data[1][1]

    # The helper is imported as `edit_dist`; calling it under any other name
    # raises NameError.
    print(edit_dist(a, b))