# Lists windows of a DNA sequence (lengths 4 through 12) that are left
# unchanged by rev_complement, printing "<position> <length>" for each.
from sequence import rev_complement
from fastamasta import FastaReader
from functional import partition

if __name__ == "__main__":
    # Only the first record is analysed; element [1] of a record is its
    # sequence string.
    records = list(FastaReader("data/44.fas"))
    dna = records[0][1]

    hits = []
    for width in range(4, 13):
        # partition(..., list) presumably yields the length-`width` windows
        # as lists of characters (verify against functional.partition);
        # they are joined back into strings for comparison.
        windows = [''.join(chunk) for chunk in partition(dna, width, list)]
        for index, window in enumerate(windows):
            if window == rev_complement(window):
                # Positions are reported as window index + 1.
                hits.append((index + 1, width))

    # Results are grouped by window length because of the outer loop order.
    for position, width in hits:
        print("%s %s" % (position, width))
# Prints every piece of the shortest sequence (length >= 2, longest first)
# that occurs as a substring of all the other sequences in the FASTA file.
from fastamasta import FastaReader
from functional import partition

if __name__ == "__main__":
    sequences = [record[1] for record in FastaReader("data/21.fas")]

    # Any common substring must fit inside the shortest sequence, so only its
    # pieces are candidates.  min() selects it directly instead of comparing
    # against a fixed-length placeholder, so sequences of any length work.
    # An empty input file raises ValueError here.
    shortest = min(sequences, key=len)
    sequences.remove(shortest)

    # Echo the sequences that the candidates will be checked against.
    print(sequences)

    # Walk candidate lengths from the full sequence down to 2 so that the
    # longest shared substrings are printed first.
    for width in range(len(shortest), 1, -1):
        # partition(..., str) presumably yields the length-`width` pieces of
        # `shortest` as strings (verify against functional.partition).
        for candidate in partition(shortest, width, str):
            if all(candidate in other for other in sequences):
                print(candidate)
# Prints the distinct translations of every ORF that find_orfs reports on a
# sequence and on its reverse complement.
from aaiter import find_orfs
from fastamasta import FastaReader
from sequence import translate, rev_complement

if __name__ == "__main__":
    # First record of the file; element [1] is the sequence string.
    forward = FastaReader("data/12.dat").readnext()[1]
    reverse = rev_complement(forward)

    # A set removes translations that are found more than once.
    proteins = set()
    for strand in (forward, reverse):
        for bounds in find_orfs(strand):
            # Each ORF is used as a (start, stop) slice into its own strand.
            proteins.add(translate(strand[bounds[0]:bounds[1]]))

    for protein in proteins:
        print(protein)
# Prints the name of the FASTA record with the highest GC content, followed
# by that content as a percentage rounded to six decimal places.
from fastamasta import FastaReader
from functional import frequencies

if __name__ == '__main__':
    reader = FastaReader('data/6.fas')
    best_name, best_gc = '', 0.0

    # readnext() returns None once the file is exhausted; iter() with a
    # sentinel turns that protocol into a plain for loop.
    for record in iter(reader.readnext, None):
        name, sequence = record[0], record[1]
        if not sequence:
            # An empty sequence has no defined GC content (and would divide
            # by zero below), so it cannot be the maximum.
            continue

        counts = frequencies(sequence)
        # .get() keeps a sequence that lacks C or G from raising KeyError.
        gc_total = counts.get('C', 0) + counts.get('G', 0)
        gc_fraction = float(gc_total) / len(sequence)

        # Strict comparison: on ties the earliest record wins.
        if gc_fraction > best_gc:
            best_name, best_gc = name, gc_fraction

    print(best_name)
    print(round(best_gc * 100, 6))
# Globally aligns the first two sequences of a FASTA file and prints the
# integer score followed by the two aligned strings.
from Bio import pairwise2
from Bio.SubsMat.MatrixInfo import blosum62
from fastamasta import FastaReader

if __name__ == "__main__":
    sequences = [record[1] for record in FastaReader("data/35.fas")]
    first, second = sequences[0], sequences[1]

    # Per the Biopython pairwise2 docs, globalds takes a substitution
    # dictionary plus gap-open and gap-extend penalties (-11 and -1 here),
    # and each alignment is a tuple whose first three items are the two
    # aligned sequences and the score.
    alignments = pairwise2.align.globalds(first, second, blosum62, -11, -1)

    # Only the first reported alignment is printed; nothing is printed when
    # no alignment is returned.
    best = next(iter(alignments), None)
    if best is not None:
        print(int(best[2]))
        print(best[0])
        print(best[1])
# Prints the failure array (Knuth-Morris-Pratt prefix function) of the first
# sequence in a FASTA file as one space-separated line.
from fastamasta import FastaReader


def failure_array(seq):
    """Return the failure array of *seq*.

    Entry ``k`` is the length of the longest proper prefix of ``seq[:k + 1]``
    that is also a suffix of it.  An empty sequence gives an empty list.
    """
    table = [0] * len(seq)
    for pos in range(1, len(seq)):
        # Start from the border of the previous prefix and fall back to
        # shorter borders until one can be extended by seq[pos].
        matched = table[pos - 1]
        while matched > 0 and seq[pos] != seq[matched]:
            matched = table[matched - 1]
        if seq[pos] == seq[matched]:
            matched += 1
        table[pos] = matched
    return table


if __name__ == "__main__":
    seq = FastaReader("data/22.fas").readnext()[1]
    # One joined write instead of one print per element; the line is also
    # properly terminated by a newline.
    print(" ".join(str(value) for value in failure_array(seq)))
# Prints how often each 4-letter word over A, C, G, T occurs among the
# pieces that partition() produces from the first sequence of the input file.
from itertools import product

from functional import partition, frequencies
from fastamasta import FastaReader


def alphabet(order, length, string='', arr=None):
    """Return every word of *length* symbols drawn from *order*.

    Words are generated in the ordering induced by *order* (the last symbol
    varies fastest).  Each word is prefixed with *string* and appended to
    *arr*, which is created when not supplied; the list is returned.

    Raises ValueError if *length* is negative.
    """
    if arr is None:
        arr = []
    arr.extend(string + ''.join(symbols)
               for symbols in product(order, repeat=length))
    return arr


if __name__ == "__main__":
    # Only data/14.dat is read; no other file needs to exist.
    record = FastaReader('data/14.dat').readnext()
    order, size = ['A', 'C', 'G', 'T'], 4

    # partition(..., str) presumably yields the length-`size` pieces of the
    # sequence as strings (verify against functional.partition).
    fourmer_count = frequencies(partition(record[1], size, str))

    # Words that never occur are reported as 0.
    print(' '.join(str(fourmer_count.get(word, 0))
                   for word in alphabet(order, size)))
# Prints the edit distance between the first two sequences of a FASTA file.
from fastamasta import FastaReader
from sequence import edit_dist

if __name__ == "__main__":
    records = list(FastaReader("data/30.fas"))
    first, second = records[0][1], records[1][1]

    # Call the function under the name it is imported as; calling any other
    # name raises NameError.
    # NOTE(review): confirm that `sequence` exports `edit_dist` under this
    # name.
    print(edit_dist(first, second))