def align_to_ref(ref_seq, base_seq):
    """Align a sequence against a reference sequence.

    Both sequences are handed to GeneralSeqTools.call_muscle in a single
    call, labelled 'query' (base_seq) and 'ref' (ref_seq), and the two
    aligned strings are looked up again by those labels.

    ref_seq -- The reference sequence to use as a guide.
    base_seq -- The sequence to align against the reference.

    Returns:
    query_aln -- The aligned form of base_seq.
    ref_aln -- The aligned form of ref_seq.
    """
    # NOTE(review): call_muscle presumably yields (name, aligned_sequence)
    # pairs -- that is what dict() needs here; confirm against
    # GeneralSeqTools.
    alignment = GeneralSeqTools.call_muscle([('query', base_seq),
                                             ('ref', ref_seq)])
    by_name = dict(alignment)

    query_aln = by_name['query']
    ref_aln = by_name['ref']
    return query_aln, ref_aln
def get_region(seq, reference, regions=None):
    """Yield the parts of seq that align to given regions of reference.

    seq and reference are aligned together with
    GeneralSeqTools.call_muscle. For every requested region, the alignment
    columns holding the start-th and stop-th non-gap characters of the
    reference are located, the aligned seq is sliced between those columns
    (start column included, stop column excluded) and gap characters are
    stripped from the slice.

    seq -- The sequence to extract regions from.
    reference -- The reference sequence the region coordinates refer to.
    regions -- Iterable of (start, stop) or (name, start, stop) tuples;
               only the last two items of each tuple are used. Positions
               count non-gap reference characters starting at 1.
               Defaults to [(300, 400)].

    Yields:
    One ungapped string per region, in the order the regions were given.
    If the start position is never reached the slice begins at the first
    alignment column; if the stop position is never reached the slice
    runs to the end of the alignment.
    """
    if regions is None:
        regions = [(300, 400)]

    # NOTE(review): call_muscle presumably yields (name, aligned_sequence)
    # pairs -- confirm against GeneralSeqTools.
    aligned = dict(GeneralSeqTools.call_muscle([('conc', reference),
                                                ('guess', seq)]))
    ref_aln = aligned['conc']
    query_aln = aligned['guess']

    for region in regions:
        # Index from the end so both (start, stop) and (name, start, stop)
        # tuples are accepted.
        start, stop = region[-2], region[-1]

        # Reset both bounds for every region so a bound that is not found
        # never leaks in from the previous iteration.
        align_start = None
        align_stop = None
        ref_pos = 0
        for align_pos, letter in enumerate(ref_aln):
            if letter == '-':
                # Gap columns do not advance the reference coordinate.
                continue
            ref_pos += 1
            if ref_pos == start:
                align_start = align_pos
            if ref_pos == stop:
                align_stop = align_pos
                break

        # The column indices refer to the alignment, so slice the aligned
        # query (not the raw input) and then drop its gap characters.
        yield query_aln[align_start:align_stop].replace('-', '')
def test_muscle_basic_call():
    """Check call_muscle on two short sequences that differ by one base.

    The expected alignment leaves 'test1' unchanged and places a single
    gap in 'test2'.
    """
    unaligned = [('test1', 'ATCGATTGC'),
                 ('test2', 'ATCGATGC')]
    expected = [('test1', 'ATCGATTGC'),
                ('test2', 'ATCGA-TGC')]

    # Materialise the result as a list so it can be compared to a list.
    observed = list(GeneralSeqTools.call_muscle(unaligned))
    eq_(observed, expected)