def test_region_subject_to_query_00(self):
    """Map subject regions when the subject row begins with gaps.

    Checks a two-residue region and an empty region, both anchored at
    subject position 0, in alignment, subject and query coordinates.
    """
    pair = AlignedPair(("a", "ABCDEFG"), ("b", "---KLMN"))
    # Each entry: (subject start, subject end),
    # then the expected alignment, subject and query coordinates.
    expectations = [
        # KL in the subject lines up with DE in the query.
        ((0, 2), (3, 5), (0, 2), (3, 5)),
        # Empty region: --- | KLMN in the subject, ABC | DEFG in the query.
        ((0, 0), (3, 3), (0, 0), (3, 3)),
    ]
    for (start, end), in_aln, in_subj, in_qry in expectations:
        region = AlignedRegion.from_subject(pair, start, end)
        self.assertEqual(region.in_alignment(), in_aln)
        self.assertEqual(region.in_subject(), in_subj)
        self.assertEqual(region.in_query(), in_qry)
def test_from_subject_region_crazy(self):
    """Check from_subject() alignment indices when both rows have gaps."""
    query_row = ("a", "-A-BC-EF---")
    subject_row = ("b", "--HI-JK-LMN")
    pair = AlignedPair(query_row, subject_row)

    # Subject positions 0..3 (HIJ) span alignment columns 2..6 (HI-J).
    region = AlignedRegion.from_subject(pair, 0, 3)

    self.assertEqual(region.start_idx, 2)
    self.assertEqual(region.end_idx, 6)
def test_from_subject_with_endgaps(self):
    """Check from_subject() indices when only the query row has gaps.

    The subject row is ungapped, so the expected alignment indices equal
    the subject coordinates that were passed in.
    """
    pair = AlignedPair(("a", "--ABC-EF---"), ("b", "HIJKLMNOPQR"))
    region = AlignedRegion.from_subject(pair, 1, 7)

    for attr_name, expected in (("start_idx", 1), ("end_idx", 7)):
        self.assertEqual(getattr(region, attr_name), expected)
def test_from_subject_no_endgaps(self):
    """Check from_subject() indices on an alignment without any gaps."""
    query_row = ("a", "ABCDEF")
    subject_row = ("b", "HIJKLM")
    subject_start, subject_end = 2, 5

    region = AlignedRegion.from_subject(
        AlignedPair(query_row, subject_row), subject_start, subject_end)

    # Expected alignment indices are the same numbers as the subject range.
    self.assertEqual(region.start_idx, 2)
    self.assertEqual(region.end_idx, 5)
def find_in_seqs(self, seqs):
    """Yield ``(query_id, PrimerMatch)`` pairs found by alignment search.

    Sequences that already have a match are written out as the search
    subjects; the unmatched records are searched against them. Each hit is
    extended, and the matched region of the subject is translated into
    query coordinates to build the new match.

    Nothing is yielded (and no files are written) when every sequence in
    ``seqs`` is already matched.
    """
    if seqs.all_matched():
        return

    # Working files, created in the same order: subject, query, result.
    subject_fp, query_fp, result_fp = (
        self._make_fp(template.format(self.suffix))
        for template in ("subject_{0}.fa", "query_{0}.fa", "query_{0}.txt"))

    # Search the unmatched records against the already-matched ones.
    with open(subject_fp, "w") as subject_file:
        write_fasta(subject_file, seqs.get_matched_offset0())
    aligner = VsearchAligner(subject_fp)

    # min_pct_id is a percentage; the aligner is given a fraction.
    search_kwargs = dict(
        min_id=round(self.min_pct_id / 100, 2),
        top_hits_only=None,
    )
    # Only pass a thread count when one was explicitly configured.
    if self.cores > 0:
        search_kwargs.update(threads=self.cores)

    hits = aligner.search(
        seqs.get_unmatched_recs(),
        input_fp=query_fp,
        output_fp=result_fp,
        **search_kwargs)

    # Refine each hit and convert the subject's match into query coordinates.
    extender = HitExtender(
        seqs.get_unmatched_recs(), seqs.get_matched_offset0())
    for hit in hits:
        alignment = extender.extend_hit(hit)
        known_match = seqs.matches[alignment.subject_id]
        region = AlignedRegion.from_subject(
            alignment, known_match.start, known_match.end)
        start_in_query, end_in_query = region.in_query()
        offset_in_query = region.query_offset()
        new_match = PrimerMatch(
            start_in_query, end_in_query, offset_in_query, "Alignment")
        yield alignment.query_id, new_match
def test_region_subject_to_query_crazy_alignment(self):
    """Map subject regions through an alignment with gaps in both rows.

    Covers two partial regions and the default whole-subject region,
    checking subject, alignment and query coordinates for each.
    """
    pair = AlignedPair(("a", "-A-BC-EF---"), ("b", "--HI-JK-LMN"))
    # Each entry: positional args for from_subject() after the alignment,
    # then the expected subject, alignment and query coordinates.
    expectations = [
        # HIJ -> columns HI-J -> BC in HIJ
        ((0, 3), (0, 3), (2, 6), (1, 3)),
        # IJK -> columns I-JK -> BCE in IJK
        ((1, 4), (1, 4), (3, 7), (1, 4)),
        # Whole sequence HIJKLMN -> columns HI-JK-LMN -> BCEF in subject
        ((), (0, 7), (2, 11), (1, 5)),
    ]
    for bounds, in_subj, in_aln, in_qry in expectations:
        region = AlignedRegion.from_subject(pair, *bounds)
        self.assertEqual(region.in_subject(), in_subj)
        self.assertEqual(region.in_alignment(), in_aln)
        self.assertEqual(region.in_query(), in_qry)
def test_region_subject_to_query_with_endgaps(self):
    """Map subject regions when only the query row contains gaps.

    The subject row is ungapped, so the expected alignment coordinates
    equal the subject coordinates in every case.
    """
    pair = AlignedPair(("a", "--ABC-EF---"), ("b", "HIJKLMNOPQR"))
    # Each entry: positional args for from_subject() after the alignment,
    # then the expected subject, alignment and query coordinates.
    expectations = [
        # HIJ covers only A in the query.
        ((0, 3), (0, 3), (0, 3), (0, 1)),
        # IJKLM covers ABC in the query.
        ((1, 6), (1, 6), (1, 6), (0, 3)),
        # Whole sequence covers ABCEF in the query.
        ((), (0, 11), (0, 11), (0, 5)),
    ]
    for bounds, in_subj, in_aln, in_qry in expectations:
        region = AlignedRegion.from_subject(pair, *bounds)
        self.assertEqual(region.in_subject(), in_subj)
        self.assertEqual(region.in_alignment(), in_aln)
        self.assertEqual(region.in_query(), in_qry)
def test_region_subject_to_query_no_endgaps(self):
    """Check that an ungapped alignment maps coordinates one-to-one.

    For each coordinate range, a region built from subject coordinates and
    a region built from query coordinates must both report that same range
    as their alignment coordinates.
    """
    a = AlignedPair(("a", "ABCDEF"), ("b", "HIJKLM"))

    # In an alignment with no gaps, the query sequence coordinates should
    # always match the subject sequence coordinates
    r = AlignedRegion.from_subject(a, 0, 3)
    self.assertEqual(r.in_alignment(), (0, 3))
    rq = AlignedRegion.from_query(a, 0, 3)
    # Assert on the query-based region; previously ``r`` was re-checked
    # here and ``rq`` was never verified.
    self.assertEqual(rq.in_alignment(), (0, 3))

    r = AlignedRegion.from_subject(a, 1, 5)
    self.assertEqual(r.in_alignment(), (1, 5))
    rq = AlignedRegion.from_query(a, 1, 5)
    self.assertEqual(rq.in_alignment(), (1, 5))

    # With no explicit bounds, each region is expected to span the whole
    # six-column alignment.
    r = AlignedRegion.from_subject(a)
    self.assertEqual(r.in_alignment(), (0, 6))
    rq = AlignedRegion.from_query(a)
    self.assertEqual(rq.in_alignment(), (0, 6))