def test_modify_query_ends(query_subject_example):
    """Check that modify_query_ends sets both query ends.

    After the call the query's left/right ends must equal the requested
    values and the query and subject must still have equal lengths.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")

    # Requested ends: left end moved by +15, right end moved by -10.
    new_left = query.left_end + 15
    new_right = query.right_end - 10
    contig.modify_query_ends(new_left, new_right)

    assert contig.query.left_end == new_left
    assert contig.query.right_end == new_right
    assert contig.query.length == contig.subject.length
def test_contig_copy(query_subject_example):
    """Check that Contig.copy() and copy(contig) yield distinct contigs.

    Both copies must be new objects, and the original and the two copies
    must all carry different ``contig_id`` values.
    """
    query, subject = query_subject_example
    original = Contig(query, subject, "Example")

    via_method = original.copy()
    assert via_method is not original

    via_copy_func = copy(original)
    assert via_copy_func is not original

    # Ids must be pairwise distinct across the original and both copies.
    assert via_copy_func.contig_id != original.contig_id
    assert via_method.contig_id != original.contig_id
    assert via_method.contig_id != via_copy_func.contig_id
def test_modify_subject(query_subject_example):
    """Check the query/subject positions after modify_subject.

    The subject is moved to (start + 10, end + 10).  The test expects the
    subject to land on those positions, the query to land on its original
    positions minus 10 (each translated through the region's context), and
    both regions to keep equal lengths.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")

    # Expected positions are computed from the fixture values *before* the
    # contig is modified.
    expected_query_start = query.start - 10
    expected_query_end = query.end - 10
    new_subject_start = subject.start + 10
    new_subject_end = subject.end + 10

    contig.modify_subject(new_subject_start, new_subject_end)

    assert contig.query.start == contig.query.context.translate_pos(
        expected_query_start)
    assert contig.query.end == contig.query.context.translate_pos(
        expected_query_end)
    assert contig.query.length == contig.subject.length
    assert contig.subject.start == contig.subject.context.translate_pos(
        new_subject_start)
    assert contig.subject.end == contig.subject.context.translate_pos(
        new_subject_end)
def test_subquery():
    """Check the lengths of sub-queries cut out of a contig's query."""
    query_context = Context(10000, True)
    subject_context = Context(5000, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(
        500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST")

    inner = contig.sub_query(200, 250)
    up_to_end = contig.sub_query(220, 300)

    # Both bounds are counted, hence 51 and 81 rather than 50 and 80.
    assert len(inner) == 51
    assert len(up_to_end) == 81
def test_contig_reverse_direction(query_subject_example):
    """reverse_direction should swap start/end and reverse both sequences."""
    query, subject = query_subject_example
    query.sequence = "ACGTAGTGCTGTSCGGCTGTGATGCGT"
    subject.sequence = "ACGTAGTGCTGTSCGGCTGTGATGCGT"
    contig = Contig(query, subject, "Example")

    # Snapshots of the regions taken before the contig is reversed.
    query_before = query.copy()
    subject_before = subject.copy()

    contig.reverse_direction()

    # Start and end trade places on both regions ...
    assert contig.query.start == query_before.end
    assert contig.query.end == query_before.start
    assert contig.subject.start == subject_before.end
    assert contig.subject.end == subject_before.start
    # ... and the sequences are reversed (not complemented).
    assert contig.query.sequence == query_before.sequence[::-1]
    assert contig.subject.sequence == subject_before.sequence[::-1]
def test_contig_constructor_with_context_error(query_subject_example):
    """Contig must raise ContigError when r1 and r2 differ in length.

    The ``query_subject_example`` fixture is still requested so the test
    signature is unchanged, but its value is not used: the regions are
    built locally.
    """
    r1 = Region(1, 2, context=Context(100, True, start_index=1))
    r2 = Region(1, 20, context=Context(100, True, start_index=1))

    # Keep only the call under test inside the ``raises`` block, so that an
    # unexpected ContigError raised while building the regions cannot make
    # this test pass for the wrong reason.
    with pytest.raises(ContigError):
        Contig(r1, r2, "Throws error")
def test_divide_contigs():
    """Check how many contigs divide_contig returns.

    With the point lists ``[1100, 1200]`` and ``[2000, 2200, 2300]`` the
    test expects 12 contigs when ``include_self`` is False and 13 when it
    is True.
    """
    query_context = Context(10000, True)
    subject_context = Context(8000, True)
    query_region = ContigRegion(
        1000, 3000, context=query_context, forward=True)
    subject_region = ContigRegion(
        5000, 7000, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST")

    # (include_self, expected number of contigs); the list arguments are
    # rebuilt for every call so no list object is shared between calls.
    cases = ((False, 12), (True, 13))
    for include_self, expected_count in cases:
        pieces = contig.divide_contig(
            [1100, 1200], [2000, 2200, 2300], include_self=include_self)
        assert len(pieces) == expected_count
def test_modify_subject_end(query_subject_example):
    """Check modify_subject_end for accepted and rejected positions.

    Offsets of +/-0.9 region lengths must be accepted and leave the subject
    end at the context-translated position; offsets of +/-200.1 region
    lengths must raise RegionError.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")
    region = contig.subject
    modify_end = contig.modify_subject_end

    # All four target positions are derived from the unmodified region,
    # before any call changes its end.
    accepted = [region.end + int(region.length * factor)
                for factor in (0.9, -0.9)]
    rejected = [region.end + int(region.length * factor)
                for factor in (200.1, -200.1)]

    for position in accepted:
        modify_end(position)
        assert region.end == region.context.translate_pos(position)

    for position in rejected:
        with pytest.raises(RegionError):
            modify_end(position)
def test_contig_schema():
    """Dump a contig through ContigSchema and check the serialized fields."""
    query_context = Context(10000, True)
    subject_context = Context(5100, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(
        500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST", data1=5)

    serializer = schemas.ContigSchema()
    dumped = serializer.dump(contig)

    # Query region and its context.
    assert dumped['query']['start'] == 100
    assert dumped['query']['end'] == 300
    assert dumped['query']['context']['length'] == 10000

    # Subject region and its context.
    assert dumped['subject']['start'] == 500
    assert dumped['subject']['end'] == 700
    assert dumped['subject']['context']['length'] == 5100

    # Contig-level fields; extra constructor kwargs end up in 'metadata'.
    assert dumped['quality']
    assert dumped['metadata'] == {'data1': 5}
    assert dumped['alignment_length'] == 201
def perfect_matches(self, rc=True):
    """
    Pseudo-blast for finding perfect sequence matches (i.e. primers).

    Every sequence record returned by ``self.concate_db_to_fsa()`` is turned
    into a regular expression (``N``/``n`` replaced by ``.`` so it matches
    any single character, everything lower-cased) and searched for in the
    equally normalised query sequence.  Each non-overlapping hit found by
    ``re.finditer`` becomes a ``Contig`` of type ``Contig.TYPE_PRIMER``.

    :param rc: if True (the default) the reverse complement of every
        sequence is searched as well; if False only matches of the sequence
        as given are reported.
    :return: a ``ContigContainer`` whose ``contigs`` list holds one contig
        per match, grouped per sequence with forward matches first.
    """
    # Only the sequence records are needed from the three returned values.
    _, seqs, _ = self.concate_db_to_fsa()
    contig_container = ContigContainer()

    # Normalise the query once: N/n -> '.', lower case.
    query_seq = open_sequence(self.query)[0].seq
    query_seq_str = re.sub('[nN]', '.', str(query_seq)).lower()

    start_index = ContigRegion.START_INDEX

    for seq in seqs:
        seq_str = str(seq.seq)
        try:
            # Always computed, even when rc is False, so that records for
            # which dna_reverse_complement raises KeyError keep being
            # skipped entirely, exactly as before.
            rc_seq_str = dna_reverse_complement(seq_str)
        except KeyError:
            continue
        seq_str = re.sub('[nN]', '.', seq_str).lower()
        rc_seq_str = re.sub('[nN]', '.', rc_seq_str).lower()
        seq_len = len(seq)

        # (search pattern, subject start, subject end, last positional flag)
        # NOTE(review): the last value is presumably ContigRegion's
        # "forward" flag -- confirm against its signature.
        orientations = [(seq_str, start_index, seq_len, True)]
        if rc:
            orientations.append((rc_seq_str, seq_len, start_index, False))

        for pattern, subject_start, subject_end, forward in orientations:
            for match in re.finditer(pattern, query_seq_str):
                subject = ContigRegion(
                    seq.id,
                    subject_start,
                    subject_end,
                    seq_len,
                    False,
                    forward,
                    sequence=seq_str,
                )
                # match.end() is exclusive, hence the "- 1" to get an
                # inclusive end position.
                query = ContigRegion(
                    self.query,
                    match.start() + start_index,
                    match.end() + start_index - 1,
                    self.query_length,
                    self.query_circular,
                    True,
                )
                contig_container.contigs.append(
                    Contig(query, subject, Contig.TYPE_PRIMER))

    return contig_container
def test_alignment_length(query_subject_example):
    """A contig's alignment_length must equal the length of its query."""
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")
    expected_length = query.length
    assert contig.alignment_length == expected_length
def test_invalid_region(query_subject_example):
    """Contig must raise ContigError after the query start is set to 2."""
    query, subject = query_subject_example
    # Changing only the query start is expected to make the pair invalid.
    query.start = 2
    with pytest.raises(ContigError):
        Contig(query, subject, "Example")
def test_contig_constructor(query_subject_example):
    """Build a contig from a query and a subject and check both regions.

    The constructed contig must expose regions that compare equal to the
    ones it was built from.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")
    assert contig.query == query
    assert contig.subject == subject
def test_fuse():
    """Exercise Contig.fuse on success and failure cases.

    Covers: fusing two contigs, sequence concatenation when both contigs
    carry sequences, an error when only one of them carries sequences, and
    an error when the contigs' regions use different contexts.
    """
    query_ctx = Context(10000, True)
    subject_ctx = Context(5100, True)

    q_left = ContigRegion(100, 300, context=query_ctx, forward=True)
    q_right = ContigRegion(301, 600, context=query_ctx, forward=True)
    # Same coordinates as q_right, but on the other context.
    q_other_ctx = ContigRegion(301, 600, context=subject_ctx, forward=True)
    s_left = ContigRegion(500, 700, context=subject_ctx, forward=True)
    s_right = ContigRegion(701, 1000, context=subject_ctx, forward=True)
    s_right_again = ContigRegion(701, 1000, context=subject_ctx, forward=True)

    # Plain fuse: the query of the first contig now ends where the second
    # one ended.
    first = Contig(q_left, s_left, "BLAST")
    second = Contig(q_right, s_right, "BLAST")
    first.fuse(second)
    assert first.query.end == 600

    # Both contigs carry sequences: fusing concatenates them.
    first = Contig(q_left, s_left, "BLAST")
    second = Contig(q_right, s_right, "BLAST")
    first_query_seq = "A" * first.query.length
    first_subject_seq = "B" * first.subject.length
    second_query_seq = "C" * second.query.length
    second_subject_seq = "D" * second.subject.length
    first.query.sequence = first_query_seq
    first.subject.sequence = first_subject_seq
    second.query.sequence = second_query_seq
    second.subject.sequence = second_subject_seq
    first.fuse(second)
    assert first.query.sequence == first_query_seq + second_query_seq
    assert first.subject.sequence == first_subject_seq + second_subject_seq

    # Raises error if one has sequence and other does not
    first = Contig(q_left, s_left, "BLAST")
    second = Contig(q_right, s_right, "BLAST")
    first.query.sequence = "AGTCTGAGCTGTCGTGATAGTGCTGA"
    first.subject.sequence = "AGTYAGYCHYAYHCYHYSCHYHAYCY"
    with pytest.raises(ContigError):
        first.fuse(second)

    # Raise error if different contexts
    first = Contig(q_left, s_left, "BLAST")
    second = Contig(q_right, s_right, "BLAST")
    other_ctx_contig = Contig(q_other_ctx, s_right_again, "BLAST")
    with pytest.raises(ContigError):
        other_ctx_contig.fuse(second)