def test_lp_and_rp_setter():
    """Check that values assigned to ``lp`` / ``rp`` are read back unchanged."""

    def linear_context():
        # Every region gets its own freshly built context.
        return Context(9, False, start_index=1)

    # Forward and reverse regions go through the same assignment sequence.
    for start, end, direction in ((1, 3, Region.FORWARD), (3, 1, Region.REVERSE)):
        region = Region(start, end, direction=direction, context=linear_context())
        region.lp = 2
        assert region.lp == 2
        region.rp = 5
        assert region.rp == 5
        # This last assignment is exercised but nothing is asserted on it.
        region.rp = 4

    # Region whose start equals its end, built with the default direction.
    point = Region(2, 2, context=linear_context())
    point.rp = 3
    assert point.rp == 3 == point.lp
    point.lp = 4
    assert point.rp == 4 == point.lp
def test_region_properties():
    """Check start/end and the context-derived attributes of a Region."""
    # Linear context, default direction
    linear = Region(1, 5, context=Context(10, False, start_index=1))
    assert linear.start == 1
    assert linear.end == 5
    assert linear.context_length == 10
    assert not linear.circular
    assert linear.start_index == 1

    # Circular context with a non-default start index
    circular = Region(5, 9, context=Context(10, True, start_index=4))
    assert circular.start == 5
    assert circular.end == 9
    assert circular.context_length == 10
    assert circular.circular
    assert circular.start_index == 4

    # Reverse region on a linear context (start is greater than end)
    reverse = Region(
        7,
        5,
        direction=Region.REVERSE,
        context=Context(10, False, start_index=1),
    )
    assert reverse.start == 7
    assert reverse.end == 5
    assert reverse.context_length == 10
    assert not reverse.circular
    assert reverse.start_index == 1
def test_extend_left_and_right_end():
    """Check ``extend_right_end`` / ``extend_left_end`` on a reverse region.

    This test used to be called ``test_extend_end``, the same name as a later
    test in this module. The later definition rebinds the name when the
    module is imported, so this body was never collected by pytest. It is
    renamed here so that it actually runs.
    """
    # (right extension, left extension); the second pair uses negative values,
    # for which the assertions below expect the ends to move inwards.
    for right_ext, left_ext in ((15, 7), (-3, -2)):
        region = Region(
            40,
            30,
            direction=Region.REVERSE,
            context=Context(100, True, start_index=1),
        )
        length_before = region.length
        region.extend_right_end(right_ext)
        region.extend_left_end(left_ext)
        assert region.right_end == 40 + right_ext
        assert region.left_end == 30 - left_ext
        assert region.length == length_before + right_ext + left_ext
def query_subject_example(scope="module"):
    """Build an example query/subject pair of contig regions.

    NOTE(review): ``scope`` is an ordinary keyword parameter that the body
    never reads, and no fixture decorator is visible on this definition even
    though a test in this module requests ``query_subject_example`` by name.
    Confirm how this function is registered with pytest.

    Returns:
        list: ``[query, subject]``, in that order.
    """
    example_subject = ContigRegion(
        911,
        757,
        Context(9795, True),
        name="subjectexample",
        forward=False,
        filename="templates/pRIAS (CC15).gb",
    )
    example_query = ContigRegion(
        1,
        155,
        Context(22240, True),
        name="queryexample",
        forward=True,
        filename="templates/pRasdfasdfaIAS (CC15).gb",
    )
    return [example_query, example_subject]
def test_contig_constructor_with_context_error(query_subject_example):
    """Building a Contig from regions of different lengths raises ContigError.

    Args:
        query_subject_example: kept in the signature so the test is requested
            exactly as before; its value is not needed by the assertions.
    """
    short_region = Region(1, 2, context=Context(100, True, start_index=1))
    long_region = Region(1, 20, context=Context(100, True, start_index=1))

    # Only the constructor under test sits inside the ``raises`` block, so an
    # error raised while building the regions cannot satisfy the expectation.
    with pytest.raises(ContigError):
        Contig(short_region, long_region, "Throws error")
def test_eq_location():
    """Check ``equivalent_location`` for matching and mismatching contexts."""
    first = Region(10, 20, context=Context(100, False, start_index=1))
    twin = Region(10, 20, context=Context(100, False, start_index=1))
    # Same coordinates, but the context has a different length.
    longer_context = Region(10, 20, context=Context(110, False, start_index=1))

    # Equivalence holds in both directions.
    assert first.equivalent_location(twin)
    assert twin.equivalent_location(first)

    # Comparing across differing contexts raises, whichever side is the caller.
    for caller, argument in ((first, longer_context), (longer_context, first)):
        with pytest.raises(RegionError):
            caller.equivalent_location(argument)
def test_ends_overlap_with_raise_region_error():
    """``end_overlaps_with`` raises RegionError when the contexts differ.

    The regions are built outside the ``pytest.raises`` blocks so that a
    RegionError raised by a constructor cannot make the test pass by mistake.
    """
    # Contexts differ in circularity.
    region = Region(10, 50, context=Context(100, False, start_index=0))
    circular_other = Region(20, 90, context=Context(100, True, start_index=0))
    with pytest.raises(RegionError):
        region.end_overlaps_with(circular_other)

    # Contexts differ in start index.
    region = Region(10, 50, context=Context(100, False, start_index=0))
    shifted_other = Region(20, 90, context=Context(100, False, start_index=1))
    with pytest.raises(RegionError):
        region.end_overlaps_with(shifted_other)
def test_ends_overlap_with_reverse_regions():
    """Check ``end_overlaps_with`` in both call orders for reverse regions."""
    shorter = Region(
        50,
        10,
        direction=Region.REVERSE,
        context=Context(100, False, start_index=0),
    )
    longer = Region(
        90,
        20,
        direction=Region.REVERSE,
        context=Context(100, False, start_index=0),
    )

    # The relation is expected to hold in one call order only.
    assert shorter.end_overlaps_with(longer)
    assert not longer.end_overlaps_with(shorter)
def test_subquery():
    """Check the length of sub-queries cut from a contig's query region."""
    query_context = Context(10000, True)
    subject_context = Context(5000, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST")

    # Both sub-queries are taken before anything is asserted.
    first_sub, second_sub = [
        contig.sub_query(start, end) for start, end in ((200, 250), (220, 300))
    ]

    # Expected lengths count both end points (250 - 200 + 1, 300 - 220 + 1).
    assert len(first_sub) == 51
    assert len(second_sub) == 81
def test_overlap_circular():
    """Check ``get_overlap`` for two regions on a circular context.

    Both regions are built with start > end:

        r1  ------|          |-------
        r2  ---------|          |----
    """
    circular_a = Region(90, 10, context=Context(100, True, start_index=2))
    circular_b = Region(95, 20, context=Context(100, True, start_index=2))

    shared = circular_a.get_overlap(circular_b)

    # The overlap runs from the second region's start to the first one's end.
    assert shared.start == 95
    assert shared.end == 10
def test_overlap_linear_forward():
    """Check the attributes of the overlap of two linear regions."""
    left = Region(10, 50, context=Context(100, False, start_index=0))
    right = Region(20, 90, context=Context(100, False, start_index=0))

    shared = left.get_overlap(right)
    print(shared.start, shared.end)

    # The overlap lives in the same context as both parents.
    for parent in (left, right):
        assert shared.same_context(parent)

    assert shared.start == 20
    assert shared.end == 50
    assert shared.direction == left.direction

    # Asking in the opposite order yields no overlap.
    assert right.get_overlap(left) is None
def test_extend_start():
    """Exercise ``extend_start`` on linear and circular regions."""
    reverse = {"direction": Region.REVERSE}

    # Linear context: (start, end, extra kwargs, extension, expected start).
    linear_cases = (
        (10, 20, {}, 5, 5),         # forward
        (20, 10, reverse, 5, 25),   # reverse
        (10, 20, {}, -5, 15),       # forward, negative
        (20, 10, reverse, -5, 15),  # reverse, negative
    )
    for start, end, extra, amount, expected_start in linear_cases:
        region = Region(
            start, end, context=Context(100, False, start_index=1), **extra
        )
        region.extend_start(amount)
        assert region.start == expected_start

    # Over the origin, forward:
    #   95  96  97  98  99 100   1   2   3   4
    #   -5  -4  -3  -2  -1   0
    region = Region(
        -5, 5, direction=Region.FORWARD, context=Context(100, True, start_index=1)
    )
    region.extend_start(-6)
    assert region.start == 1

    # Retracting by length - 1 leaves start equal to end.
    region = Region(
        5, 10, direction=Region.FORWARD, context=Context(100, True, start_index=1)
    )
    region.extend_start(-region.length + 1)
    assert region.start == region.end

    # Retracting by the full length raises.
    region = Region(
        5, 10, direction=Region.FORWARD, context=Context(100, True, start_index=1)
    )
    with pytest.raises(RegionError):
        region.extend_start(-region.length)

    # Extending by 9 is accepted; a further extension by 10 raises.
    region = Region(
        2, 4, direction=Region.FORWARD, context=Context(12, True, start_index=1)
    )
    region.extend_start(9)
    with pytest.raises(RegionError):
        region.extend_start(10)
def test_region_fuse():
    """Exercise ``consecutive_with`` and ``fuse`` on adjacent regions."""
    # Two adjacent regions on a linear context
    head = Region(20, 50, context=Context(100, False, start_index=0))
    tail = Region(51, 70, context=Context(100, False, start_index=0))
    assert head.consecutive_with(tail)
    assert not tail.consecutive_with(head)
    head.fuse(tail)
    # Fusing in the opposite order returns None.
    assert tail.fuse(head) is None

    # Two regions on a circular context, 95-100 followed by 1-70
    head = Region(95, 100, context=Context(100, True, start_index=1))
    tail = Region(1, 70, context=Context(100, True, start_index=1))
    assert head.consecutive_with(tail)
    assert not tail.consecutive_with(head)
    head.fuse(tail)
    # The receiver is extended; the argument keeps its coordinates.
    assert head.start == 95
    assert head.end == 70
    assert tail.start == 1
    assert tail.end == 70

    # A region contained in another is not consecutive with it.
    outer = Region(1, 100, context=Context(100, False, start_index=1))
    inner = Region(5, 99, context=Context(100, False, start_index=1))
    assert not outer.consecutive_with(inner)

    # With inplace=False the result is a different object from the receiver.
    head = Region(20, 50, context=Context(100, False, start_index=0))
    tail = Region(51, 70, context=Context(100, False, start_index=0))
    fused = head.fuse(tail, inplace=False)
    assert head is not fused
def test_create_from_ends():
    """Exercise ``Region.create_from_ends`` on linear and circular contexts."""
    linear_context = Context(100, False, 1)
    circular_context = Context(100, True, 1)

    # Linear, forward: left end is the start, right end is the end.
    forward = Region.create_from_ends(
        5, 10, linear_context, direction=Region.FORWARD
    )
    assert forward.left_end == 5 == forward.start
    assert forward.right_end == 10 == forward.end

    # Linear, reverse: left end is the end, right end is the start.
    backward = Region.create_from_ends(
        5, 10, linear_context, direction=Region.REVERSE
    )
    assert backward.left_end == 5 == backward.end
    assert backward.right_end == 10 == backward.start

    # Linear with left end > right end raises for either direction.
    for direction in (Region.FORWARD, Region.REVERSE):
        with pytest.raises(RegionError):
            Region.create_from_ends(10, 5, linear_context, direction=direction)

    # On a circular context, left end > right end is accepted.
    wrapped_forward = Region.create_from_ends(
        10, 5, circular_context, direction=Region.FORWARD
    )
    assert wrapped_forward.left_end == 10
    assert wrapped_forward.right_end == 5
    assert wrapped_forward.start == 10
    assert wrapped_forward.end == 5

    wrapped_backward = Region.create_from_ends(
        10, 5, circular_context, direction=Region.REVERSE
    )
    assert wrapped_backward.left_end == 10
    assert wrapped_backward.right_end == 5
    assert wrapped_backward.start == 5
    assert wrapped_backward.end == 10
def test_extend_end():
    """Exercise ``extend_end`` on linear and circular regions."""
    reverse = {"direction": Region.REVERSE}

    # Linear context: (start, end, extra kwargs, extension, expected end).
    linear_cases = (
        (10, 20, {}, 5, 25),        # forward
        (20, 10, reverse, 5, 5),    # reverse
        (10, 20, {}, -5, 15),       # forward, negative
        (20, 10, reverse, -5, 15),  # reverse, negative
    )
    for start, end, extra, amount, expected_end in linear_cases:
        region = Region(
            start, end, context=Context(100, False, start_index=1), **extra
        )
        region.extend_end(amount)
        assert region.end == expected_end

    # Over the origin, forward
    region = Region(
        -5, 5, direction=Region.FORWARD, context=Context(100, True, start_index=1)
    )
    region.extend_end(-5)
    assert region.end == 100

    # Retracting by length - 1 leaves start equal to end.
    region = Region(
        5, 10, direction=Region.FORWARD, context=Context(100, True, start_index=1)
    )
    region.extend_end(-region.length + 1)
    assert region.start == region.end

    # Retracting by the full length raises.
    region = Region(
        5, 10, direction=Region.FORWARD, context=Context(100, True, start_index=1)
    )
    with pytest.raises(RegionError):
        region.extend_end(-region.length)

    # Extending by 9 is accepted; a further extension by 10 raises.
    region = Region(
        2, 4, direction=Region.FORWARD, context=Context(12, True, start_index=1)
    )
    region.extend_end(9)
    with pytest.raises(RegionError):
        region.extend_end(10)
def test_context_schema():
    """Check the fields produced when a Context is dumped through its schema."""
    context = Context(1000, True, start_index=2)

    dumped = schemas.ContextSchema().dump(context)

    assert dumped['circular']
    assert dumped['size'] == 1000
    assert dumped['start'] == 2
def test_fuse():
    """Exercise ``Contig.fuse`` with and without sequences and across contexts.

    The regions are shared between the cases below and the cases run in a
    fixed order, so the statements are deliberately kept in sequence.
    """
    query_context = Context(10000, True)
    subject_context = Context(5100, True)

    query_a = ContigRegion(100, 300, context=query_context, forward=True)
    query_b = ContigRegion(301, 600, context=query_context, forward=True)
    # Same coordinates as query_b, but placed in the subject context.
    query_c = ContigRegion(301, 600, context=subject_context, forward=True)
    subject_a = ContigRegion(500, 700, context=subject_context, forward=True)
    subject_b = ContigRegion(701, 1000, context=subject_context, forward=True)
    subject_c = ContigRegion(701, 1000, context=subject_context, forward=True)

    def blast(query, subject):
        # All contigs in this test are built with the same label.
        return Contig(query, subject, "BLAST")

    # Plain fuse: the first contig's query now ends where the second one ends.
    first = blast(query_a, subject_a)
    second = blast(query_b, subject_b)
    first.fuse(second)
    assert first.query.end == 600

    # Both contigs carry sequences: fusing concatenates them.
    first = blast(query_a, subject_a)
    second = blast(query_b, subject_b)
    first_query_seq = "A" * first.query.length
    first_subject_seq = "B" * first.subject.length
    second_query_seq = "C" * second.query.length
    second_subject_seq = "D" * second.subject.length
    first.query.sequence = first_query_seq
    first.subject.sequence = first_subject_seq
    second.query.sequence = second_query_seq
    second.subject.sequence = second_subject_seq
    first.fuse(second)
    assert first.query.sequence == first_query_seq + second_query_seq
    assert first.subject.sequence == first_subject_seq + second_subject_seq

    # Raises an error if one contig has sequences and the other does not.
    first = blast(query_a, subject_a)
    second = blast(query_b, subject_b)
    first.query.sequence = "AGTCTGAGCTGTCGTGATAGTGCTGA"
    first.subject.sequence = "AGTYAGYCHYAYHCYHYSCHYHAYCY"
    with pytest.raises(ContigError):
        first.fuse(second)

    # Raises an error if the query regions live in different contexts.
    first = blast(query_a, subject_a)
    second = blast(query_b, subject_b)
    third = blast(query_c, subject_c)
    with pytest.raises(ContigError):
        third.fuse(second)
def test_gap_cost_both_extendable():
    """Print and scatter-plot ``Assembly._gap_cost`` against query distance.

    A fixed ``left`` contig is compared with ``right`` contigs whose query
    start sweeps over ``range(1, 1000)``, once for each combination of
    extendable flags listed in the outer loop. The function makes no
    assertions.

    NOTE(review): the block structure below was reconstructed from a source
    with no indentation. In particular, which statements belong to the
    ``if False:`` block could not be determined -- confirm against the
    original file.
    """
    subject = ContigRegion(1, 100, Context(9795, True))
    query_context = Context(20000, True)
    query = ContigRegion(1, 100, query_context)
    left = BlastContig(query, subject, "test")
    from matplotlib import pyplot as plt
    # One scatter series per (left flag, right flag, colour) combination.
    for left_extendable, right_extendable, color in [(True, True, "blue"), (False, True, "green"), (False, False, "red")]:
        X = []
        Y = []
        # The same flag value is applied to both ends of the left contig.
        left.lp_extendable = left_extendable
        left.rp_extendable = left_extendable
        for right_start in range(1, 1000):
            # Each right contig covers 100 query positions and reuses the
            # same subject region as the left contig.
            right = BlastContig(
                ContigRegion(right_start, right_start + 99, query_context), subject, "test")
            right.lp_extendable = right_extendable
            right.rp_extendable = right_extendable
            gap_cost = Assembly._gap_cost(left, right)
            left_end = left.query.end
            # Rebinds the loop variable to the value read back from the region.
            right_start = right.query.start
            distance = right_start - left_end
            print(
                f"{left_end} {right_start} Distance: {distance}, Gap_cost: {gap_cost}"
            )
            X.append(distance)
            Y.append(gap_cost)
        # The label is the sum of the two boolean flags.
        plt.scatter(X, Y, s=1.0, c=[color] * len(Y), label=f"{left_extendable + right_extendable}")
    # Disabled branch: nothing inside it runs.
    if False:
        plt.ylim(0, 500)
        plt.xlabel("Distance (bp)")
        plt.ylabel("Gap Cost ($)")
        plt.legend(loc=2)
        plt.show()
def test_str_and_repr():
    """Check that a Region can be rendered with both ``str`` and ``repr``.

    The earlier version only printed the region twice through ``str`` and
    never called ``repr`` despite the test's name. Both conversions are now
    invoked, so an exception raised by either one fails the test.
    """
    linear_context = Context(100, False, 1)
    region = Region.create_from_ends(
        5, 10, linear_context, direction=Region.FORWARD
    )

    assert isinstance(str(region), str)
    assert isinstance(repr(region), str)
def test_end_setter():
    """Check that setting ``left_end`` / ``right_end`` updates start and end."""
    # Default direction: the left end is the start, the right end is the end.
    region = Region(2, 5, context=Context(9, False, start_index=1))
    region.left_end = 1
    assert region.start == 1 == region.left_end == region.lp
    region.right_end = 6
    assert region.end == 6 == region.right_end == region.rp

    # Reverse direction: the left end is the end, the right end is the start.
    region = Region(
        5,
        2,
        direction=Region.REVERSE,
        context=Context(9, False, start_index=1),
    )
    region.left_end = 1
    assert region.end == 1 == region.left_end == region.lp
    region.right_end = 6
    assert region.start == 6 == region.right_end == region.rp
def test_subregion():
    """Exercise ``sub_region`` within and outside the parent's bounds."""
    parent = Region(20, 50, context=Context(100, False, start_index=0))
    # Called for its own sake; the result is not inspected.
    parent.sub_region(20, 30)

    child = parent.sub_region(30, 50)
    assert child.start == 30
    assert child.end == 50

    # Bounds that reach one position outside the parent raise.
    for bad_start, bad_end in ((19, 30), (30, 51)):
        with pytest.raises(RegionError):
            parent.sub_region(bad_start, bad_end)

    # Parent built with start > end on a circular context.
    wrapped = Region(90, 10, context=Context(100, True, start_index=0))
    child = wrapped.sub_region(95, 5)
    reference = Region(95, 5, context=Context(100, True, start_index=0))
    assert child.start == 95
    assert child.end == 5
    assert reference.length == child.length
def test_region_copy():
    """Check that a copied Region reports the same context as its source."""
    source = Region(
        50,
        20,
        context=Context(1000, False, 2),
        name='name',
        direction=Region.REVERSE,
    )

    duplicate = source.copy()

    assert source.same_context(duplicate)
def test_lp_and_rp():
    """Check how ``lp`` / ``rp`` map onto start and end for each direction."""
    # Forward: lp is the start and rp is the end.
    forward = Region(
        1,
        3,
        direction=Region.FORWARD,
        context=Context(9, True, start_index=1),
    )
    assert forward.lp == 1
    assert forward.rp == 3
    assert forward.start == forward.lp
    assert forward.end == forward.rp

    # Reverse: lp is the end and rp is the start.
    backward = Region(
        3,
        1,
        direction=Region.REVERSE,
        context=Context(9, False, start_index=1),
    )
    assert backward.lp == 1
    assert backward.rp == 3
    assert backward.start == backward.rp
    assert backward.end == backward.lp
def test_circular_gaps():
    """Check ``get_gap`` in both orders on a circular context.

        context:         |-------------------------|
        r1:              ----|                |-----
        r2:                       |----|
        r1.get_gap(r2)       |==|
        r2.get_gap(r1)                  |==|
    """
    wrapping = Region(80, 20, context=Context(100, True, start_index=0))
    middle = Region(50, 60, context=Context(100, True, start_index=0))

    # Gap from the end of the first region up to the start of the second.
    gap_after_wrapping = wrapping.get_gap(middle)
    assert gap_after_wrapping.start == 21
    assert gap_after_wrapping.end == 49

    # Gap from the end of the second region up to the start of the first.
    gap_after_middle = middle.get_gap(wrapping)
    assert gap_after_middle.start == 61
    assert gap_after_middle.end == 79
def test_region_schema():
    """Check the fields produced when a Region is dumped through its schema."""
    context = Context(1000, True, start_index=2)
    region = Region(5, 100, context, direction=Region.REVERSE, name="New region")

    dumped = schemas.RegionSchema().dump(region)

    assert dumped['start'] == 5
    assert dumped['end'] == 100
    assert dumped['context']['length'] == 1000
    assert dumped['direction'] == Region.REVERSE
    assert dumped['length'] == len(region)
def test_self_circular_gap():
    """Check the gap between a circular-context region and itself.

        context:     |-------------------------|
        r1:          ----|                |-----
        r1.gap(r1)       |>>>>>>>>>>>>>>>>|
    """
    wrapping = Region(80, 20, context=Context(100, True, start_index=0))

    gap = wrapping.get_gap(wrapping)

    # The gap spans the positions between the region's end and its start.
    assert gap.start == 21
    assert gap.end == 79
def test_gap_span():
    """Check ``get_gap_span`` for contained, overlapping and separated regions."""

    def circular_context():
        # Each region is built on its own context instance.
        return Context(100, True, start_index=1)

    # Second region lies inside the first: no span is reported.
    outer = Region(1, 100, context=circular_context())
    inner = Region(10, 90, context=circular_context())
    assert outer.get_gap_span(inner) is None

    # First region is always 1-10; only the start of the second one varies.
    # (start of second region, expected span)
    for second_start, expected_span in ((10, -1), (11, 0), (8, -3), (15, 4)):
        first = Region(1, 10, context=circular_context())
        second = Region(second_start, 90, context=circular_context())
        assert first.get_gap_span(second) == expected_span
def test_contig_schema():
    """Check the fields produced when a Contig is dumped through its schema."""
    query_context = Context(10000, True)
    subject_context = Context(5100, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)
    # The extra keyword argument is expected to show up under 'metadata'.
    contig = Contig(query_region, subject_region, "BLAST", data1=5)

    dumped = schemas.ContigSchema().dump(contig)

    assert dumped['query']['start'] == 100
    assert dumped['query']['end'] == 300
    assert dumped['query']['context']['length'] == 10000

    assert dumped['subject']['start'] == 500
    assert dumped['subject']['end'] == 700
    assert dumped['subject']['context']['length'] == 5100

    assert dumped['quality']
    assert dumped['metadata'] == {'data1': 5}
    assert dumped['alignment_length'] == 201
def test_divide_contigs():
    """Check how many contigs ``divide_contig`` returns for fixed cut points."""
    query_context = Context(10000, True)
    subject_context = Context(8000, True)
    query_region = ContigRegion(1000, 3000, context=query_context, forward=True)
    subject_region = ContigRegion(5000, 7000, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST")

    # Including the contig itself adds exactly one entry to the result.
    for include_self, expected_count in ((False, 12), (True, 13)):
        # Fresh position lists are passed on every call.
        pieces = contig.divide_contig(
            [1100, 1200], [2000, 2200, 2300], include_self=include_self
        )
        assert len(pieces) == expected_count
def test_region():
    """Check region bounds, position translation and constructor validation."""
    tiny = Region(2, 2, context=Context(3, True, start_index=1))
    assert tiny.bounds_end == 3

    length = 10
    start_index = 1
    region = Region(
        1, 2, context=Context(length, True, start_index=start_index)
    )
    assert region.bounds_end == 10
    assert region.bounds_start == start_index

    # Every valid position of the context, in order.
    values = list(range(start_index, length + start_index))
    print(values)
    assert len(values) == length

    last_position = start_index + length - 1

    # Positions before the start index produce negative list indices here, so
    # they are compared against values taken from the end of the list.
    for position in range(start_index - 3, last_position):
        assert region.context.translate_pos(position) == values[position - start_index]

    # From the last position onwards the expected index is shifted back by
    # the context length.
    for position in range(last_position, last_position + 3):
        expected = values[position - start_index - len(values)]
        assert region.context.translate_pos(position) == expected

    # start > end on a non-circular context is rejected.
    with pytest.raises(RegionError):
        Region(500, 100, context=Context(1000, False))

    start_index = 10
    region = Region(100, 500, context=Context(1000, True, start_index=start_index))
    assert region.bounds_start == start_index
    assert region.bounds_end == start_index + region.context_length - 1

    # Whatever end is requested, the stored end stays within the upper bound.
    for requested_end in range(-5, 300):
        region = Region(100, requested_end, context=Context(200, True, start_index=30))
        assert region.end <= region.bounds_end

    # Whatever start is requested, the stored start stays within the lower bound.
    for requested_start in range(-5, 300):
        region = Region(requested_start, 100, context=Context(200, True, start_index=30))
        assert region.start >= region.bounds_start