def test_modify_query_ends(query_subject_example):
    """Check that ``modify_query_ends`` moves both query ends as requested.

    After the call the query's left/right ends must equal the requested
    positions, and the query and subject lengths must still agree.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")

    # Targets are derived from the fixture query: left end +15, right end -10.
    new_left = query.left_end + 15
    new_right = query.right_end - 10
    contig.modify_query_ends(new_left, new_right)

    assert contig.query.left_end == new_left
    assert contig.query.right_end == new_right
    assert contig.query.length == contig.subject.length
def test_contig_copy(query_subject_example):
    """Check that copying a contig yields a distinct object with a new id.

    Both ``Contig.copy()`` and the module-level ``copy`` callable
    (presumably ``copy.copy`` -- confirm against the file's imports) are
    exercised; the two copies must not share a ``contig_id`` with the
    original or with each other.
    """
    query, subject = query_subject_example
    original = Contig(query, subject, "Example")

    method_copy = original.copy()
    assert method_copy is not original

    function_copy = copy(original)
    assert function_copy is not original

    assert function_copy.contig_id != original.contig_id
    assert method_copy.contig_id != original.contig_id
    assert method_copy.contig_id != function_copy.contig_id
def test_modify_subject(query_subject_example):
    """Check ``Contig.modify_subject`` against translated target positions.

    The subject is moved to its original start/end + 10. The test expects
    the query at its original start/end - 10 and equal query/subject
    lengths afterwards. Every expected position is passed through the
    owning region's ``context.translate_pos`` before comparison.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")

    # All expected positions are captured before the contig is modified.
    expected_query_start = query.start - 10
    expected_query_end = query.end - 10
    new_subject_start = subject.start + 10
    new_subject_end = subject.end + 10

    contig.modify_subject(new_subject_start, new_subject_end)

    query_context = contig.query.context
    assert contig.query.start == query_context.translate_pos(expected_query_start)
    assert contig.query.end == contig.query.context.translate_pos(expected_query_end)
    assert contig.query.length == contig.subject.length
    assert contig.subject.start == contig.subject.context.translate_pos(new_subject_start)
    assert contig.subject.end == contig.subject.context.translate_pos(new_subject_end)
def test_subquery():
    """Check the length of regions returned by ``Contig.sub_query``.

    With a query spanning 100..300, ``sub_query(200, 250)`` must have
    length 51 and ``sub_query(220, 300)`` length 81.
    """
    query_context = Context(10000, True)
    subject_context = Context(5000, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)

    contig = Contig(query_region, subject_region, "BLAST")

    # Both sub-queries are taken before any assertion runs.
    inner_slice = contig.sub_query(200, 250)
    tail_slice = contig.sub_query(220, 300)

    assert len(inner_slice) == 51
    assert len(tail_slice) == 81
def test_contig_reverse_direction(query_subject_example):
    """Check that ``reverse_direction`` flips both regions of a contig.

    After the call, start and end of the query and of the subject must be
    swapped relative to snapshots taken beforehand, and both sequences
    must be the reversed snapshot sequences.
    """
    query, subject = query_subject_example
    bases = "ACGTAGTGCTGTSCGGCTGTGATGCGT"
    query.sequence = bases
    subject.sequence = bases
    contig = Contig(query, subject, "Example")

    # Snapshots of the fixture regions, taken before the reversal.
    query_before = query.copy()
    subject_before = subject.copy()

    contig.reverse_direction()

    assert contig.query.start == query_before.end
    assert contig.query.end == query_before.start
    assert contig.subject.start == subject_before.end
    assert contig.subject.end == subject_before.start
    assert contig.query.sequence == query_before.sequence[::-1]
    assert contig.subject.sequence == subject_before.sequence[::-1]
def test_contig_constructor_with_context_error(query_subject_example):
    """This should raise a ContigError since the region lengths between
    r1 and r2 are different.

    The ``query_subject_example`` fixture is requested but its value is
    not used; the regions under test are built locally.
    """
    # Build the mismatched regions (1..2 vs 1..20) outside the ``raises``
    # block so that an unexpected error during setup cannot satisfy the
    # expectation -- only the Contig constructor is allowed to raise.
    r1 = Region(1, 2, context=Context(100, True, start_index=1))
    r2 = Region(1, 20, context=Context(100, True, start_index=1))

    with pytest.raises(ContigError):
        Contig(r1, r2, "Throws error")
def test_divide_contigs():
    """Check how many contigs ``Contig.divide_contig`` returns.

    With the two point lists used below, 12 contigs are expected when
    ``include_self`` is False and 13 when it is True.
    """
    query_context = Context(10000, True)
    subject_context = Context(8000, True)
    query_region = ContigRegion(1000, 3000, context=query_context, forward=True)
    subject_region = ContigRegion(5000, 7000, context=subject_context, forward=True)

    contig = Contig(query_region, subject_region, "BLAST")

    # (include_self, expected number of returned contigs); the case that
    # does not include the contig itself runs first.
    cases = ((False, 12), (True, 13))
    for include_self, expected_count in cases:
        divided_contigs = contig.divide_contig(
            [1100, 1200],
            [2000, 2200, 2300],
            include_self=include_self)
        assert len(divided_contigs) == expected_count
def test_modify_subject_end(query_subject_example):
    """Check ``Contig.modify_subject_end`` for accepted and rejected targets.

    Targets are the subject end shifted by a multiple of the subject
    length. Shifts of +/-0.9 lengths must be applied (the subject end then
    equals the context-translated target); shifts of +/-200.1 lengths must
    raise ``RegionError``.
    """
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")
    region = contig.subject

    # Every target is computed from the unmodified region before any call.
    accepted_targets = [
        region.end + int(region.length * factor) for factor in (0.9, -0.9)
    ]
    rejected_targets = [
        region.end + int(region.length * factor) for factor in (200.1, -200.1)
    ]

    for target in accepted_targets:
        contig.modify_subject_end(target)
        assert region.end == region.context.translate_pos(target)

    for target in rejected_targets:
        with pytest.raises(RegionError):
            contig.modify_subject_end(target)
def test_contig_schema():
    """Check the dictionary produced by ``schemas.ContigSchema().dump``.

    The dump must expose start, end and context length for both the query
    and the subject, a truthy ``quality`` entry, the extra keyword
    argument under ``metadata`` and an ``alignment_length`` of 201.
    """
    query_context = Context(10000, True)
    subject_context = Context(5100, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST", data1=5)

    data = schemas.ContigSchema().dump(contig)

    # (top-level key, expected start, expected end, expected context length)
    expected_regions = (
        ('query', 100, 300, 10000),
        ('subject', 500, 700, 5100),
    )
    for key, start, end, context_length in expected_regions:
        assert data[key]['start'] == start
        assert data[key]['end'] == end
        assert data[key]['context']['length'] == context_length

    assert data['quality']
    assert data['metadata'] == {'data1': 5}
    assert data['alignment_length'] == 201
    def perfect_matches(self, rc=True):
        """
        Pseudo-blast for finding perfect sequence matches (i.e. primers).

        Each database sequence, and its reverse complement, is lower-cased,
        has every ``n``/``N`` replaced by the regex wildcard ``.`` and is
        then searched as a regular expression in the (equally normalised)
        query sequence. Every hit becomes a ``Contig`` of type
        ``Contig.TYPE_PRIMER``. For each database sequence, forward hits
        are added before reverse-complement hits.

        :param rc: currently never read -- reverse-complement matches are
            always searched. NOTE(review): presumably meant to toggle the
            reverse-complement search; confirm before relying on it.
        :return: a ``ContigContainer`` whose ``contigs`` list holds one
            contig per match
        """
        # Get primer sequences (defined in db); only the sequence records
        # of the returned triple are used here.
        _, seqs, _ = self.concate_db_to_fsa()

        contig_container = ContigContainer()

        # Get the query sequence, normalised the same way as the primers.
        query_seq_str = str(open_sequence(self.query)[0].seq)
        query_seq_str = re.sub('[nN]', '.', query_seq_str).lower()

        for seq in seqs:
            seq_str = str(seq.seq)
            try:
                rc_seq_str = dna_reverse_complement(seq_str)
            except KeyError:
                # Sequences whose reverse complement cannot be computed
                # are skipped entirely (no forward search either).
                continue
            seq_str = re.sub('[nN]', '.', seq_str).lower()
            rc_seq_str = re.sub('[nN]', '.', rc_seq_str).lower()

            seq_length = len(seq)
            first_index = ContigRegion.START_INDEX

            # (pattern, subject start, subject end, subject forward flag).
            # Reverse-complement hits get a subject running from the last
            # position back to the first, flagged as not forward.
            searches = (
                (seq_str, first_index, seq_length, True),
                (rc_seq_str, seq_length, first_index, False),
            )
            for pattern, subject_start, subject_end, forward in searches:
                for match in re.finditer(pattern, query_seq_str):
                    # NOTE(review): reverse hits also store the forward
                    # primer string as the subject sequence -- confirm
                    # this is intended.
                    subject = ContigRegion(
                        seq.id,
                        subject_start,
                        subject_end,
                        seq_length,
                        False,
                        forward,
                        sequence=seq_str,
                    )

                    # match.end() is exclusive, hence the -1 for the
                    # inclusive region end.
                    query = ContigRegion(
                        self.query,
                        match.start() + first_index,
                        match.end() + first_index - 1,
                        self.query_length,
                        self.query_circular,
                        True,
                    )

                    contig_container.contigs.append(
                        Contig(query, subject, Contig.TYPE_PRIMER)
                    )
        return contig_container
示例#11
0
def test_alignment_length(query_subject_example):
    """Check that a contig's ``alignment_length`` equals the query length."""
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")
    expected_length = query.length
    assert contig.alignment_length == expected_length
示例#12
0
def test_invalid_region(query_subject_example):
    """Check that an altered query region makes the constructor fail.

    The fixture query's ``start`` is overwritten with 2 before the contig
    is built; constructing the contig must then raise ``ContigError``.
    """
    query, subject = query_subject_example
    query.start = 2

    with pytest.raises(ContigError):
        Contig(query, subject, "Example")
示例#13
0
def test_contig_constructor(query_subject_example):
    """Check that a contig built from a query and a subject exposes
    regions that compare equal to the ones passed in."""
    query, subject = query_subject_example
    contig = Contig(query, subject, "Example")

    assert contig.query == query
    assert contig.subject == subject
示例#14
0
def test_fuse():
    """Exercise ``Contig.fuse`` on adjacent contigs.

    Scenarios, in order:

    1. fusing two contigs without sequences extends the query end to 600;
    2. fusing when both contigs carry query and subject sequences
       concatenates the sequences;
    3. fusing when only one contig carries sequences raises ``ContigError``;
    4. fusing contigs whose query regions use different contexts raises
       ``ContigError``.
    """
    # Shared contexts and regions; each scenario builds fresh contigs
    # from them.
    main_context = Context(10000, True)
    other_context = Context(5100, True)
    query_head = ContigRegion(100, 300, context=main_context, forward=True)
    query_tail = ContigRegion(301, 600, context=main_context, forward=True)
    query_tail_other = ContigRegion(301, 600, context=other_context, forward=True)
    subject_head = ContigRegion(500, 700, context=other_context, forward=True)
    subject_tail = ContigRegion(701, 1000, context=other_context, forward=True)
    subject_tail_twin = ContigRegion(701, 1000, context=other_context, forward=True)

    # Scenario 1: plain fuse without sequences.
    head = Contig(query_head, subject_head, "BLAST")
    tail = Contig(query_tail, subject_tail, "BLAST")

    head.fuse(tail)
    assert head.query.end == 600

    # Scenario 2: both contigs carry sequences, which must be concatenated.
    head = Contig(query_head, subject_head, "BLAST")
    tail = Contig(query_tail, subject_tail, "BLAST")
    head_query_seq = "A" * head.query.length
    head_subject_seq = "B" * head.subject.length
    tail_query_seq = "C" * tail.query.length
    tail_subject_seq = "D" * tail.subject.length
    head.query.sequence = head_query_seq
    head.subject.sequence = head_subject_seq
    tail.query.sequence = tail_query_seq
    tail.subject.sequence = tail_subject_seq

    head.fuse(tail)
    assert head.query.sequence == head_query_seq + tail_query_seq
    assert head.subject.sequence == head_subject_seq + tail_subject_seq

    # Scenario 3: raises an error if one contig has sequences and the
    # other does not.
    head = Contig(query_head, subject_head, "BLAST")
    tail = Contig(query_tail, subject_tail, "BLAST")
    head.query.sequence = "AGTCTGAGCTGTCGTGATAGTGCTGA"
    head.subject.sequence = "AGTYAGYCHYAYHCYHYSCHYHAYCY"

    with pytest.raises(ContigError):
        head.fuse(tail)

    # Scenario 4: raises an error if the query contexts differ. The head
    # contig is still constructed here but takes no part in the fuse.
    head = Contig(query_head, subject_head, "BLAST")
    tail = Contig(query_tail, subject_tail, "BLAST")
    foreign_tail = Contig(query_tail_other, subject_tail_twin, "BLAST")
    with pytest.raises(ContigError):
        foreign_tail.fuse(tail)