Пример #1
0
def test_merge_contained_with_offset_and_error(record7, record12):
    """
    Test merge with containment, offset, and a sequencing error.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||
                                          |||||||||||||||||
                                           |||||||||||||||||
                                            |||||||||||||||||
              CCCGGATACTTGAAGCAGGCAcC
              |||||||||||||||||    *

    The position marked `*` is the mismatch between the two reads. The test
    expects `calc_offset` to report `INCOMPATIBLE_PAIR` for them, and expects
    `merge_and_reannotate` to raise `AssertionError` when handed a manually
    constructed pair of the same reads.
    """
    pair = calc_offset(record7, record12, 'CCCGGATACTTGAAGCA')
    assert pair == INCOMPATIBLE_PAIR

    pair = OverlappingReadPair(tail=record7,
                               head=record12,
                               offset=10,
                               overlap=23,
                               sameorient=True,
                               swapped=False)
    with pytest.raises(AssertionError) as ae:
        merge_and_reannotate(pair, 'WontWork')
    # Check the message on the exception itself: str() of the ExceptionInfo
    # wrapper is a pytest-version-dependent representation, not the message.
    assert 'attempted to assemble incompatible reads' in str(ae.value)
Пример #2
0
def test_merge_and_reannotate_contained(record7, record10):
    """
    Merge a read with a second read that lies entirely within it.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||
                                          |||||||||||||||||
                                           |||||||||||||||||
                                            |||||||||||||||||
    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCT
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||

    The merged record is expected to carry the requested name while keeping
    the sequence and interesting k-mers of the longer read unchanged.
    """
    label = 'ContainedAtOne'
    contained = OverlappingReadPair(
        tail=record7, head=record10, offset=0, overlap=35,
        sameorient=True, swapped=False,
    )
    merged = merge_and_reannotate(contained, label)

    assert merged.name == label
    assert merged.sequence == record7.sequence
    assert merged.ikmers == record7.ikmers
Пример #3
0
def test_assembly_contigs():
    """
    Test merging of two contigs loaded from an augmented Fastq file.

    Loads `AluContigs.augfastq` into a read graph, computes the overlap of
    `contig6` and `contig7` from the given k-mer, and checks the reported
    offset, overlap, and tail as well as the merged sequence.
    """
    # Close the input once the graph is populated instead of leaking the
    # handle. NOTE(review): assumes kevlar.open returns a file-like context
    # manager and that graph.load consumes the stream eagerly -- confirm.
    with kevlar.open(data_file('AluContigs.augfastq'), 'r') as instream:
        graph = kevlar.ReadGraph()
        graph.load(kevlar.parse_augmented_fastx(instream))
    contig6 = graph.get_record('contig6')
    contig7 = graph.get_record('contig7')
    pair = calc_offset(contig6, contig7, 'AAAGTTTTCTTAAAAACATATATGGCCGGGC')
    assert pair.offset == 50
    assert pair.overlap == 85
    assert pair.tail == contig6
    newrecord = merge_and_reannotate(pair, 'newcontig')
    assert newrecord.sequence == ('TTGCCCAGGCTGGTCTCAAACTCCTGAGCTCAAAGCGATCTGT'
                                  'CGGCCTGGGCATCCAAAAAAAGTTTTCTTAAAAACATATATGG'
                                  'CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG'
                                  'GCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGACCATCC'
                                  'TGGCTAACACG')
Пример #4
0
def test_merge_and_reannotate_edge_case_opposite_orientation(record7, record9):
    """
    Test merge/reannotation with edge cases to check for fencepost errors.

    CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTATGTGAGGCGATAACTCAA
        |||||||||||||||||
         |||||||||||||||||
              |||||||||||||||||
                                          |||||||||||||||||
                                           |||||||||||||||||
                                            |||||||||||||||||
               reverse complement  ----->  GTATGTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACGCGAGCGCCTTGCT
                                           |||||||||||||||||
                                            |||||||||||||||||
                                                                                     |||||||||||||||||
                                                                                      |||||||||||||||||

    Every one of the 8 expected interesting k-mers is checked for both its
    sequence and its offset in the merged contig; the last k-mer ends exactly
    at the final base of the contig.
    """  # noqa
    pair = OverlappingReadPair(tail=record7,
                               head=record9,
                               offset=39,
                               overlap=21,
                               sameorient=False,
                               swapped=False)
    newrecord = merge_and_reannotate(pair, 'SoMeCoNtIg')
    assert newrecord.name == 'SoMeCoNtIg'
    assert newrecord.sequence == ('CAGGTCCCCACCCGGATACTTGAAGCAGGCAGCCTCAAGGTAT'
                                  'GTGAGGCGATAACTCAAGACCACGGGAGCTCACTTCGTTGACG'
                                  'CGAGCGCCTTGCT')
    assert len(newrecord.ikmers) == 8

    testseqs = [
        'TCCCCACCCGGATACTT',
        'CCCCACCCGGATACTTG',
        'CCCGGATACTTGAAGCA',
        'GGTATGTGAGGCGATAA',
        'GTATGTGAGGCGATAAC',
        'TATGTGAGGCGATAACT',
        # The two entries below were missing, so zip() stopped after six
        # k-mers and offsets 81 and 82 were never verified. They are the
        # 17-mers of the expected contig at those offsets.
        # NOTE(review): assumes k-mer sequences are stored in contig
        # orientation, as the six entries above are -- confirm.
        'TGACGCGAGCGCCTTGC',
        'GACGCGAGCGCCTTGCT',
    ]
    testoffsets = [4, 5, 10, 38, 39, 40, 81, 82]
    # Guard against silent truncation by zip() if the lists drift apart.
    assert len(testseqs) == len(testoffsets) == len(newrecord.ikmers)
    for kmer, seq, offset in zip(newrecord.ikmers, testseqs, testoffsets):
        assert kmer.sequence == seq
        assert kmer.offset == offset
Пример #5
0
def test_merge_and_reannotate_opposite_orientation(record1, record3):
    """
    Merge two reads given in opposite orientations and check the k-mers.

    GCTGCACCGATGTACGCAAA
                  |||||                 -->   GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC                       *****        *****

    The merged record must have the requested name, the full contig sequence,
    and two interesting k-mers at offsets 14 and 27.
    """
    label = 'contig1'
    readpair = OverlappingReadPair(
        tail=record1, head=record3, offset=13, overlap=7,
        sameorient=False, swapped=False,
    )
    contig = merge_and_reannotate(readpair, label)

    assert contig.name == label
    assert contig.sequence == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'
    assert len(contig.ikmers) == 2
    first, second = contig.ikmers[0], contig.ikmers[1]
    assert first.offset == 14
    assert second.offset == 27