def test_merge_pair(record1, record2, record4):
    """
    Assemble a compatible overlapping read pair.

    GCTGCACCGATGTACGCAAA
                 |||||||         -->   GCTGCACCGATGTACGCAAAGCTATTTAAAACC
                 ACGCAAAGCTATTTAAAACC

    Also check that a second pair (record1 with record4), declared with the
    same offset and overlap, is rejected by `merge_pair` with an
    AssertionError reporting incompatible reads.
    """
    pair = OverlappingReadPair(tail=record1, head=record2, offset=13,
                               overlap=7, sameorient=True, swapped=False)
    assert merge_pair(pair) == 'GCTGCACCGATGTACGCAAAGCTATTTAAAACC'

    pair = OverlappingReadPair(tail=record1, head=record4, offset=13,
                               overlap=7, sameorient=True, swapped=False)
    with pytest.raises(AssertionError) as excinfo:
        merge_pair(pair)
    # Inspect the raised exception itself (`.value`): the string form of the
    # ExceptionInfo wrapper differs between pytest versions. The check must
    # also live outside the `with` block, since nothing after the raising
    # call inside the block is ever executed.
    assert 'attempted to assemble incompatible reads' in str(excinfo.value)
def test_assembly_round2():
    """
    Merge a previously assembled contig with one additional read.

    Loads `var1.round2.augfastq` into a read graph, fetches the records
    named `contig1` and `read22f`, and computes their relative placement
    with `calc_offset` (the third argument is presumably the k-mer shared
    by both sequences -- TODO confirm). The read is expected to be the tail
    of the resulting pair and the contig the head.
    """
    fastq = kevlar.open(data_file('var1.round2.augfastq'), 'r')
    readgraph = kevlar.ReadGraph()
    readgraph.load(kevlar.parse_augmented_fastx(fastq))

    contig_record = readgraph.get_record('contig1')
    read_record = readgraph.get_record('read22f start=5,mutations=0')
    seed = 'AAGTCTCGACTTTAAGGAAGTGGGCCTAC'
    pair = calc_offset(contig_record, read_record, seed)

    # The pair is ordered read-first: the read is the tail, the contig the
    # head.
    assert pair.tail == read_record
    assert pair.head == contig_record

    expected = (
        'TATCACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGT'
        'TACACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTA'
        'CGGCGGAAGCCGTC'
    )
    assert merge_pair(pair) == expected
def test_graph_init():
    """
    Test graph initialization.

    Builds a read graph from `var1.reads.augfastq`, populates its edges in
    strict mode, and checks node count, neighbor count, edge attributes,
    component/partition counts, and the merge of one adjacent read pair.
    """
    fastq = kevlar.open(data_file('var1.reads.augfastq'), 'r')
    graph = kevlar.ReadGraph()
    graph.load(kevlar.parse_augmented_fastx(fastq))
    graph.populate_edges(strict=True)

    # All 10 reads in the file become nodes, even though read16f has no
    # valid connections due to error.
    assert len(graph.nodes()) == 10

    # read23f shares its interesting k-mer and has compatible overlaps with
    # 6 other reads (read13f and read15f have errors).
    r23name = 'read23f start=67,mutations=0'
    assert len(graph[r23name]) == 6

    # Spot-check the attributes stored on one of read23f's edges.
    r35name = 'read35f start=25,mutations=0'
    edge_23_35 = graph[r23name][r35name]
    assert edge_23_35['offset'] == 42
    assert edge_23_35['overlap'] == 58

    # Two connected components are expected while partitions() yields just
    # one -- presumably the second component is the unconnected read16f,
    # which partitions() leaves out (TODO confirm).
    assert len(list(connected_components(graph))) == 2
    assert len(list(graph.partitions())) == 1

    r8name = 'read8f start=8,mutations=0'
    r37name = 'read37f start=9,mutations=0'
    edge_37_8 = graph[r37name][r8name]
    assert edge_37_8['offset'] == 1
    assert edge_37_8['overlap'] == 99

    # Rebuild the same pair by hand (read8f as tail, read37f as head) and
    # verify the merged sequence.
    pair = OverlappingReadPair(
        tail=graph.get_record(r8name),
        head=graph.get_record(r37name),
        offset=1,
        overlap=99,
        sameorient=True,
        swapped=False,
    )
    expected = (
        'CACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGTTAC'
        'ACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTACGG'
        'CGGAAGCCGTC'
    )
    assert merge_pair(pair) == expected