def test_outputs_multiple_combinations(self, tmpdir):
    """Traverse from 'AAA' and expect six FASTA records.

    A k=3 graph is built from three sequences. The traversal output must
    contain the three inputs plus three further combinations, each with an
    id drawn from ``g0_p0`` .. ``g0_p5``.
    """
    # given
    sequences = ['CAACC', 'AAACA', 'AAACT']
    n_paths = 6
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.traverse(to_json=False,
                                          graph=graph_path,
                                          contig='AAA')
    fasta_text = completed_process.stdout

    # then
    assert completed_process.returncode == 0, completed_process
    expect = expectation.Fasta(fasta_text)
    allowed_ids = ['g0_p{}'.format(idx) for idx in range(n_paths)]
    # The original inputs come first, then the recombined paths.
    for path in sequences + ['CAACT', 'CAACA', 'AAACC']:
        expect.has_record(path).has_id_in(*allowed_ids)
    expect.has_n_records(6)
def test_with_bubble_and_two_colors_returns_all_kmers(
        self, tmpdir, ra_constructor):
    """Traverse from 'ACA' over color 0 in both orientations.

    The graph is built (k=3) from 'AAACAAG' and 'AAATAAG', plus a second
    'AAATAAG' registered under the name 'sample_1'. The test checks the
    two-value coverages of every node and the edges labelled 0 and 1.
    """
    # given
    kmer_size = 3
    output_graph = (builder.Mccortex(kmer_size)
                    .with_dna_sequence('AAACAAG')
                    .with_dna_sequence('AAATAAG')
                    .with_dna_sequence('AAATAAG', name='sample_1')
                    .build(tmpdir))

    # Open the graph through a context manager so the handle is closed
    # deterministically. Everything that could read from the file stays
    # inside the block, so lazily loaded data is still reachable.
    with open(output_graph, 'rb') as graph_handle:
        traverser = Engine(ra_constructor(graph_handle),
                           traversal_colors=(0, ),
                           orientation=EngineTraversalOrientation.both)

        # when
        expect = KmerGraphExpectation(traverser.traverse_from('ACA').graph,
                                      sort_edges=True)

        # then
        expect.has_node('AAA').has_coverages(2, 1)
        expect.has_node('AAC').has_coverages(1, 0)
        expect.has_node('ACA').has_coverages(1, 0)
        expect.has_node('CAA').has_coverages(1, 0)
        expect.has_node('AAG').has_coverages(2, 1)
        expect.has_node('AAT').has_coverages(1, 1)
        expect.has_node('ATA').has_coverages(1, 1)
        expect.has_node('TAA').has_coverages(1, 1)
        expect.has_edges('AAA AAC 0', 'AAC ACA 0', 'ACA CAA 0', 'CAA AAG 0',
                         'AAA AAT 0', 'AAT ATA 0', 'ATA TAA 0', 'TAA AAG 0',
                         'AAA AAT 1', 'AAT ATA 1', 'ATA TAA 1', 'AAG TAA 1')
def test_outputs_contigs_for_bubble(self, tmpdir):
    """Traverse from 'AAA' and expect both input sequences as FASTA records."""
    # given
    sequences = ['AAACCC', 'AAAGCCC']
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in sequences:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.traverse(graph=graph_path, contig='AAA')
    fasta_text = completed_process.stdout

    # then
    assert completed_process.returncode == 0, completed_process
    expect = expectation.Fasta(fasta_text)
    for sequence in sequences:
        expect.has_record(sequence)
    expect.has_n_records(2)
def test_dal19_data(self, tmpdir):
    """Traverse a k=47 graph built from three long reads.

    The expected output is two records: the first read extended by the
    tails of the second (from index 47) and third (from index 48), and
    the third read on its own.
    """
    # given
    first_read = 'CCCCGAGGGAAGCTCTATGAATTCGCCAATCCCAGTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGT'
    second_read = 'GTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATA'
    third_read = 'TAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATAGATAACACTACCAAAGAGCAAGACTATCAG'
    reads = [first_read, second_read, third_read]
    merged_read = first_read + second_read[47:] + third_read[48:]
    expected_records = [merged_read, third_read]

    graph_builder = builder.Mccortex().with_kmer_size(47)
    for read in reads:
        graph_builder.with_dna_sequence(read)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.traverse(
        to_json=False,
        graph=graph_path,
        contig='CAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACAT')
    fasta_text = completed_process.stdout

    # then
    assert completed_process.returncode == 0, completed_process
    expect = expectation.Fasta(fasta_text)
    for expected in expected_records:
        expect.has_record(expected)
    expect.has_n_records(2)
def test_prints_single_kmer(self, tmpdir):
    """Viewing contig 'CAA' in a graph of 'ACCAA' (k=3) prints one kmer line."""
    # given
    graph_path = (builder.Mccortex()
                  .with_dna_sequence('ACCAA')
                  .with_kmer_size(3)
                  .build(tmpdir))
    expected_lines = ['CAA: CAA 1 1 .c...... ........']

    # when
    completed = runner.Cortexpy().view_contig(graph=graph_path, contig='CAA')
    parser = CortexpyPrintOutputParser(completed.stdout)

    # then
    assert expected_lines == parser.get_kmer_strings()
def test_with_three_linked_kmers_and_two_colors_returns_three_kmers(
        self, tmpdir):
    """Retrieve the kmer graph for 'GTTT' from a graph of 'AAAC' and 'AAAT'.

    Expects exactly the nodes 'TTT', 'GTT' and 'ATT', with 'GTT'->'TTT'
    edges keyed 0 and 1 and an 'ATT'->'TTT' edge keyed 0.
    """
    # given
    kmer_size = 3
    output_graph = (builder.Mccortex(kmer_size)
                    .with_dna_sequence('AAAC')
                    .with_dna_sequence('AAAT')
                    .build(tmpdir))

    # Use a context manager so the graph file handle is always closed.
    # Retrieval and assertions stay inside the block in case the
    # retriever reads from the handle lazily.
    with open(output_graph, 'rb') as graph_handle:
        retriever = ContigRetriever(graph_handle)

        # when
        kmer_graph = retriever.get_kmer_graph('GTTT')

        # then
        assert set(kmer_graph.nodes) == {'TTT', 'GTT', 'ATT'}
        assert set(kmer_graph.edges) == {('GTT', 'TTT', 0),
                                         ('GTT', 'TTT', 1),
                                         ('ATT', 'TTT', 0)}
def test_prints_one_missing_kmer(self, tmpdir):
    """Viewing contig 'GGG' in a graph of 'AAAA' (k=3) prints one kmer line.

    The expected line is 'CCC: GGG 0 1 ........ ........'.
    """
    # given
    query = 'GGG'
    graph_path = (builder.Mccortex()
                  .with_dna_sequence('AAAA')
                  .with_kmer_size(3)
                  .build(tmpdir))
    expected_lines = ['CCC: GGG 0 1 ........ ........']

    # when
    completed = runner.Cortexpy().view_contig(graph=graph_path, contig=query)
    parser = CortexpyPrintOutputParser(completed.stdout)

    # then
    assert expected_lines == parser.get_kmer_strings()
def test_with_two_subgraphs_returns_all_kmers(self, tmpdir, ra_constructor):
    """Traverse from 'AAA' and 'GGG' in a graph built from 'AAAT' and 'GGGC'.

    Traversal uses color 0 in both orientations and is expected to yield
    the edges 'AAA AAT 0' and 'CCC GCC 0'.
    """
    # given
    kmer_size = 3
    output_graph = (builder.Mccortex(kmer_size)
                    .with_dna_sequence('AAAT')
                    .with_dna_sequence('GGGC')
                    .build(tmpdir))

    # Use a context manager so the graph file handle is always closed.
    # Traversal and assertions stay inside the block in case data is
    # read from the handle lazily.
    with open(output_graph, 'rb') as graph_handle:
        traverser = Engine(ra_constructor(graph_handle),
                           traversal_colors=(0, ),
                           orientation=EngineTraversalOrientation.both)

        # when
        expect = KmerGraphExpectation(
            traverser.traverse_from_each_kmer_in_iterable(['AAA', 'GGG']).graph)

        # then
        expect.has_edges('AAA AAT 0', 'CCC GCC 0')
def test_outputs_json(self, tmpdir):
    """``view_contig`` with ``to_json=True`` emits JSON for a directed graph."""
    # given
    sequence = 'ACCTT'
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(sequence)
                  .with_kmer_size(3)
                  .build(tmpdir))

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.view_contig(to_json=True,
                                             graph=graph_path,
                                             contig=sequence)
    json_text = completed_process.stdout

    # then
    assert completed_process.returncode == 0, completed_process
    parsed = json.loads(json_text)
    JsonGraph(parsed).is_directed()
def test_raises_on_max_nodes_exceeded(self, tmpdir):
    """``subgraph`` with ``max_nodes=1`` fails and reports the node count."""
    # given
    query = 'CAA'
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in ['CAACC']:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(spawn_process=True)
    completed_process = cortexpy.subgraph(graphs=[graph_path],
                                          contig=query,
                                          max_nodes=1)

    # then
    assert 0 != completed_process.returncode
    error_text = completed_process.stderr
    assert 'Max nodes (1) exceeded: 3 nodes found' in error_text
def test_does_not_raise_without_max_nodes(self, tmpdir):
    """``subgraph`` called without ``max_nodes`` exits with return code 0."""
    # given
    query = 'CAACC'
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in [query]:
        graph_builder.with_dna_sequence(sequence)
    graph_path = graph_builder.build(tmpdir)

    # when
    cortexpy = runner.Cortexpy(True)
    # The pickle output is not inspected; it only receives the result.
    completed_process = cortexpy.subgraph(
        graphs=[graph_path],
        contig=query,
        out=tmpdir / 'discarded.pickle',
    )

    # then
    assert 0 == completed_process.returncode
def test_prints_three_kmers_including_one_revcomp(self, tmpdir):
    """``view_graph`` on a graph of 'ACCTT' (k=3) prints three kmer lines."""
    # given
    sequence = 'ACCTT'
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(sequence)
                  .with_kmer_size(3)
                  .build(tmpdir))
    expected_lines = [
        'AAG 1 ......G.',
        'ACC 1 .......T',
        'AGG 1 a......T',
    ]

    # when
    completed = runner.Cortexpy().view_graph(graph_path)
    parser = CortexpyPrintOutputParser(completed.stdout)

    # then
    assert expected_lines == parser.get_kmer_strings()
def test_prints_three_kmers(self, tmpdir):
    """``view_contig`` of 'ACCAA' against its own k=3 graph prints three kmers."""
    # given
    sequence = 'ACCAA'
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(sequence)
                  .with_kmer_size(3)
                  .build(tmpdir))
    expected_lines = [
        'ACC: ACC 1 1 ....A... ....A...',
        'CCA: CCA 1 1 a...A... a...A...',
        'CAA: CAA 1 1 .c...... .c......',
    ]

    # when
    completed = runner.Cortexpy().view_contig(graph=graph_path,
                                              contig=sequence)
    parser = CortexpyPrintOutputParser(completed.stdout)

    # then
    assert expected_lines == parser.get_kmer_strings()
def test_prints_one_missing_and_one_revcomp_kmer(self, tmpdir):
    """``view_contig`` of 'ACTT' against a graph of 'ACCTT' prints two kmers.

    The expected lines are one for 'ACT' and one for 'CTT' (listed under
    'AAG').
    """
    # given
    graph_sequence = 'ACCTT'
    query = 'ACTT'
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(graph_sequence)
                  .with_kmer_size(3)
                  .build(tmpdir))
    first_expected = 'ACT: ACT 0 1 ........ .......T'
    second_expected = 'AAG: CTT 1 1 .C...... A.......'

    # when
    completed = runner.Cortexpy().view_contig(graph=graph_path, contig=query)
    expect = ViewExpectation(completed.stdout)

    # then
    (expect
     .has_kmer(first_expected)
     .has_kmer(second_expected)
     .has_n_kmers(2))
def test_collapse_kmer_unitigs_option_with_missing_kmers(self, tmpdir):
    """JSON ``view_contig`` for a query that extends past the graph.

    The graph is built (k=3) from 'AAACCCGAA' and 'ACCG', and the query is
    the first record plus a trailing 'G'. The JSON graph must have four
    nodes ('AAACC', 'C', 'GAA', 'G') and six edges.
    """
    # given
    primary = 'AAACCCGAA'
    secondary = 'ACCG'
    query = primary + 'G'
    kmer_size = 3
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(primary)
                  .with_dna_sequence(secondary)
                  .with_kmer_size(kmer_size)
                  .build(tmpdir))
    runner.Mccortex(kmer_size).view(graph_path)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.view_contig(contig=query,
                                             to_json=True,
                                             graph=graph_path)

    # then
    expect_zero_return_code(completed_process)
    json_text = completed_process.stdout
    expect = JsonGraph(json.loads(json_text))

    expect.has_n_nodes(4)
    expect.has_node_repr('AAACC').has_coverages_by_kmer([1, 1], [1, 1], [2, 1])
    expect.has_node_repr('C').has_coverages_by_kmer([1, 1])
    expect.has_node_repr('GAA').has_coverages_by_kmer([2, 1], [1, 1], [1, 1])
    expect.has_node_repr('G').has_coverages_by_kmer([0, 1])

    # These two edges are expected for both color 0 and color 1.
    shared_edges = [['AAACC', 'C'], ['C', 'GAA']]
    for color in [0, 1]:
        for source, target in shared_edges:
            expect.has_repr_edge(source, target, color)
    expect.has_repr_edge('GAA', 'G', 1)
    expect.has_repr_edge('AAACC', 'GAA', 0)
    expect.has_n_edges(6)
def test_outputs_fasta_of_kmers(self, tmpdir):
    """``view_graph`` with ``kmers=True`` emits one FASTA record per kmer.

    For a graph of 'ATTCC' (k=3) the expected records are 'AAT', 'GAA'
    and 'GGA'.
    """
    # given
    sequence = 'ATTCC'
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(sequence)
                  .with_kmer_size(3)
                  .build(tmpdir))

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.view_graph(kmers=True, graph=graph_path)
    fasta_text = completed_process.stdout

    # then
    assert completed_process.returncode == 0, completed_process
    expect = expectation.Fasta(fasta_text)
    for kmer in ('AAT', 'GAA', 'GGA'):
        expect.has_record(kmer)
    expect.has_n_records(3)
def test_collapse_kmer_unitigs_option(self, tmpdir):
    """JSON ``view_contig`` of 'AAACCCGAA' against a three-record graph.

    The graph is built (k=3) from 'AAACCCGAA', 'ACCG' and 'TTCGGGTTT'.
    The JSON graph must report colors [0, 1], three nodes
    ('AAACC', 'C', 'GAA') and five edges.
    """
    # given
    primary = 'AAACCCGAA'
    secondary = 'ACCG'
    tertiary = 'TTCGGGTTT'
    kmer_size = 3
    graph_path = (builder.Mccortex()
                  .with_dna_sequence(primary)
                  .with_dna_sequence(secondary)
                  .with_dna_sequence(tertiary)
                  .with_kmer_size(kmer_size)
                  .build(tmpdir))
    runner.Mccortex(kmer_size).view(graph_path)

    # when
    cortexpy = runner.Cortexpy(SPAWN_PROCESS)
    completed_process = cortexpy.view_contig(contig=primary,
                                             to_json=True,
                                             graph=graph_path)

    # then
    expect_zero_return_code(completed_process)
    json_text = completed_process.stdout
    expect = JsonGraph(json.loads(json_text))

    expect.has_colors([0, 1])
    expect.has_n_nodes(3)
    expect.has_node_repr('AAACC').has_coverages_by_kmer([2, 1], [2, 1], [3, 1])
    expect.has_node_repr('C').has_coverages_by_kmer([2, 1])
    expect.has_node_repr('GAA').has_coverages_by_kmer([3, 1], [2, 1], [2, 1])

    expected_edges = [
        ('AAACC', 'C', 0),
        ('AAACC', 'C', 1),
        ('C', 'GAA', 0),
        ('C', 'GAA', 1),
        ('AAACC', 'GAA', 0),
    ]
    for source, target, color in expected_edges:
        expect.has_repr_edge(source, target, color)
    expect.has_n_edges(5)
def test_raises_on_max_path_1_exceeded(self, tmpdir, max_paths):
    """``traverse`` fails with exit code 64 when ``max_paths`` is 1.

    For any other ``max_paths`` value the traversal must succeed and emit
    both input sequences as FASTA records.
    """
    # given
    max_path_exit_code = 64
    graph_builder = builder.Mccortex().with_kmer_size(3)
    for sequence in ['CAACC', 'CAACT']:
        graph_builder.with_dna_sequence(sequence)

    # when
    cortexpy = runner.Cortexpy(spawn_process=True)
    graph_path = graph_builder.build(tmpdir)
    completed_process = cortexpy.traverse(graph=graph_path,
                                          max_paths=max_paths)

    # then
    if max_paths == 1:
        assert max_path_exit_code == completed_process.returncode
        assert f'Max paths ({max_paths}) exceeded' in completed_process.stderr
        return

    assert 0 == completed_process.returncode
    expect = expectation.Fasta(completed_process.stdout)
    expect.has_record('CAACC')
    expect.has_record('CAACT')
    expect.has_n_records(2)