Exemplo n.º 1
0
    def _run(self):
        """Build all Mccortex graphs and run ``subgraph`` over them.

        Appends ``self.mccortex_builder`` to ``self.mccortex_builders``
        (mutating that list), builds each graph in its own
        ``mc_graph_<idx>`` directory under ``self.tmpdir``, writes the
        contig FASTA, records the output path in ``self.traversal`` and
        returns whatever ``runner.Cortexpy.subgraph`` returns.
        """
        self.mccortex_builders.append(self.mccortex_builder)

        graph_paths = []
        for builder_index, graph_builder in enumerate(self.mccortex_builders):
            graph_builder.with_kmer_size(self.kmer_size)
            out_dir = self.tmpdir / 'mc_graph_{}'.format(builder_index)
            out_dir.mkdir()
            # build() returns a pair here; only the first element is used.
            graph_path, _unused = graph_builder.build(out_dir)
            graph_paths.append(graph_path)

        contig_fasta = self._write_contig_fasta()

        self.traversal = self.tmpdir / 'traversal.ctx'

        # Fall back to the module-wide default when no explicit choice was made.
        spawn = SPAWN_PROCESS if self.spawn_process is None else self.spawn_process
        ctp_runner = runner.Cortexpy(spawn)

        return ctp_runner.subgraph(
            graphs=graph_paths,
            out=self.traversal,
            contig=contig_fasta,
            contig_fasta=True,
            verbose=self.verbose,
            silent=self.silent,
            colors=self.colors,
            logging_interval=self.logging_interval_seconds,
        )
Exemplo n.º 2
0
    def test_outputs_contigs_for_bubble(self, tmpdir):
        """Traverse from ``AAA`` and expect both input sequences back.

        A k=3 graph is built from two sequences; the traversal output is
        parsed as FASTA and must contain exactly those two records.
        """
        # given
        kmer_size = 3
        sequences = ['AAACCC', 'AAAGCCC']
        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in sequences:
            graph_builder.with_dna_sequence(sequence)

        graph = graph_builder.build(tmpdir)

        # when
        ctp = runner.Cortexpy(SPAWN_PROCESS)
        result = ctp.traverse(graph=graph, contig='AAA')

        fasta_text = result.stdout

        # then
        assert result.returncode == 0, result
        fasta = expectation.Fasta(fasta_text)
        for sequence in sequences:
            fasta.has_record(sequence)
        fasta.has_n_records(2)
Exemplo n.º 3
0
    def run(self):
        """Run the traverse driver, then ``traverse`` on its output graph.

        Returns an expectation object wrapping the result:
        ``expectation.Fasta`` for plain output, ``expectation.JsonGraph``
        for JSON output, or ``expectation.JsonGraphs`` built from files
        matching the ``subgraphs*`` prefix in ``self.tmpdir`` when
        ``self.subgraphs`` is set (which requires ``self.to_json``).
        """
        self.traverse_driver.run()

        out_prefix = None
        if self.subgraphs:
            assert self.to_json
            out_prefix = Path(str(self.tmpdir)) / 'subgraphs'
            # NOTE(review): out_prefix is not passed to traverse() below, so
            # it is unclear from here what writes the subgraph files - confirm.

        links = self.link_builder.build(self.tmpdir,
                                        self.traverse_driver.traversal)

        ctp = runner.Cortexpy(SPAWN_PROCESS)
        ret = ctp.traverse(
            self.traverse_driver.traversal,
            to_json=self.to_json,
            contig=self.seed_strings,
            graph_index=self.graph_index,
            extra_start_kmer=self.extra_start_kmer,
            links_file=links,
        )

        # Echo the captured streams before asserting so they show up on failure.
        print(ret.stdout)
        print(ret.stderr, file=sys.stderr)
        assert ret.returncode == 0, ret

        if not self.to_json:
            return expectation.Fasta(ret.stdout)
        if not self.subgraphs:
            return expectation.JsonGraph(json.loads(ret.stdout))
        subgraph_files = list(out_prefix.parent.glob(out_prefix.name + '*'))
        return expectation.JsonGraphs(subgraph_files)
Exemplo n.º 4
0
    def test_dal19_data(self, tmpdir):
        """Traverse a k=47 graph built from three overlapping sequences.

        Expects exactly two FASTA records: the first sequence extended by
        the tails of the second and third, and the third sequence itself.
        """
        # given
        first = 'CCCCGAGGGAAGCTCTATGAATTCGCCAATCCCAGTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGT'
        second = 'GTATGCAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATA'
        third = 'TAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACATAGATAACACTACCAAAGAGCAAGACTATCAG'
        sequences = [first, second, third]
        expected_sequences = [first + second[47:] + third[48:], third]
        kmer_size = 47
        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in sequences:
            graph_builder.with_dna_sequence(sequence)

        graph = graph_builder.build(tmpdir)

        # when
        ctp = runner.Cortexpy(SPAWN_PROCESS)
        result = ctp.traverse(
            to_json=False,
            graph=graph,
            contig='CAAAAAATGTTGGAGAGGTATCAAAAGTATTCACAAGAAAGTGACAT',
        )
        fasta_text = result.stdout

        # then
        assert result.returncode == 0, result
        fasta = expectation.Fasta(fasta_text)
        for sequence in expected_sequences:
            fasta.has_record(sequence)
        fasta.has_n_records(2)
Exemplo n.º 5
0
    def test_outputs_multiple_combinations(self, tmpdir):
        """Traverse from ``AAA`` and expect six records with ``g0_p<i>`` ids.

        The three input sequences plus three further sequences must each
        appear as a record whose id is one of ``g0_p0`` .. ``g0_p5``.
        """
        # given
        n_paths = 6
        kmer_size = 3
        sequences = ['CAACC', 'AAACA', 'AAACT']
        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in sequences:
            graph_builder.with_dna_sequence(sequence)

        graph = graph_builder.build(tmpdir)

        # when
        ctp = runner.Cortexpy(SPAWN_PROCESS)
        result = ctp.traverse(to_json=False, graph=graph, contig='AAA')
        fasta_text = result.stdout

        # then
        assert result.returncode == 0, result
        fasta = expectation.Fasta(fasta_text)
        allowed_ids = ['g0_p{}'.format(path_idx) for path_idx in range(n_paths)]
        # Input sequences first, then the additional expected sequences.
        for sequence in sequences + ['CAACT', 'CAACA', 'AAACC']:
            fasta.has_record(sequence).has_id_in(*allowed_ids)
        fasta.has_n_records(n_paths)
Exemplo n.º 6
0
    def run(self):
        """Write the initial sequences, build the graph and run ``assemble``.

        The strings in ``self.initial_seqs`` are written as FASTA records
        (ids are their list indices) to ``initial_seqs.fasta`` in
        ``self.tmpdir``; the Mccortex graph is built in the same directory
        and ``assemble`` is asked to write ``output.fa`` there.

        Returns:
            ``expectation.Fasta`` wrapping the text of the output file.

        Raises:
            AssertionError: if ``assemble`` reports a non-zero return code.
        """
        initial_seq_fasta = self.tmpdir / 'initial_seqs.fasta'
        initial_seqs = [
            SeqRecord(Seq(rec), id=str(idx))
            for idx, rec in enumerate(self.initial_seqs)
        ]
        SeqIO.write(initial_seqs, str(initial_seq_fasta), "fasta")
        output_graph = self.mccortex_builder.build(self.tmpdir)
        output = self.tmpdir / 'output.fa'

        completed_process = runner.Cortexpy(SPAWN_PROCESS).assemble(
            graph=output_graph, initial_seqs=initial_seq_fasta, out=output)

        assert completed_process.returncode == 0, completed_process
        # Close the handle deterministically instead of leaking it to the GC.
        with open(output, 'r') as fh:
            fasta_text = fh.read()
        return expectation.Fasta(fasta_text)
Exemplo n.º 7
0
    def test_prints_single_kmer(self, tmpdir):
        """``view_contig`` for contig ``CAA`` prints exactly one kmer line."""
        # given
        kmer_size = 3
        graph_builder = builder.Mccortex().with_dna_sequence('ACCAA')
        graph = graph_builder.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmer = 'CAA: CAA 1 1 .c...... ........'

        # when
        result = runner.Cortexpy().view_contig(graph=graph, contig='CAA')
        stdout = result.stdout

        # then
        kmer_strings = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert [expected_kmer] == kmer_strings
Exemplo n.º 8
0
    def test_with_empty_input_outputs_empty_fasta(self, tmpdir):
        """Traversing the GFA from an unconfigured builder yields no records."""
        # given
        gfa_path = str(tmpdir / 'input.gfa')
        fasta_path = tmpdir / 'output.fast'
        with open(gfa_path, 'w') as gfa_file:
            gfa_file.write(GfaBuilder().build())

        # when
        ctp = runner.Cortexpy()
        ctp.traverse(gfa_path, out=fasta_path, input_gfa=True)

        # then
        fasta = Fasta(fasta_path.read())
        fasta.has_no_records()
Exemplo n.º 9
0
    def test_prints_one_missing_kmer(self, tmpdir):
        """``view_contig`` for ``GGG`` on a graph built from ``AAAA``.

        Expects a single kmer line for the queried contig.
        """
        # given
        kmer_size = 3
        query = 'GGG'
        graph_builder = builder.Mccortex().with_dna_sequence('AAAA')
        graph = graph_builder.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmer = 'CCC: GGG 0 1 ........ ........'

        # when
        result = runner.Cortexpy().view_contig(graph=graph, contig=query)
        stdout = result.stdout

        # then
        kmer_strings = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert [expected_kmer] == kmer_strings
Exemplo n.º 10
0
    def test_does_not_raise_without_max_nodes(self, tmpdir):
        """``subgraph`` without ``max_nodes`` exits with return code 0."""
        # given
        query = 'CAACC'
        kmer_size = 3
        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in [query]:
            graph_builder.with_dna_sequence(sequence)

        graph = graph_builder.build(tmpdir)

        # when
        ctp = runner.Cortexpy(True)
        result = ctp.subgraph(
            graphs=[graph],
            contig=query,
            out=tmpdir / 'discarded.pickle',
        )

        # then
        assert 0 == result.returncode
Exemplo n.º 11
0
    def test_raises_on_max_nodes_exceeded(self, tmpdir):
        """``subgraph`` with ``max_nodes=1`` fails and reports the node count."""
        # given
        query = 'CAA'
        kmer_size = 3
        sequences = ['CAACC']
        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in sequences:
            graph_builder.with_dna_sequence(sequence)

        graph = graph_builder.build(tmpdir)

        # when
        ctp = runner.Cortexpy(spawn_process=True)
        result = ctp.subgraph(graphs=[graph], contig=query, max_nodes=1)

        # then
        assert 0 != result.returncode
        assert 'Max nodes (1) exceeded: 3 nodes found' in result.stderr
Exemplo n.º 12
0
    def test_prints_three_kmers_including_one_revcomp(self, tmpdir):
        """``view_graph`` on a k=3 graph of ``ACCTT`` prints three kmer lines."""
        # given
        sequence = 'ACCTT'
        kmer_size = 3
        graph_builder = builder.Mccortex().with_dna_sequence(sequence)
        graph = graph_builder.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmers = [
            'AAG 1 ......G.',
            'ACC 1 .......T',
            'AGG 1 a......T',
        ]

        # when
        result = runner.Cortexpy().view_graph(graph)
        stdout = result.stdout

        # then
        kmer_strings = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == kmer_strings
Exemplo n.º 13
0
    def run(self):
        """Build the graph, prune tips from it and load the pruned result.

        The pruned graph is written next to the built graph with its
        suffix replaced by ``.pruned.ctx``. Returns a
        ``KmerGraphExpectation`` wrapping the loaded pruned graph.
        """
        input_graph = self.mccortex_builder.build(self.tmpdir)

        # NOTE(review): this FASTA is written but not passed to prune() in
        # the visible code - confirm whether anything else reads it.
        contig_fasta = self.tmpdir / 'initial_contigs.fa'
        with open(str(contig_fasta), 'w') as fasta_file:
            SeqIO.write(self.records, fasta_file, 'fasta')

        pruner = runner.Cortexpy(SPAWN_PROCESS)
        pruned_path = Path(str(input_graph)).with_suffix('.pruned.ctx')
        result = pruner.prune(
            graph=input_graph,
            out=pruned_path,
            remove_tips=self.min_tip_length,
            verbose=True,
        )

        assert result.returncode == 0, result
        print(result.stdout)
        print(result.stderr, file=sys.stderr)

        pruned_graph = load_cortex_graph(pruned_path)
        return expectation.graph.KmerGraphExpectation(pruned_graph)
Exemplo n.º 14
0
    def test_prints_three_kmers(self, tmpdir):
        """``view_contig`` of ``ACCAA`` on its own k=3 graph prints three lines."""
        # given
        sequence = 'ACCAA'
        kmer_size = 3
        graph_builder = builder.Mccortex().with_dna_sequence(sequence)
        graph = graph_builder.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmers = [
            'ACC: ACC 1 1 ....A... ....A...',
            'CCA: CCA 1 1 a...A... a...A...',
            'CAA: CAA 1 1 .c...... .c......',
        ]

        # when
        result = runner.Cortexpy().view_contig(graph=graph, contig=sequence)
        stdout = result.stdout

        # then
        kmer_strings = CortexpyPrintOutputParser(stdout).get_kmer_strings()
        assert expected_kmers == kmer_strings
Exemplo n.º 15
0
    def test_prints_one_missing_and_one_revcomp_kmer(self, tmpdir):
        """``view_contig`` of ``ACTT`` on a graph of ``ACCTT`` prints two kmers."""
        # given
        graph_sequence = 'ACCTT'
        query = 'ACTT'
        kmer_size = 3
        graph_builder = builder.Mccortex().with_dna_sequence(graph_sequence)
        graph = graph_builder.with_kmer_size(kmer_size).build(tmpdir)

        expected_kmers = [
            'ACT: ACT 0 1 ........ .......T',
            'AAG: CTT 1 1 .C...... A.......',
        ]

        # when
        result = runner.Cortexpy().view_contig(graph=graph, contig=query)
        stdout = result.stdout
        expect = ViewExpectation(stdout)

        # then
        # Thread each check through the value returned by the previous one,
        # exactly as a fluent call chain would.
        chained = expect
        for kmer_line in expected_kmers[:2]:
            chained = chained.has_kmer(kmer_line)
        chained.has_n_kmers(2)
Exemplo n.º 16
0
    def test_outputs_fasta_of_kmers(self, tmpdir):
        """``view_graph(kmers=True)`` emits the graph's kmers as FASTA records."""
        # given
        sequence = 'ATTCC'
        kmer_size = 3
        graph_builder = builder.Mccortex().with_dna_sequence(sequence)
        graph = graph_builder.with_kmer_size(kmer_size).build(tmpdir)

        # when
        ctp = runner.Cortexpy(SPAWN_PROCESS)
        result = ctp.view_graph(kmers=True, graph=graph)

        fasta_text = result.stdout

        # then
        assert result.returncode == 0, result
        fasta = expectation.Fasta(fasta_text)
        for kmer in ('AAT', 'GAA', 'GGA'):
            fasta.has_record(kmer)
        fasta.has_n_records(3)
Exemplo n.º 17
0
    def test_raises_on_max_path_1_exceeded(self, tmpdir, max_paths):
        """Traverse with a ``max_paths`` limit on a graph of two sequences.

        With ``max_paths == 1`` the process must exit with code 64 and
        report the limit on stderr; for any other value it must succeed
        and output both input sequences.
        """
        # given
        MAX_PATH_EXIT_CODE = 64
        kmer_size = 3
        sequences = ['CAACC', 'CAACT']
        graph_builder = builder.Mccortex().with_kmer_size(kmer_size)
        for sequence in sequences:
            graph_builder.with_dna_sequence(sequence)

        # when
        ctp = runner.Cortexpy(spawn_process=True)
        result = ctp.traverse(
            graph=graph_builder.build(tmpdir),
            max_paths=max_paths,
        )

        # then
        if max_paths != 1:
            assert 0 == result.returncode
            fasta = expectation.Fasta(result.stdout)
            fasta.has_record('CAACC')
            fasta.has_record('CAACT')
            fasta.has_n_records(2)
            return

        assert MAX_PATH_EXIT_CODE == result.returncode
        assert f'Max paths ({max_paths}) exceeded' in result.stderr