Exemplo n.º 1
0
 def test_prepare_nextflow_input_files(self):
     """test _prepare_nextflow_input_files

     Builds a pipeline from placeholder input files, runs the output
     directory and nextflow input preparation steps, and checks that the
     output directory and the nextflow input TSV exist afterwards.
     """
     # Contents of the files is checked elsewhere.
     # We'll just check that the files exist
     ref_fasta = "tmp.prepare_nextflow_input_files.in.ref.fa"
     outdir = "tmp.prepare_nextflow_input_files.outdir"
     data_tsv = "tmp.prepare_nextflow_input_files.in.tsv"
     vcf_file = "tmp.prepare_nextflow_input_files.in.vcf"
     reads_file = "tmp.prepare_nextflow_input_files.in.reads"
     try:
         # The reference only needs to exist, so it is left empty.
         with open(ref_fasta, "w"):
             pass
         # A single sample line: VCF path and reads path, tab separated.
         with open(data_tsv, "w") as f:
             print(vcf_file, reads_file, sep="\t", file=f)
         with open(vcf_file, "w"), open(reads_file, "w"):
             pass
         # Start from a clean slate if an earlier run left output behind.
         if os.path.exists(outdir):
             shutil.rmtree(outdir)
         pipeline = multi_sample_pipeline.MultiSamplePipeline(
             ref_fasta, data_tsv, outdir)
         pipeline._make_output_dir()
         pipeline._prepare_nextflow_input_files()
         self.assertTrue(os.path.exists(outdir))
         self.assertTrue(os.path.exists(pipeline.nextflow_input_tsv))
     finally:
         # Clean up even when an assertion or the pipeline fails, so temporary
         # files do not pile up in the working directory. Each path is checked
         # first because a failure may occur before it was created.
         if os.path.exists(outdir):
             shutil.rmtree(outdir)
         for path in (ref_fasta, data_tsv, vcf_file, reads_file):
             if os.path.exists(path):
                 os.unlink(path)
Exemplo n.º 2
0
def run(options):
    """Build a MultiSamplePipeline from ``options`` and run it.

    Args:
        options: Object exposing the attributes read below (presumably an
            argparse namespace -- confirm against the caller).

    Most attributes are forwarded under the same name. The two exceptions
    are ``max_read_length``, passed as ``gramtools_max_read_length``, and
    ``no_clean``, which is negated and passed as ``clean``.
    """
    pipeline_cls = multi_sample_pipeline.MultiSamplePipeline
    positional = (options.ref_fasta, options.data_tsv, options.outdir)
    keyword = {
        "max_alleles_per_cluster": options.max_alleles_per_cluster,
        "min_large_ref_length": options.min_large_ref_length,
        # The option is named max_read_length; the constructor keyword
        # carries a gramtools_ prefix.
        "gramtools_max_read_length": options.max_read_length,
        "gramtools_kmer_size": options.gramtools_kmer_size,
        "gramtools_build_threads": options.gramtools_build_threads,
        "nextflow_config_file": options.nextflow_config_file,
        "nextflow_work_dir": options.nextflow_work_dir,
        "force": options.force,
        "no_run": options.no_run,
        # The option is a negative flag, so invert it for the constructor.
        "clean": not options.no_clean,
        "variants_per_split": options.variants_per_split,
        "alleles_per_split": options.alleles_per_split,
        "total_splits": options.total_splits,
        "nf_ram_cluster_small_vars": options.nf_ram_cluster_small_vars,
        "nf_ram_gramtools_build_small": options.nf_ram_gramtools_build_small,
        "nf_ram_minos_small_vars": options.nf_ram_minos_small_vars,
        "nf_ram_merge_small_vars": options.nf_ram_merge_small_vars,
        "testing": options.testing,
        "use_unmapped_reads": options.use_unmapped_reads,
    }
    pipeline_cls(*positional, **keyword).run()
Exemplo n.º 3
0
 def test_prepare_nextflow_input_files(self):
     '''test _prepare_nextflow_input_files

     Builds a pipeline from placeholder input files, runs the output
     directory and nextflow input preparation steps, and checks that the
     output directory and the nextflow input TSV exist afterwards.
     '''
     # Contents of the files is checked elsewhere.
     # We'll just check that the files exist
     ref_fasta = 'tmp.prepare_nextflow_input_files.in.ref.fa'
     outdir = 'tmp.prepare_nextflow_input_files.outdir'
     data_tsv = 'tmp.prepare_nextflow_input_files.in.tsv'
     vcf_file = 'tmp.prepare_nextflow_input_files.in.vcf'
     reads_file = 'tmp.prepare_nextflow_input_files.in.reads'
     try:
         # The reference only needs to exist, so it is left empty.
         with open(ref_fasta, 'w'):
             pass
         # A single sample line: VCF path and reads path, tab separated.
         with open(data_tsv, 'w') as f:
             print(vcf_file, reads_file, sep='\t', file=f)
         with open(vcf_file, 'w'), open(reads_file, 'w'):
             pass
         # Start from a clean slate if an earlier run left output behind.
         if os.path.exists(outdir):
             shutil.rmtree(outdir)
         pipeline = multi_sample_pipeline.MultiSamplePipeline(
             ref_fasta, data_tsv, outdir)
         pipeline._make_output_dir()
         pipeline._prepare_nextflow_input_files()
         self.assertTrue(os.path.exists(outdir))
         self.assertTrue(os.path.exists(pipeline.nextflow_input_tsv))
     finally:
         # Clean up even when an assertion or the pipeline fails, so temporary
         # files do not pile up in the working directory. Each path is checked
         # first because a failure may occur before it was created.
         if os.path.exists(outdir):
             shutil.rmtree(outdir)
         for path in (ref_fasta, data_tsv, vcf_file, reads_file):
             if os.path.exists(path):
                 os.unlink(path)
Exemplo n.º 4
0
    def test_run_with_small_var_vcf_chunking_total_splits(self):
        '''test run with chunking small variant VCF file using total_splits option'''
        tsv_path = 'tmp.multi_sample_pipeline.run.in.tsv'
        reference = os.path.join(data_dir, 'run.ref.0.fa')
        # One line per sample: calls VCF, then sorted BAM, tab separated.
        with open(tsv_path, 'w') as tsv_handle:
            for sample in ('1', '2'):
                bam_path = os.path.join(data_dir, 'run.reads.' + sample + '.sorted.bam')
                calls_path = os.path.join(data_dir, 'run.calls.' + sample + '.vcf')
                print(calls_path, bam_path, sep='\t', file=tsv_handle)

        # Remove output left over from a previous run before starting.
        run_dir = 'tmp.multi_sample_pipeline.run.out'
        if os.path.exists(run_dir):
            shutil.rmtree(run_dir)

        pipeline = multi_sample_pipeline.MultiSamplePipeline(
            reference,
            tsv_path,
            run_dir,
            total_splits=3,
            min_large_ref_length=10,
            testing=True,
            clean=False,
        )
        pipeline.run()

        # The date, minos version, and bcftools versions might not match,
        # so header lines carrying them are excluded from the comparison.
        volatile_prefixes = ('##fileDate', '##source=minos', '##bcftools_mergeVersion')

        def comparable(header_lines):
            return [line for line in header_lines if not line.startswith(volatile_prefixes)]

        want_header, want_records = vcf_file_read.vcf_file_to_list(
            os.path.join(data_dir, 'run.out.vcf'))
        combined_vcf = os.path.join(run_dir, 'combined_calls.vcf')
        self.assertTrue(os.path.exists(combined_vcf))
        have_header, have_records = vcf_file_read.vcf_file_to_list(combined_vcf)
        self.assertEqual(comparable(want_header), comparable(have_header))
        self.assertEqual(want_records, have_records)

        shutil.rmtree(run_dir)
        os.unlink(tsv_path)
Exemplo n.º 5
0
    def _test_run_with_small_var_vcf_chunking_vars_per_split(self):
        """test run with chunking small variant VCF file using variants_per_split option"""
        # NOTE(review): the leading underscore keeps this method outside the
        # default "test" prefix used for test discovery -- confirm whether it
        # is meant to stay disabled.
        tsv_path = "tmp.multi_sample_pipeline.run.in.tsv"
        reference = os.path.join(data_dir, "run.ref.0.fa")
        # One line per sample: calls VCF followed by two reads columns, both
        # pointing at the same sorted BAM.
        with open(tsv_path, "w") as tsv_handle:
            for sample in ("1", "2"):
                bam_name = "run.reads." + sample + ".sorted.bam"
                first_reads = os.path.join(data_dir, bam_name)
                second_reads = os.path.join(data_dir, bam_name)
                calls_path = os.path.join(data_dir, "run.calls." + sample + ".vcf")
                print(calls_path, first_reads, second_reads,
                      sep="\t", file=tsv_handle)

        # Remove output left over from a previous run before starting.
        run_dir = "tmp.multi_sample_pipeline.run.out"
        if os.path.exists(run_dir):
            shutil.rmtree(run_dir)

        settings = {
            "variants_per_split": 3,
            "min_large_ref_length": 10,
            "testing": True,
            "clean": False,
        }
        pipeline = multi_sample_pipeline.MultiSamplePipeline(
            reference, tsv_path, run_dir, **settings)
        pipeline.run()

        # The date, minos version, and bcftools versions might not match,
        # so header lines carrying them are excluded from the comparison.
        volatile_prefixes = (
            "##fileDate",
            "##source=minos",
            "##bcftools_mergeVersion",
        )

        def comparable(header_lines):
            return [
                line for line in header_lines
                if not line.startswith(volatile_prefixes)
            ]

        want_header, want_records = vcf_file_read.vcf_file_to_list(
            os.path.join(data_dir, "run.out.vcf"))
        combined_vcf = os.path.join(run_dir, "combined_calls.vcf")
        self.assertTrue(os.path.exists(combined_vcf))
        have_header, have_records = vcf_file_read.vcf_file_to_list(combined_vcf)
        self.assertEqual(comparable(want_header), comparable(have_header))
        self.assertEqual(want_records, have_records)

        shutil.rmtree(run_dir)
        os.unlink(tsv_path)