示例#1
0
def run(options):
    """Map a set of paired reads files against a reference.

    ``options`` must provide:
        reads_files  -- flat list alternating forward/reverse reads files:
                        (fwd1, rev1, fwd2, rev2, ...)
        ref_fasta    -- reference FASTA filename
        outfile      -- output filename passed to read_map.map_reads_set
        unsorted_sam -- if True, duplicate removal is skipped
        sample_name  -- sample name used in the read group ("1", sample_name)

    Raises:
        ValueError: if reads_files does not contain an even number of files
            (i.e. the forward/reverse files do not pair up).
    """
    # An assert would be stripped when running under `python -O`, so raise
    # explicitly to guarantee bad input is always rejected.
    if len(options.reads_files) % 2 != 0:
        raise ValueError(
            "Must give an even number of reads files (forward/reverse pairs). "
            f"Got {len(options.reads_files)}: {options.reads_files}"
        )
    # Even indices are forward reads, odd indices are reverse reads.
    # Slicing an even-length list guarantees the two lists have equal length.
    reads1_list = options.reads_files[0::2]
    reads2_list = options.reads_files[1::2]
    read_map.map_reads_set(
        options.ref_fasta,
        reads1_list,
        reads2_list,
        options.outfile,
        rmdup=not options.unsorted_sam,
        read_group=("1", options.sample_name),
    )
示例#2
0
    def test_map_reads_set(self):
        """test map_reads_set"""
        reads1 = [
            os.path.join(data_dir, "reads_set_" + str(i) + ".1.fq")
            for i in range(1, 4, 1)
        ]
        reads2 = [
            os.path.join(data_dir, "reads_set_" + str(i) + ".2.fq")
            for i in range(1, 4, 1)
        ]
        ref_fasta = os.path.join(data_dir, "ref.fa")
        # BUG FIX: tmp_bam was previously referenced in this cleanup before
        # it was assigned; the resulting NameError was silently swallowed by
        # a bare `except:`, so a leftover file from an earlier run was never
        # removed. Assign the name first, and only ignore a missing file —
        # a bare except would also hide real errors (even KeyboardInterrupt).
        tmp_bam = "tmp.read_map.map_reads_set.bam"
        try:
            os.unlink(tmp_bam)
        except FileNotFoundError:
            pass

        def reads_in_bam(bam_file):
            # Count every record in the BAM, including unmapped reads
            # (until_eof=True iterates past the last mapped position).
            return len(
                list(pysam.Samfile(bam_file, "rb").fetch(until_eof=True)))

        # check works when list length is 1. Should have 12 read pairs in the bam
        read_map.map_reads_set(ref_fasta, [reads1[0]], [reads2[0]],
                               tmp_bam,
                               rmdup=True)
        self.assertTrue(os.path.exists(tmp_bam))
        self.assertEqual(24, reads_in_bam(tmp_bam))
        os.unlink(tmp_bam)

        # check works when list length > 1, Should have 35 read pairs in the bam
        read_map.map_reads_set(
            ref_fasta,
            reads1,
            reads2,
            tmp_bam,
            rmdup=True,
            read_group=("1", "GROUP_NAME"),
        )
        self.assertEqual(70, reads_in_bam(tmp_bam))
        os.unlink(tmp_bam)
示例#3
0
    def test_map_reads_set(self):
        '''test map_reads_set'''
        reads1 = [
            os.path.join(data_dir, 'reads_set_' + str(i) + '.1.fq')
            for i in range(1, 4, 1)
        ]
        reads2 = [
            os.path.join(data_dir, 'reads_set_' + str(i) + '.2.fq')
            for i in range(1, 4, 1)
        ]
        ref_fasta = os.path.join(data_dir, 'ref.fa')
        # BUG FIX: tmp_bam was previously referenced in this cleanup before
        # it was assigned; the resulting NameError was silently swallowed by
        # a bare `except:`, so a leftover file from an earlier run was never
        # removed. Assign the name first, and only ignore a missing file —
        # a bare except would also hide real errors (even KeyboardInterrupt).
        tmp_bam = 'tmp.read_map.map_reads_set.bam'
        try:
            os.unlink(tmp_bam)
        except FileNotFoundError:
            pass

        def reads_in_bam(bam_file):
            # Count every record in the BAM, including unmapped reads
            # (until_eof=True iterates past the last mapped position).
            return len(
                list(pysam.Samfile(bam_file, 'rb').fetch(until_eof=True)))

        # check works when list length is 1. Should have 12 read pairs in the bam
        read_map.map_reads_set(ref_fasta, [reads1[0]], [reads2[0]],
                               tmp_bam,
                               rmdup=True)
        self.assertTrue(os.path.exists(tmp_bam))
        self.assertEqual(24, reads_in_bam(tmp_bam))
        os.unlink(tmp_bam)

        # check works when list length > 1, Should have 35 read pairs in the bam
        read_map.map_reads_set(ref_fasta,
                               reads1,
                               reads2,
                               tmp_bam,
                               rmdup=True,
                               read_group=('1', 'GROUP_NAME'))
        self.assertEqual(70, reads_in_bam(tmp_bam))
        os.unlink(tmp_bam)
示例#4
0
def run(
    reads1_list,
    reads2_list,
    ref_dir,
    outdir,
    sample_name="sample",
    cortex_mem_height=22,
    debug=False,
    keep_bam=False,
):
    """Run the variant-calling pipeline on one sample's paired reads files.

    Stages: trim reads with trimmomatic, map the trimmed reads to the
    reference in ref_dir (with duplicate removal), call variants with both
    bcftools and cortex, then adjudicate the two call sets with minos.
    Everything is written inside outdir, which this function creates (it
    must not already exist). The final call set is outdir/final.vcf.

    Parameters:
        reads1_list: forward reads files, one per pair.
        reads2_list: reverse reads files, parallel to reads1_list.
        ref_dir: reference directory, loaded via reference_dir.ReferenceDir.
        outdir: output directory created by this function.
        sample_name: sample name used for the BAM read group and cortex.
        cortex_mem_height: mem_height passed to cortex.CortexRunCalls.
        debug: if True, keep intermediate files (trimmed reads, cortex and
            minos working directories, the BAM).
        keep_bam: if True, keep the rmdup BAM and its index.

    Raises:
        Exception: if the reads lists differ in length, cortex does not
            produce exactly one raw VCF, or the final VCF is missing.
    """
    if len(reads1_list) != len(reads2_list):
        # BUG FIX: this message was a plain string, so the {reads1_list} /
        # {reads2_list} placeholders were printed literally instead of being
        # substituted. It must be an f-string.
        raise Exception(
            f"Must give same number of forward and reverse reads files. Got:\nForward:{reads1_list}\nReverse:{reads2_list}"
        )

    # Deliberately os.mkdir, not makedirs: fails fast if outdir exists.
    os.mkdir(outdir)

    # Trim each read pair; trimmed files are deleted later unless debug.
    trimmed_reads_1 = []
    trimmed_reads_2 = []
    for i in range(len(reads1_list)):
        trimmed_reads_1.append(
            os.path.join(outdir, f"trimmed_reads.{i}.1.fq.gz"))
        trimmed_reads_2.append(
            os.path.join(outdir, f"trimmed_reads.{i}.2.fq.gz"))
        read_trim.run_trimmomatic(
            reads1_list[i],
            reads2_list[i],
            trimmed_reads_1[-1],
            trimmed_reads_2[-1],
        )

    # Map all trimmed read pairs into one duplicate-removed, indexed BAM.
    refdir = reference_dir.ReferenceDir(directory=ref_dir)
    rmdup_bam = os.path.join(outdir, "map.bam")
    read_map.map_reads_set(
        refdir.ref_fasta,
        trimmed_reads_1,
        trimmed_reads_2,
        rmdup_bam,
        rmdup=True,
        read_group=("1", sample_name),
    )
    utils.syscall(f"samtools index {rmdup_bam}")
    if not debug:
        for filename in trimmed_reads_1 + trimmed_reads_2:
            os.unlink(filename)

    # Call set 1: bcftools mpileup/call.
    samtools_vcf = os.path.join(outdir, "samtools.vcf")
    cmd = f"bcftools mpileup --output-type u -f {refdir.ref_fasta} {rmdup_bam} | bcftools call -vm -O v -o {samtools_vcf}"
    utils.syscall(cmd)

    # Call set 2: cortex. Its output directory layout yields exactly one
    # "*raw.vcf" on success; anything else means the run failed.
    cortex_dir = os.path.join(outdir, "cortex")
    ctx = cortex.CortexRunCalls(
        refdir.directory,
        rmdup_bam,
        cortex_dir,
        sample_name,
        mem_height=cortex_mem_height,
    )
    ctx.run(run_mccortex_view_kmers=False)
    ctx_vcf_dir = os.path.join(cortex_dir, "cortex.out", "vcfs")
    cortex_vcfs = [
        os.path.join(ctx_vcf_dir, x) for x in os.listdir(ctx_vcf_dir)
        if x.endswith("raw.vcf")
    ]
    if len(cortex_vcfs) != 1:
        raise Exception("Error running cortex. Could not find output VCF file")
    cortex_vcf = os.path.join(outdir, "cortex.vcf")
    os.rename(cortex_vcfs[0], cortex_vcf)
    if not debug:
        utils.syscall(f"rm -rf {cortex_dir}")

    # Adjudicate the two call sets with minos; keep only its final VCF.
    minos_dir = os.path.join(outdir, "minos")
    cmd = f"minos adjudicate --reads {rmdup_bam} {minos_dir} {refdir.ref_fasta} {samtools_vcf} {cortex_vcf}"
    utils.syscall(cmd)
    os.rename(os.path.join(minos_dir, "final.vcf"),
              os.path.join(outdir, "final.vcf"))
    if not debug:
        utils.syscall(f"rm -rf {minos_dir}")

    if not (keep_bam or debug):
        os.unlink(rmdup_bam)
        os.unlink(rmdup_bam + ".bai")

    final_vcf = os.path.join(outdir, "final.vcf")
    if not os.path.exists(final_vcf):
        raise Exception(f"Error. Final VCF file not found: {final_vcf}")

    logging.info(f"Finished variant calling. Final VCF file: {final_vcf}")