예제 #1
0
def run_mutect_one_job(tempdir, vcf, reference, intervals, normal_bam,
                       tumour_bam):
    """Run MuTect calling and filtering per interval, then merge the VCFs.

    For every interval a calling command and a filtering command are built.
    The calling commands are submitted as one batch and the filtering
    commands as a second batch, because each filtering command consumes the
    file produced by the calling command of the same interval.

    :param tempdir: scratch directory; a sub-directory named after each
        interval index is created in it, plus GNU parallel and merge dirs
    :param vcf: path of the final merged VCF
    :param reference: reference genome, forwarded to both MuTect commands
    :param intervals: iterable of intervals; one call/filter pair per entry
    :param normal_bam: normal BAM, forwarded to the calling command
    :param tumour_bam: tumour BAM, forwarded to the calling command
    """
    call_commands = []
    filter_commands = []
    vcf_files = []
    for i, interval in enumerate(intervals):
        ival_temp_dir = os.path.join(tempdir, str(i))
        helpers.makedirs(ival_temp_dir)

        # The raw calls get their own file. They used to share
        # 'mutect.vcf.gz' with the filtered output, which made the filter
        # step read from and write to the same path.
        unfiltered_output = os.path.join(ival_temp_dir,
                                         'mutect_unfiltered.vcf.gz')
        call_commands.append(
            mutect_run_command(reference, interval, normal_bam, tumour_bam,
                               unfiltered_output))

        output = os.path.join(ival_temp_dir, 'mutect.vcf.gz')
        filter_commands.append(
            mutect_filter_command(reference, unfiltered_output, output))
        vcf_files.append(output)

    # Stage 1: calling. Kept separate from filtering so that no filter
    # command is launched before its input exists.
    parallel_temp_dir = os.path.join(tempdir, 'gnu_parallel_temp')
    helpers.run_in_gnu_parallel(call_commands, parallel_temp_dir)

    # Stage 2: filtering, in its own GNU parallel scratch directory.
    filter_parallel_temp_dir = os.path.join(tempdir,
                                            'gnu_parallel_temp_filter')
    helpers.run_in_gnu_parallel(filter_commands, filter_parallel_temp_dir)

    merge_tempdir = os.path.join(tempdir, 'mutect_merge')
    helpers.makedirs(merge_tempdir)
    merge_vcfs(vcf_files, vcf, merge_tempdir)
예제 #2
0
def run_samtools_germline_one_job(tempdir,
                                  vcf,
                                  reference,
                                  intervals,
                                  bam_file,
                                  samtools_docker_image=None,
                                  vcftools_docker_image=None):
    """Call germline variants per interval and merge the per-interval VCFs.

    :param tempdir: scratch directory; one sub-directory per interval index
        is created in it, plus GNU parallel and merge directories
    :param vcf: path of the merged output VCF
    :param reference: reference genome, forwarded to the samtools command
    :param intervals: iterable of intervals, one command per entry
    :param bam_file: BAM file, forwarded to the samtools command
    :param samtools_docker_image: forwarded to ``run_in_gnu_parallel``
    :param vcftools_docker_image: forwarded to ``merge_vcfs`` as
        ``docker_image``
    """
    vcf_name = 'germline.vcf.gz'

    jobs = []
    per_interval_vcfs = []
    for index, region in enumerate(intervals):
        workdir = os.path.join(tempdir, str(index))
        helpers.makedirs(workdir)

        region_vcf = os.path.join(workdir, vcf_name)
        per_interval_vcfs.append(region_vcf)
        jobs.append(
            samtools_germline_command(region_vcf, reference, region,
                                      bam_file))

    helpers.run_in_gnu_parallel(
        jobs,
        os.path.join(tempdir, 'gnu_parallel_temp'),
        samtools_docker_image,
    )

    merge_workdir = os.path.join(tempdir, 'germline_merge')
    helpers.makedirs(merge_workdir)
    merge_vcfs(per_interval_vcfs,
               vcf,
               merge_workdir,
               docker_image=vcftools_docker_image)
예제 #3
0
def run_samtools_germline_one_job(tempdir, vcf, reference, intervals,
                                  bam_file):
    """Call germline variants per interval, merge, and rewrite sample ids.

    The per-interval VCFs are merged into an intermediate file, which is
    then passed through ``update_germline_header_sample_ids`` together with
    the sample id read from ``bam_file`` to produce ``vcf``.

    :param tempdir: scratch directory; one sub-directory per interval index
        is created in it, plus GNU parallel and merge directories
    :param vcf: path of the final VCF
    :param reference: reference genome, forwarded to the samtools command
    :param intervals: iterable of intervals, one command per entry
    :param bam_file: BAM file to call on; also the source of the sample id
    """
    interval_vcfs = []
    calls = []
    for idx, ival in enumerate(intervals):
        scratch = os.path.join(tempdir, str(idx))
        helpers.makedirs(scratch)
        ival_vcf = os.path.join(scratch, 'germline.vcf.gz')
        calls.append(
            samtools_germline_command(ival_vcf, reference, ival, bam_file))
        interval_vcfs.append(ival_vcf)

    helpers.run_in_gnu_parallel(
        calls, os.path.join(tempdir, 'gnu_parallel_temp'))

    merge_dir = os.path.join(tempdir, 'germline_merge')
    helpers.makedirs(merge_dir)

    # Merge into an intermediate file first; the final VCF is written by the
    # header update below.
    merged_vcf = os.path.join(merge_dir, 'merged_rtg.vcf')
    merge_vcfs(interval_vcfs, merged_vcf, merge_dir)

    sample_id = bamutils.get_sample_id(bam_file)
    vcfutils.update_germline_header_sample_ids(merged_vcf, vcf, sample_id)
예제 #4
0
def run_museq_one_job(tempdir,
                      museq_vcf,
                      reference,
                      intervals,
                      museq_params,
                      tumour_bam=None,
                      normal_bam=None,
                      titan_mode=False):
    '''
    Build one museq command per interval, run them all, and merge the VCFs

    :param tempdir: scratch directory; one sub-directory per interval index
        is created in it, plus GNU parallel and merge directories
    :param museq_vcf: path of the final VCF, written by the header update
    :param reference: reference genome, forwarded to run_museq
    :param intervals: iterable of intervals, one museq command per entry
    :param museq_params: forwarded unchanged to run_museq
    :param tumour_bam: path to tumour bam
    :param normal_bam: path to normal bam
    :param titan_mode: forwarded unchanged to run_museq

    NOTE(review): tumour_bam and normal_bam default to None, yet
    get_sample_id is called on both unconditionally at the end; confirm
    that helper accepts None before relying on the defaults.
    '''
    interval_vcfs = []
    museq_jobs = []
    for idx, region in enumerate(intervals):
        workdir = os.path.join(tempdir, str(idx))
        helpers.makedirs(workdir)

        region_vcf = os.path.join(workdir, 'museq.vcf')
        region_log = os.path.join(workdir, 'museq.log')
        interval_vcfs.append(region_vcf)

        # run_museq is called with return_cmd=True and whatever it returns
        # is queued as a command for the batch run below.
        museq_jobs.append(
            run_museq(region_vcf,
                      region_log,
                      reference,
                      region,
                      museq_params,
                      workdir,
                      tumour_bam=tumour_bam,
                      normal_bam=normal_bam,
                      return_cmd=True,
                      titan_mode=titan_mode))

    helpers.run_in_gnu_parallel(
        museq_jobs, os.path.join(tempdir, 'gnu_parallel_temp'))

    merge_dir = os.path.join(tempdir, 'museq_merge')
    helpers.makedirs(merge_dir)
    merged_vcf = os.path.join(merge_dir, 'temp_museq_merge.vcf')
    merge_vcfs(interval_vcfs, merged_vcf, merge_dir)

    update_header_sample_ids(merged_vcf,
                             museq_vcf,
                             get_sample_id(tumour_bam),
                             get_sample_id(normal_bam))
예제 #5
0
def strelka_one_node(
        normal_bam_file,
        tumour_bam_file,
        ref_genome_fasta_file,
        indel_file,
        snv_file,
        tmp_dir,
        regions,
        known_sizes,
        is_exome=False,
):
    """Run the Strelka steps for all regions on one node and merge results.

    Steps, in order:

    1. one ``GetChromDepth`` command per chromosome on the normal BAM, with
       the per-chromosome depth files merged into a single depth file;
    2. one genome-segment command per region;
    3. concatenation of the per-region SNV and indel VCFs;
    4. a sample-id header update producing ``snv_file`` and ``indel_file``.

    :param normal_bam_file: normal BAM
    :param tumour_bam_file: tumour BAM
    :param ref_genome_fasta_file: reference genome fasta
    :param indel_file: path of the final indel VCF
    :param snv_file: path of the final SNV VCF
    :param tmp_dir: scratch directory for all intermediate files
    :param regions: iterable of region strings; the text before the first
        ``_`` of each is used as the chromosome name
    :param known_sizes: forwarded unchanged to ``genome_segment_cmd``
    :param is_exome: forwarded unchanged to ``genome_segment_cmd``
    """
    # Regions may share a chromosome. Keep each chromosome once (in first
    # seen order) so that no two depth jobs write the same depth.txt at the
    # same time and no chromosome is merged twice.
    chromosomes = []
    seen_chromosomes = set()
    for region in regions:
        chrom = region.split('_')[0]
        if chrom not in seen_chromosomes:
            seen_chromosomes.add(chrom)
            chromosomes.append(chrom)

    commands = []
    depthfiles = []
    for chrom in chromosomes:
        chrom_temp_dir = os.path.join(tmp_dir, 'chroms', chrom)
        helpers.makedirs(chrom_temp_dir)

        outfile = os.path.join(chrom_temp_dir, 'depth.txt')
        depthfiles.append(outfile)

        commands.append([
            'GetChromDepth',
            '--align-file', normal_bam_file,
            '--chrom', chrom,
            '--output-file', outfile,
        ])

    parallel_temp_dir = os.path.join(tmp_dir, 'gnu_parallel_temp_depths')
    helpers.run_in_gnu_parallel(commands, parallel_temp_dir)

    depth_file = os.path.join(tmp_dir, 'chrom_depths.txt')
    merge_chromosome_depths_plain(depthfiles, depth_file)

    commands = []
    indel_files = []
    snv_files = []
    for i, region in enumerate(regions):
        ival_temp_dir = os.path.join(tmp_dir, 'intervals', str(i))
        helpers.makedirs(ival_temp_dir)
        indel_out = os.path.join(ival_temp_dir, 'strelka_indel.vcf')
        snv_out = os.path.join(ival_temp_dir, 'strelka_snv.vcf')
        stats_out = os.path.join(ival_temp_dir, 'stats.txt')

        indel_files.append(indel_out)
        snv_files.append(snv_out)

        commands.append(genome_segment_cmd(
            depth_file,
            normal_bam_file,
            tumour_bam_file,
            ref_genome_fasta_file,
            indel_out,
            snv_out,
            stats_out,
            region,
            known_sizes,
            is_exome=is_exome,
        ))

    parallel_temp_dir = os.path.join(tmp_dir, 'gnu_parallel_temp')
    helpers.run_in_gnu_parallel(commands, parallel_temp_dir)

    # Each merge gets its own directory, created up front: the merged file
    # is written inside it.
    snv_merge_dir = os.path.join(tmp_dir, 'snv_merge')
    helpers.makedirs(snv_merge_dir)
    temp_strelka_snv = os.path.join(snv_merge_dir,
                                    'temp_strelka_merge_snv.vcf')
    concatenate_vcf(snv_files, temp_strelka_snv, snv_merge_dir)

    # A missing comma used to glue 'indel_merge' onto the file name, and the
    # indel merge shared the SNV merge directory.
    indel_merge_dir = os.path.join(tmp_dir, 'indel_merge')
    helpers.makedirs(indel_merge_dir)
    temp_strelka_indel = os.path.join(indel_merge_dir,
                                      'temp_strelka_merge_indel.vcf')
    concatenate_vcf(indel_files, temp_strelka_indel, indel_merge_dir)

    tumour_id = get_sample_id(tumour_bam_file)
    normal_id = get_sample_id(normal_bam_file)
    update_header_sample_ids(temp_strelka_snv, snv_file, tumour_id, normal_id)
    update_header_sample_ids(temp_strelka_indel, indel_file, tumour_id,
                             normal_id)