Exemplo n.º 1
0
def run_base_recalibrator(bam, known_sites, ref_fn, recal_table, log_file):
    '''
    Run GATK BaseRecalibrator.

    bam -- input alignments, passed as -I
    known_sites -- passed as -knownSites
    ref_fn -- reference, passed as -R
    recal_table -- output location, passed as --out
    log_file -- passed as -log

    The command starts plain 'java' with -Xmx8g and requests the
    RepeatLengthCovariate and RepeatUnitCovariate covariates via -cov.
    '''
    jvm = ['java', '-Xmx8g', '-jar', PATHS['gatk']]
    walker = ['-T', 'BaseRecalibrator']
    io_args = [
        '-I', bam,
        '-knownSites', known_sites,
        '--out', recal_table,
        '-R', ref_fn,
    ]
    covariates = [
        '-cov', 'RepeatLengthCovariate',
        '-cov', 'RepeatUnitCovariate',
    ]
    log_args = ['-log', log_file]

    quiet_call(jvm + walker + io_args + covariates + log_args)
Exemplo n.º 2
0
def create_sequence_dictionary(ref_fn):
    '''
    Run Picard CreateSequenceDictionary if none found.

    ref_fn -- path to the reference FASTA

    The dictionary path is derived from ref_fn by dropping everything from
    the last '.fa' in the file name onwards and appending '.dict' (so
    'ref.fa', 'ref.fasta' and 'ref.fa.gz' all map to 'ref.dict'). A name
    without '.fa' simply gets '.dict' appended. Nothing is run when that
    file already exists.
    '''
    # Work on the file name only: a '.fa' inside a directory name must not
    # truncate the path.
    directory, filename = os.path.split(ref_fn)

    # Cut at the LAST '.fa' so names such as 'my.family.fa' keep their stem.
    stem, marker, _ = filename.rpartition('.fa')
    if not marker:
        stem = filename

    dict_fn = os.path.join(directory, stem + '.dict')
    if os.path.exists(dict_fn):
        return

    quiet_call([
        PATHS['java'], '-jar',
        PATHS['picard'],
        'CreateSequenceDictionary',
        'R=' + ref_fn,
        'O=' + dict_fn,
    ])
Exemplo n.º 3
0
def realigner_target_creator(ref_fn, in_bam, intervals):
    '''
    Run GATK RealignerTargetCreator.

    ref_fn -- reference, passed as -R
    in_bam -- input alignments, passed as -I
    intervals -- output location, passed as -o
    '''
    command = [PATHS['java'], '-jar', PATHS['gatk']]
    command += ['-R', ref_fn]
    command += ['-T', 'RealignerTargetCreator']
    command += ['-I', in_bam]
    command += ['-o', intervals]
    quiet_call(command)
Exemplo n.º 4
0
def align(fastq_1, ref_fn, output_fn, fastq_2=None, p=1):
    '''
    Align reads using Bowtie2.

    fastq_1 -- first (or only) FASTQ file; must exist
    ref_fn -- passed to Bowtie2 as -x
    output_fn -- output location, passed as -S
    fastq_2 -- optional second FASTQ file; when given it must exist and the
               reads are submitted as a pair (-1/-2 with --fr), otherwise
               fastq_1 is submitted alone with -U
    p -- passed as -p
    '''
    assert os.path.exists(fastq_1)
    paired = bool(fastq_2)
    if paired:
        assert os.path.exists(fastq_2)

    # Options shared by both modes, in the order they are handed to Bowtie2.
    command = [
        PATHS['bowtie2'],
        '-q', '--phred33',
        '-p', str(p),
        '-I', '0',
        '-X', '1000',
    ]
    if paired:
        command.append('--fr')
    command += [
        '--local', '--sensitive-local',
        '-S', output_fn,
        '-x', ref_fn,
    ]

    # Read inputs come last: a mate pair or a single unpaired file.
    if paired:
        command += ['-1', fastq_1, '-2', fastq_2]
    else:
        command += ['-U', fastq_1]

    quiet_call(command)
Exemplo n.º 5
0
def run_print_reads_bqsr(input_bam, ref_fn, recal_table, output_bam, log_file):
    '''
    Run GATK PrintReads, observing a BQSR recalibration table.

    input_bam -- input alignments, passed as -I
    ref_fn -- reference, passed as -R
    recal_table -- recalibration table, passed as -BQSR
    output_bam -- output location, passed as -o
    log_file -- passed as -log
    '''
    launcher = [PATHS['java'], '-jar', PATHS['gatk']]
    options = [
        '-T', 'PrintReads',
        '-I', input_bam,
        '-R', ref_fn,
        '-BQSR', recal_table,
        '-o', output_bam,
        '-log', log_file,
    ]
    quiet_call(launcher + options)
Exemplo n.º 6
0
def fix_mate_information(in_bam, out_bam, tmp_dir, max_records):
    '''
    Run Picard FixMateInformation.

    in_bam -- input, passed as I=
    out_bam -- output, passed as O=
    tmp_dir -- passed as TMP_DIR=
    max_records -- passed as MAX_RECORDS_IN_RAM= (converted with str())

    VALIDATION_STRINGENCY=SILENT and SO=coordinate are always set.
    '''
    command = [PATHS['java'], '-jar', PATHS['picard'], 'FixMateInformation']
    command.extend(['VALIDATION_STRINGENCY=SILENT', 'SO=coordinate'])
    command.extend(['I=' + in_bam, 'O=' + out_bam])
    command.extend([
        'TMP_DIR=' + tmp_dir,
        'MAX_RECORDS_IN_RAM=' + str(max_records),
    ])
    quiet_call(command)
Exemplo n.º 7
0
def fix_mate_information(in_bam, out_bam):
    '''
    Run Picard FixMateInformation.

    in_bam -- input, passed as I=
    out_bam -- output, passed as O=

    The command starts plain 'java' with -Xmx2g and always sets
    VALIDATION_STRINGENCY=SILENT and SO=coordinate.
    '''
    jvm = ['java', '-Xmx2g', '-jar', PATHS['picard']]
    tool_args = [
        'FixMateInformation',
        'VALIDATION_STRINGENCY=SILENT',
        'SO=coordinate',
    ]
    files = ['I=' + in_bam, 'O=' + out_bam]
    quiet_call(jvm + tool_args + files)
Exemplo n.º 8
0
def indel_realigner(ref_fn, log, in_bam, intervals, realigned_bam):
    '''
    Run GATK IndelRealigner.

    ref_fn -- reference, passed as -R
    log -- passed as -log
    in_bam -- input alignments, passed as -I
    intervals -- passed as -targetIntervals
    realigned_bam -- output location, passed as -o

    --maxReadsForRealignment is fixed at 100000.
    '''
    command = [PATHS['java'], '-jar', PATHS['gatk']]
    command += ['-R', ref_fn]
    command += ['-T', 'IndelRealigner']
    command += ['--maxReadsForRealignment', '100000']
    command += ['-log', log]
    command += ['-I', in_bam]
    command += ['-targetIntervals', intervals]
    command += ['-o', realigned_bam]
    quiet_call(command)
Exemplo n.º 9
0
def build(ref_fn):
    '''
    Build Bowtie2 index for reference if none found.

    ref_fn -- reference file; also used as the index base name

    An index counts as present when all six files
    ref_fn + {.1, .2, .3, .4, .rev.1, .rev.2} exist with the same
    extension, either '.bt2' (small index) or '.bt2l' (large index).
    Otherwise bowtie2-build is invoked with ref_fn as both the input
    and the index base name.
    '''
    parts = ('.1', '.2', '.3', '.4', '.rev.1', '.rev.2')

    def index_complete(extension):
        return all(
            os.path.isfile(ref_fn + part + extension) for part in parts
        )

    # bowtie2-build writes '.bt2l' files for large references; checking only
    # '.bt2' would rebuild such an index on every call.
    if index_complete('.bt2') or index_complete('.bt2l'):
        return

    quiet_call([
        PATHS['bowtie2_build'],
        ref_fn,
        ref_fn,
    ])
Exemplo n.º 10
0
def deduplicate(in_bam, out_bam, metrics_file, tmp_dir, max_records):
    '''
    Run Picard MarkDuplicates, then index the output BAM.

    in_bam -- input, passed as I=
    out_bam -- output, passed as O= and afterwards to index_bam
    metrics_file -- passed as M=
    tmp_dir -- passed as TMP_DIR=
    max_records -- passed as MAX_RECORDS_IN_RAM= (converted with str())

    VALIDATION_STRINGENCY=SILENT and REMOVE_DUPLICATES=TRUE are always set.
    '''
    command = [PATHS['java'], '-jar', PATHS['picard'], 'MarkDuplicates']
    command.extend(['VALIDATION_STRINGENCY=SILENT', 'REMOVE_DUPLICATES=TRUE'])
    command.extend(['I=' + in_bam, 'O=' + out_bam, 'M=' + metrics_file])
    command.extend([
        'TMP_DIR=' + tmp_dir,
        'MAX_RECORDS_IN_RAM=' + str(max_records),
    ])
    quiet_call(command)

    index_bam(out_bam)
Exemplo n.º 11
0
def deduplicate(in_bam, out_bam, metrics_file):
    '''
    Run Picard MarkDuplicates, then index the output BAM.

    in_bam -- input, passed as I=
    out_bam -- output, passed as O= and afterwards to index_bam
    metrics_file -- passed as M=

    The command starts plain 'java' with -Xmx2g and always sets
    VALIDATION_STRINGENCY=SILENT and REMOVE_DUPLICATES=TRUE.
    '''
    jvm = ['java', '-Xmx2g', '-jar', PATHS['picard']]
    tool_args = [
        'MarkDuplicates',
        'VALIDATION_STRINGENCY=SILENT',
        'REMOVE_DUPLICATES=TRUE',
    ]
    files = ['I=' + in_bam, 'O=' + out_bam, 'M=' + metrics_file]
    quiet_call(jvm + tool_args + files)

    index_bam(out_bam)
Exemplo n.º 12
0
def add_read_groups(in_sam, out_bam, sample_header, tmp_dir, max_records):
    '''
    Run Picard AddOrReplaceReadGroups.

    in_sam -- input, passed as I=
    out_bam -- output, passed as O=
    sample_header -- value used for RGPU, RGSM, RGLB and RGID alike
    tmp_dir -- passed as TMP_DIR=
    max_records -- passed as MAX_RECORDS_IN_RAM= (converted with str())

    VALIDATION_STRINGENCY=SILENT, SO=coordinate and RGPL=illumina are
    always set.
    '''
    command = [PATHS['java'], '-jar', PATHS['picard'], 'AddOrReplaceReadGroups']
    command += ['VALIDATION_STRINGENCY=SILENT', 'SO=coordinate', 'RGPL=illumina']
    # Every sample-specific read-group field receives the same value.
    command += [
        'RG' + field + '=' + sample_header
        for field in ('PU', 'SM', 'LB', 'ID')
    ]
    command += ['I=' + in_sam, 'O=' + out_bam]
    command += [
        'TMP_DIR=' + tmp_dir,
        'MAX_RECORDS_IN_RAM=' + str(max_records),
    ]
    quiet_call(command)
Exemplo n.º 13
0
def add_read_groups(in_sam, out_bam, sample_header):
    '''
    Run Picard AddOrReplaceReadGroups.

    in_sam -- input, passed as I=
    out_bam -- output, passed as O=
    sample_header -- value used for RGPU, RGSM, RGLB and RGID alike

    The command starts plain 'java' with -Xmx2g and always sets
    VALIDATION_STRINGENCY=SILENT, SO=coordinate and RGPL=illumina.
    '''
    jvm = ['java', '-Xmx2g', '-jar', PATHS['picard']]
    fixed = [
        'AddOrReplaceReadGroups',
        'VALIDATION_STRINGENCY=SILENT',
        'SO=coordinate',
        'RGPL=illumina',
    ]
    # Every sample-specific read-group field receives the same value.
    read_groups = [
        'RG' + field + '=' + sample_header
        for field in ('PU', 'SM', 'LB', 'ID')
    ]
    files = ['I=' + in_sam, 'O=' + out_bam]
    quiet_call(jvm + fixed + read_groups + files)
Exemplo n.º 14
0
def run_haplotype_caller(bams, ref_fn, output_vcf, log_file, nct=1):
    '''
    Run GATK HaplotypeCaller.

    bams -- iterable of input BAMs; each one is passed with its own -I,
            appended after all other options
    ref_fn -- reference, passed as -R
    output_vcf -- output location, passed as -o
    log_file -- passed as -log
    nct -- passed as -nct (converted with str())

    The annotations StrandAlleleCountsBySample and DepthPerSampleHC are
    requested, and -mmq 5, --minPruning 0, --minDanglingBranchLength 0 and
    --pcr_indel_model NONE are always set.
    '''
    # One '-I <bam>' pair per input, flattened into a single argument run.
    inputs = [token for bam in bams for token in ('-I', bam)]

    launcher = [PATHS['java'], '-jar', PATHS['gatk']]
    options = [
        '-T', 'HaplotypeCaller',
        '-o', output_vcf,
        '-A', 'StrandAlleleCountsBySample',
        '-A', 'DepthPerSampleHC',
        '-R', ref_fn,
        '-nct', str(nct),
        '-mmq', '5',
        '-log', log_file,
        '--minPruning', '0',
        '--minDanglingBranchLength', '0',
        '--pcr_indel_model', 'NONE',
    ]
    quiet_call(launcher + options + inputs)