def run_base_recalibrator(bam, known_sites, ref_fn, recal_table, log_file):
    '''
    Run GATK BaseRecalibrator.

    bam -- passed to GATK as -I
    known_sites -- passed to GATK as -knownSites
    ref_fn -- passed to GATK as -R
    recal_table -- passed to GATK as --out
    log_file -- passed to GATK as -log
    '''
    # This wrapper launches a bare 'java' with an 8 GB heap cap.
    jvm_args = ['java', '-Xmx8g', '-jar', PATHS['gatk']]
    tool_args = ['-T', 'BaseRecalibrator']
    io_args = [
        '-I', bam,
        '-knownSites', known_sites,
        '--out', recal_table,
        '-R', ref_fn,
    ]
    # Two extra covariates are requested on top of GATK's own selection.
    covariate_args = [
        '-cov', 'RepeatLengthCovariate',
        '-cov', 'RepeatUnitCovariate',
    ]
    log_args = ['-log', log_file]
    quiet_call(jvm_args + tool_args + io_args + covariate_args + log_args)
def create_sequence_dictionary(ref_fn):
    '''
    Run Picard CreateSequenceDictionary if none found.

    The dictionary path is ref_fn with its final extension replaced by
    '.dict'; a trailing '.gz' is dropped before the extension is removed.
    Only the last path component is inspected, so a '.fa' occurring in a
    directory name or in the middle of the file name does not truncate
    the path.

    ref_fn -- path to the reference; passed to Picard as R=
    '''
    stem, ext = os.path.splitext(ref_fn)
    if ext == '.gz':
        # 'ref.fa.gz' should map to 'ref.dict', not 'ref.fa.dict'.
        stem = os.path.splitext(stem)[0]
    dict_fn = stem + '.dict'

    # Nothing to do when the dictionary is already present.
    if os.path.exists(dict_fn):
        return

    quiet_call([
        PATHS['java'], '-jar', PATHS['picard'],
        'CreateSequenceDictionary',
        'R=' + ref_fn,
        'O=' + dict_fn,
    ])
def realigner_target_creator(ref_fn, in_bam, intervals):
    '''
    Run GATK RealignerTargetCreator.

    ref_fn -- passed to GATK as -R
    in_bam -- passed to GATK as -I
    intervals -- passed to GATK as -o
    '''
    gatk_cmd = [PATHS['java'], '-jar', PATHS['gatk']]
    gatk_cmd += ['-R', ref_fn]
    gatk_cmd += ['-T', 'RealignerTargetCreator']
    gatk_cmd += ['-I', in_bam]
    gatk_cmd += ['-o', intervals]
    quiet_call(gatk_cmd)
def align(fastq_1, ref_fn, output_fn, fastq_2=None, p=1):
    '''
    Align reads using Bowtie2.

    fastq_1 -- first (or only) reads file; must exist
    ref_fn -- passed to Bowtie2 as -x
    output_fn -- passed to Bowtie2 as -S
    fastq_2 -- optional second reads file; when given it must exist and
               the run is paired (-1/-2 with --fr), otherwise fastq_1 is
               passed as unpaired reads (-U)
    p -- passed to Bowtie2 as -p
    '''
    paired = bool(fastq_2)

    assert os.path.exists(fastq_1)
    if paired:
        assert os.path.exists(fastq_2)

    # Options common to both modes, up to the point where they diverge.
    bowtie_cmd = [
        PATHS['bowtie2'],
        '-q', '--phred33',
        '-p', str(p),
        '-I', '0',
        '-X', '1000',
    ]
    if paired:
        bowtie_cmd.append('--fr')
    bowtie_cmd.extend([
        '--local', '--sensitive-local',
        '-S', output_fn,
        '-x', ref_fn,
    ])

    # Read inputs go last: mate pair or a single unpaired file.
    if paired:
        bowtie_cmd.extend(['-1', fastq_1, '-2', fastq_2])
    else:
        bowtie_cmd.extend(['-U', fastq_1])

    quiet_call(bowtie_cmd)
def run_print_reads_bqsr(input_bam, ref_fn, recal_table, output_bam, log_file):
    '''
    Run GATK PrintReads, observing a BQSR recalibration table.

    input_bam -- passed to GATK as -I
    ref_fn -- passed to GATK as -R
    recal_table -- passed to GATK as -BQSR
    output_bam -- passed to GATK as -o
    log_file -- passed to GATK as -log
    '''
    launcher = [PATHS['java'], '-jar', PATHS['gatk']]
    print_reads_args = [
        '-T', 'PrintReads',
        '-I', input_bam,
        '-R', ref_fn,
        '-BQSR', recal_table,
        '-o', output_bam,
        '-log', log_file,
    ]
    quiet_call(launcher + print_reads_args)
def fix_mate_information(in_bam, out_bam, tmp_dir, max_records):
    '''
    Run Picard FixMateInformation.

    in_bam -- passed to Picard as I=
    out_bam -- passed to Picard as O=
    tmp_dir -- passed to Picard as TMP_DIR=
    max_records -- stringified and passed as MAX_RECORDS_IN_RAM=
    '''
    picard_cmd = [PATHS['java'], '-jar', PATHS['picard'], 'FixMateInformation']
    # Fixed options: lenient validation, coordinate-sorted output.
    picard_cmd.extend(['VALIDATION_STRINGENCY=SILENT', 'SO=coordinate'])
    picard_cmd.append('I=' + in_bam)
    picard_cmd.append('O=' + out_bam)
    picard_cmd.append('TMP_DIR=' + tmp_dir)
    picard_cmd.append('MAX_RECORDS_IN_RAM=' + str(max_records))
    quiet_call(picard_cmd)
def fix_mate_information(in_bam, out_bam):
    '''
    Run Picard FixMateInformation.

    in_bam -- passed to Picard as I=
    out_bam -- passed to Picard as O=
    '''
    # This variant launches a bare 'java' with a 2 GB heap cap.
    jvm_args = ['java', '-Xmx2g', '-jar', PATHS['picard']]
    tool_args = [
        'FixMateInformation',
        'VALIDATION_STRINGENCY=SILENT',
        'SO=coordinate',
    ]
    file_args = ['I=' + in_bam, 'O=' + out_bam]
    quiet_call(jvm_args + tool_args + file_args)
def indel_realigner(ref_fn, log, in_bam, intervals, realigned_bam):
    '''
    Run GATK IndelRealigner.

    ref_fn -- passed to GATK as -R
    log -- passed to GATK as -log
    in_bam -- passed to GATK as -I
    intervals -- passed to GATK as -targetIntervals
    realigned_bam -- passed to GATK as -o
    '''
    launcher = [PATHS['java'], '-jar', PATHS['gatk']]
    realigner_args = [
        '-R', ref_fn,
        '-T', 'IndelRealigner',
        # Fixed cap passed through to GATK.
        '--maxReadsForRealignment', '100000',
        '-log', log,
        '-I', in_bam,
        '-targetIntervals', intervals,
        '-o', realigned_bam,
    ]
    quiet_call(launcher + realigner_args)
def build(ref_fn):
    '''
    Build Bowtie2 index for reference if none found.

    The index counts as present only when all six index files
    (ref_fn + suffix) exist; otherwise bowtie2-build is run with ref_fn
    as both the input and the index base name.
    '''
    index_suffixes = (
        '.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2',
        '.rev.1.bt2', '.rev.2.bt2',
    )
    if all(os.path.isfile(ref_fn + suffix) for suffix in index_suffixes):
        return

    quiet_call([PATHS['bowtie2_build'], ref_fn, ref_fn])
def deduplicate(in_bam, out_bam, metrics_file, tmp_dir, max_records):
    '''
    Run Picard MarkDuplicates with duplicate removal, then index the output.

    in_bam -- passed to Picard as I=
    out_bam -- passed to Picard as O=; indexed afterwards via index_bam
    metrics_file -- passed to Picard as M=
    tmp_dir -- passed to Picard as TMP_DIR=
    max_records -- stringified and passed as MAX_RECORDS_IN_RAM=
    '''
    picard_cmd = [PATHS['java'], '-jar', PATHS['picard'], 'MarkDuplicates']
    # Fixed options: lenient validation, duplicates dropped from the output.
    picard_cmd.extend(['VALIDATION_STRINGENCY=SILENT', 'REMOVE_DUPLICATES=TRUE'])
    picard_cmd.append('I=' + in_bam)
    picard_cmd.append('O=' + out_bam)
    picard_cmd.append('M=' + metrics_file)
    picard_cmd.append('TMP_DIR=' + tmp_dir)
    picard_cmd.append('MAX_RECORDS_IN_RAM=' + str(max_records))
    quiet_call(picard_cmd)

    index_bam(out_bam)
def deduplicate(in_bam, out_bam, metrics_file):
    '''
    Run Picard MarkDuplicates with duplicate removal, then index the output.

    in_bam -- passed to Picard as I=
    out_bam -- passed to Picard as O=; indexed afterwards via index_bam
    metrics_file -- passed to Picard as M=
    '''
    # This variant launches a bare 'java' with a 2 GB heap cap.
    jvm_args = ['java', '-Xmx2g', '-jar', PATHS['picard']]
    tool_args = [
        'MarkDuplicates',
        'VALIDATION_STRINGENCY=SILENT',
        'REMOVE_DUPLICATES=TRUE',
    ]
    file_args = ['I=' + in_bam, 'O=' + out_bam, 'M=' + metrics_file]
    quiet_call(jvm_args + tool_args + file_args)

    index_bam(out_bam)
def add_read_groups(in_sam, out_bam, sample_header, tmp_dir, max_records):
    '''
    Run Picard AddOrReplaceReadGroups.

    in_sam -- passed to Picard as I=
    out_bam -- passed to Picard as O=
    sample_header -- used as the value of RGPU, RGSM, RGLB and RGID
    tmp_dir -- passed to Picard as TMP_DIR=
    max_records -- stringified and passed as MAX_RECORDS_IN_RAM=
    '''
    picard_cmd = [
        PATHS['java'], '-jar', PATHS['picard'],
        'AddOrReplaceReadGroups',
        'VALIDATION_STRINGENCY=SILENT',
        'SO=coordinate',
        'RGPL=illumina',
    ]
    # Every per-sample read-group field gets the same value.
    for rg_key in ('RGPU=', 'RGSM=', 'RGLB=', 'RGID='):
        picard_cmd.append(rg_key + sample_header)
    picard_cmd.append('I=' + in_sam)
    picard_cmd.append('O=' + out_bam)
    picard_cmd.append('TMP_DIR=' + tmp_dir)
    picard_cmd.append('MAX_RECORDS_IN_RAM=' + str(max_records))
    quiet_call(picard_cmd)
def add_read_groups(in_sam, out_bam, sample_header):
    '''
    Run Picard AddOrReplaceReadGroups.

    in_sam -- passed to Picard as I=
    out_bam -- passed to Picard as O=
    sample_header -- used as the value of RGPU, RGSM, RGLB and RGID
    '''
    # This variant launches a bare 'java' with a 2 GB heap cap.
    jvm_args = ['java', '-Xmx2g', '-jar', PATHS['picard']]
    tool_args = [
        'AddOrReplaceReadGroups',
        'VALIDATION_STRINGENCY=SILENT',
        'SO=coordinate',
        'RGPL=illumina',
    ]
    # Every per-sample read-group field gets the same value.
    read_group_args = [
        rg_key + sample_header
        for rg_key in ('RGPU=', 'RGSM=', 'RGLB=', 'RGID=')
    ]
    file_args = ['I=' + in_sam, 'O=' + out_bam]
    quiet_call(jvm_args + tool_args + read_group_args + file_args)
def run_haplotype_caller(bams, ref_fn, output_vcf, log_file, nct=1):
    '''
    Run GATK HaplotypeCaller.

    bams -- iterable of inputs; each is passed as its own -I argument,
            appended after all other options
    ref_fn -- passed to GATK as -R
    output_vcf -- passed to GATK as -o
    log_file -- passed to GATK as -log
    nct -- stringified and passed to GATK as -nct
    '''
    # Flatten to ['-I', bam1, '-I', bam2, ...].
    bam_args = [token for bam in bams for token in ('-I', bam)]

    launcher = [PATHS['java'], '-jar', PATHS['gatk']]
    caller_args = [
        '-T', 'HaplotypeCaller',
        '-o', output_vcf,
        '-A', 'StrandAlleleCountsBySample',
        '-A', 'DepthPerSampleHC',
        '-R', ref_fn,
        '-nct', str(nct),
        '-mmq', '5',
        '-log', log_file,
        '--minPruning', '0',
        '--minDanglingBranchLength', '0',
        '--pcr_indel_model', 'NONE',
    ]
    quiet_call(launcher + caller_args + bam_args)