def pileup_depth(self, pileup_depth):
    """Produce a per-position depth file for the final BAM via ``samtools depth``.

    The depth output is written to ``pileup_depth`` and a checksum file named
    ``<pileup_depth>.md5`` is generated afterwards. If ``md5sum_check``
    already accepts that pair, the command is skipped and only the progress
    and version lines are logged.

    :param pileup_depth: path of the depth file to create.
    """
    checksum_path = '%s.md5' % (pileup_depth)

    # A verified output from an earlier run means there is nothing to redo.
    if md5sum_check(pileup_depth, checksum_path):
        log_progress(__modname__, 'Get Pileup Depth already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['samtools_ver'], f=self._log_file)
        return

    log_progress(__modname__, 'Get Pileup Depth start', f=self._log_file)
    log_version(__modname__, self._sw['samtools_ver'], f=self._log_file)

    depth_cmd = [self._sw['samtools'], 'depth']
    # -a: report all positions; -q / -Q: base / mapping quality thresholds;
    # -d: depth cap (see the samtools depth manual).
    depth_cmd += ['-a', '-q', '0', '-Q', '1', '-d', '1000000']
    # Restrict the report to the target BED regions.
    depth_cmd += ['-b', self._target_bed]
    depth_cmd += ['--reference', self._sw['hg19']]
    depth_cmd.append(self._final_bam)

    # 'w' is presumably the open mode used for the captured output file --
    # confirm against run_command_file_handle.
    run_command_file_handle(__modname__, depth_cmd, self._log_file, 'w', pileup_depth)
    run_command_md5sum(__modname__, self._log_file, pileup_depth, checksum_path)
    log_progress(__modname__, 'Get Pileup Depth finished', f=self._log_file)
def vcf_post_processing(self, input_file, refined_vcf):
    """Run ``vt normalize`` piped into ``vt decompose`` and store the result.

    The piped output is written to ``refined_vcf`` and a checksum file
    ``<refined_vcf>.md5`` is created. When ``md5sum_check`` accepts the
    existing pair, only the progress/version lines are logged.

    :param input_file: VCF handed to ``vt normalize``.
    :param refined_vcf: path of the normalized/decomposed VCF to create.
    """
    checksum_path = '{0}.md5'.format(refined_vcf)

    if md5sum_check(refined_vcf, checksum_path):
        log_progress(__modname__, 'VCF post processing already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
        return

    log_progress(__modname__, 'VCF post processing start', f=self._log_file)
    log_version(__modname__, self._sw['vt_ver'], f=self._log_file)

    # Drop any stale output that failed the checksum test above.
    if os.path.exists(refined_vcf):
        os.remove(refined_vcf)

    # Stage 1: normalize against the hg19 reference.
    normalize_stage = '{0} normalize -r {1} {2}'.format(self._sw['vt'], self._sw['hg19'], input_file)
    # Stage 2: decompose the records read from stdin ('-').
    decompose_stage = '{0} decompose -s -'.format(self._sw['vt'])
    pipeline = [normalize_stage, decompose_stage]

    run_command_pipe_file_handle(__modname__, pipeline, self._log_file, 'w', refined_vcf)
    run_command_md5sum(__modname__, self._log_file, refined_vcf, checksum_path)
    log_progress(__modname__, 'VCF post processing finished', f=self._log_file)
def remove_reference_info(self, input_file, remove_ref_vcf):
    """Filter reference-only and no-call lines out of a VCF, then sort it.

    The pipeline is ``vcftools --recode --stdout`` -> two ``grep -v``
    filters -> ``vt sort -``. Its output is written to ``remove_ref_vcf``
    and a checksum file ``<remove_ref_vcf>.md5`` is produced. When
    ``md5sum_check`` accepts the existing pair, only progress/version lines
    are logged.

    :param input_file: VCF passed to vcftools via ``--vcf``.
    :param remove_ref_vcf: path of the filtered, sorted VCF to create.
    """
    remove_ref_vcf_md5 = '{0}.md5'.format(remove_ref_vcf)

    # remove only reference...
    if md5sum_check(remove_ref_vcf, remove_ref_vcf_md5):
        log_progress(__modname__, 'Remove only reference in VCF already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['vcftools_ver'], f=self._log_file)
        log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
    else:
        log_progress(__modname__, 'Remove only reference in VCF start', f=self._log_file)
        log_version(__modname__, self._sw['vcftools_ver'], f=self._log_file)
        log_version(__modname__, self._sw['vt_ver'], f=self._log_file)

        # Drop any stale output that failed the checksum test above.
        if os.path.exists(remove_ref_vcf):
            os.remove(remove_ref_vcf)

        exec_cmd = [
            '{0} --vcf {1} --recode --stdout'.format(self._sw['vcftools'], input_file),
            # Drop lines containing "0/0" or "0|0".
            'grep -v "0[/|]0"',
            # Drop lines containing "./." or ".|.". Raw string: the backslashes
            # belong to the grep pattern and '\.' is not a valid Python escape.
            r'grep -v "\.[/|]\."',
            '{0} sort -'.format(self._sw['vt'])
        ]
        run_command_pipe_file_handle(__modname__, exec_cmd, self._log_file, 'w', remove_ref_vcf)
        run_command_md5sum(__modname__, self._log_file, remove_ref_vcf, remove_ref_vcf_md5)
        log_progress(__modname__, 'Remove only reference in VCF finished', f=self._log_file)
def run_snpEff(self, input_file, output_file):
    """Annotate a VCF with snpEff and keep only records that got an ANN value.

    Two commands run in sequence:

    1. ``snpEff ann hg19ngb`` on ``input_file``; its output is captured in
       ``<variant_dir>/<sample_name>_snpeff_tmp.vcf``.
    2. ``bcftools view -i 'INFO/ANN!="."'`` on that temporary VCF; its
       output is captured in ``output_file``.

    A checksum file ``<output_file>.md5`` is then created. When
    ``md5sum_check`` accepts the existing pair, only progress/version lines
    are logged. The temporary VCF is not removed after a successful run.

    :param input_file: VCF to annotate.
    :param output_file: path of the annotated, filtered VCF to create.
    """
    tmp_vcf = join(self._variant_dir, '{0}_snpeff_tmp.vcf'.format(self._sample_name))
    checksum_path = '{0}.md5'.format(output_file)

    if md5sum_check(output_file, checksum_path):
        log_progress(__modname__, 'snpEff gene annotation already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['snpeff_ver'], f=self._log_file)
        return

    log_progress(__modname__, 'snpEff gene annotation start', f=self._log_file)
    log_version(__modname__, self._sw['snpeff_ver'], f=self._log_file)

    # Clear leftovers from an earlier, unverified run (temp file first).
    for stale_path in (tmp_vcf, output_file):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    # Effect categories passed to snpEff through repeated '-no <effect>' options.
    excluded_effects = (
        'INTERGENIC',
        'INTERGENIC_CONSERVED',
        'INTRAGENIC',
        'RARE_AMINO_ACID',
        'TRANSCRIPT',
        'TRANSCRIPT_DELETED',
        'REGULATION',
        'NEXT_PROT',
        'PROTEIN_STRUCTURAL_INTERACTION_LOCUS',
        'PROTEIN_PROTEIN_INTERACTION_LOCUS',
    )

    annotate_cmd = [
        self._sw['java'],
        '-Xmx4g',
        '-XX:ParallelGCThreads={0}'.format(self._pe_core),
        '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
        '-jar', self._sw['snpeff'],
        'ann', 'hg19ngb',
        '-no-downstream', '-no-upstream', '-noStats',
    ]
    for effect in excluded_effects:
        annotate_cmd.extend(['-no', effect])
    annotate_cmd.append(input_file)
    run_command_file_handle(__modname__, annotate_cmd, self._log_file, 'w', tmp_vcf)

    # Keep only records whose INFO/ANN is not ".".
    filter_cmd = [self._sw['bcftools'], 'view', '-i', 'INFO/ANN!="."', tmp_vcf]
    run_command_file_handle(__modname__, filter_cmd, self._log_file, 'w', output_file)

    run_command_md5sum(__modname__, self._log_file, output_file, checksum_path)
    log_progress(__modname__, 'snpEff gene annotation finished', f=self._log_file)
def run_dbnsfp_annotation(self, input_file, output_file):
    """Annotate a VCF with dbNSFP fields, then post-process it with a helper script.

    Two commands run in sequence:

    1. ``SnpSift dbnsfp`` on ``input_file``; its output is captured in
       ``<variant_dir>/<sample_name>_dbnsfp_tmp.vcf``.
    2. The Python script configured as ``self._sw['ngb_transcript_dbNSFP']``
       reads that temporary VCF and writes ``output_file`` (``-o``).

    A checksum file ``<output_file>.md5`` is then created and an ``rm -rf``
    command is issued for the sample temp directory. When ``md5sum_check``
    accepts the existing pair, only progress/version lines are logged.

    :param input_file: VCF to annotate.
    :param output_file: path of the final annotated VCF to create.
    """
    tmp_vcf = join(self._variant_dir, '{0}_dbnsfp_tmp.vcf'.format(self._sample_name))
    checksum_path = '{0}.md5'.format(output_file)

    if md5sum_check(output_file, checksum_path):
        log_progress(__modname__, 'dbNSFP annotation already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['dbnsfp_db_ver'], f=self._log_file)
        return

    log_progress(__modname__, 'dbNSFP annotation start', f=self._log_file)
    log_version(__modname__, self._sw['dbnsfp_db_ver'], f=self._log_file)

    # Comma-separated dbNSFP columns requested via '-f' (adjacent literals
    # are concatenated into one string at compile time).
    dbnsfp_fields = (
        'aapos,aapos_SIFT,aapos_FATHMM,Uniprot_acc,Interpro_domain,'
        'SIFT_pred,SIFT_score,LRT_pred,'
        'MutationTaster_pred,MutationTaster_score,'
        'GERP++_NR,GERP++_RS,phastCons100way_vertebrate,'
        'MutationAssessor_pred,FATHMM_pred,PROVEAN_pred,MetaSVM_pred,'
        'Polyphen2_HDIV_pred,Polyphen2_HDIV_score,'
        'Polyphen2_HVAR_pred,Polyphen2_HVAR_score,'
        'CADD_phred'
    )

    snpsift_cmd = [
        self._sw['java'],
        '-Xmx4g',
        '-XX:ParallelGCThreads={0}'.format(self._pe_core),
        '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
        '-jar', self._sw['snpsift'],
        'dbnsfp',
        '-f', dbnsfp_fields,
        '-db', self._sw['dbnsfp_db'],
        input_file,
    ]
    run_command_file_handle(__modname__, snpsift_cmd, self._log_file, 'w', tmp_vcf)

    transcript_cmd = ['python', self._sw['ngb_transcript_dbNSFP'], '-o', output_file, tmp_vcf]
    run_command(__modname__, transcript_cmd, self._log_file)

    run_command_md5sum(__modname__, self._log_file, output_file, checksum_path)

    # NOTE(review): the '*' is only expanded if run_command executes through
    # a shell; with a plain argv exec this would target a literal '*' entry.
    # Confirm against run_command.
    cleanup_cmd = ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)]
    run_command(__modname__, cleanup_cmd, self._log_file)

    log_progress(__modname__, 'dbNSFP annotation finished', f=self._log_file)
def generate_tdf_file(self, final_bam):
    """Create ``<final_bam>.tdf`` with ``igvtools count`` plus its md5 file.

    When ``md5sum_check`` accepts the existing TDF/md5 pair, only the
    progress and version lines are logged.

    :param final_bam: BAM path; the TDF is written next to it with a
        ``.tdf`` suffix appended.
    """
    tdf_path = '{0}.tdf'.format(final_bam)
    checksum_path = '{0}.md5'.format(tdf_path)

    if md5sum_check(tdf_path, checksum_path):
        log_progress(__modname__, 'TDF file generation already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['igvtools_ver'], f=self._log_file)
        return

    log_progress(__modname__, 'TDF file generation start', f=self._log_file)
    log_version(__modname__, self._sw['igvtools_ver'], f=self._log_file)

    # Drop any stale output that failed the checksum test above.
    if os.path.exists(tdf_path):
        os.remove(tdf_path)

    # igvtools count <input> <output> <genome>
    count_cmd = [self._sw['igvtools'], 'count', final_bam, tdf_path, 'hg19']
    run_command(__modname__, count_cmd, self._log_file)

    run_command_md5sum(__modname__, self._log_file, tdf_path, checksum_path)
    log_progress(__modname__, 'TDF file generation finished', f=self._log_file)
def run_TST170_pipeline(self, TST170_completed_file_path):
    """Run the TruSightTumor170 docker image and locate its analysis directory.

    The status log file is overwritten with a fixed status/progress message.
    The docker container is started only when ``TST170_completed_file_path``
    does not exist yet. Afterwards the output directory is scanned for
    entries whose name starts with ``TruSightTumor170_Analysis_``; if the
    selected directory contains ``Summary.tsv`` the completion marker file
    is (re)written.

    NOTE(review): the nesting was reconstructed from collapsed source. The
    version log, the directory scan and the marker write are assumed to run
    on both branches, because the return value needs the scan result either
    way -- confirm against the original layout.

    :param TST170_completed_file_path: marker file signalling a finished run.
    :return: path of the last matching analysis directory as listed by
        ``os.listdir`` (listing order is not guaranteed), or ``None`` when
        nothing matches.
    """
    with open(self._status_log_file, 'w') as status_fh:
        status_fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 5')
    log_progress(__modname__, 'Run the TruSightTumor170 pipeline', f=self._log_file)

    if not os.path.exists(TST170_completed_file_path):
        log_progress(__modname__, 'TruSightTumor170 pipeline start', f=self._log_file)
        docker_cmd = ['docker', 'run', '-t', '--rm', '--name', self._run_id]
        # Volume mounts: host clock, run data, genome resources, analysis output.
        docker_cmd += ['-v', '/etc/localtime:/etc/localtime']
        docker_cmd += ['-v', '{0}:/data'.format(self._run_dir)]
        docker_cmd += ['-v', '{0}:/genomes'.format(self._TST170_Resources)]
        docker_cmd += ['-v', '{0}:/analysis'.format(self._output_dir)]
        docker_cmd.append('ngb_tst170:1.0.0.0')
        run_command(__modname__, docker_cmd, self._log_file)
        sleep(10)
    else:
        log_progress(__modname__, 'TruSightTumor170 pipeline already finished', f=self._log_file)

    # check TST170 output directory
    log_version(__modname__, 'software,TST170 Local App,v1.0,Oct.2017,http://sapac.support.illumina.com/downloads/trusight-tumor-170-local-app-documentation.html?langsel=/kr/,None', f=self._log_file)

    analysis_names = [
        entry for entry in os.listdir(self._output_dir)
        if entry.startswith("TruSightTumor170_Analysis_")
    ]
    # When several directories match, the last one listed wins.
    if analysis_names:
        tst170_dir = os.path.join(self._output_dir, analysis_names[-1])
    else:
        tst170_dir = None

    # Summary.tsv is treated as the sign that the analysis completed.
    if tst170_dir is not None and os.path.exists(join(tst170_dir, 'Summary.tsv')):
        with open(TST170_completed_file_path, 'w') as marker_fh:
            marker_fh.write('TruSightTumor170 pipeline finished')
        log_progress(__modname__, 'TruSightTumor170 pipeline finished', f=self._log_file)

    return tst170_dir
def run_clinvar_annotation(self, input_file, output_file):
    """Annotate a VCF with ClinVar data using two configured Python scripts.

    1. ``self._sw['ngb_anno_clinvar']`` reads ``input_file`` and writes
       ``<variant_dir>/<sample_name>_clinvar_tmp.vcf``.
    2. ``self._sw['ngb_clinvar_variation']`` reads that temporary VCF and
       writes ``output_file``.

    A checksum file ``<output_file>.md5`` is then created. When
    ``md5sum_check`` accepts the existing pair, only progress/version lines
    are logged.

    :param input_file: VCF to annotate.
    :param output_file: path of the ClinVar-annotated VCF to create.
    """
    tmp_vcf = join(self._variant_dir, '{0}_clinvar_tmp.vcf'.format(self._sample_name))
    checksum_path = '{0}.md5'.format(output_file)

    if md5sum_check(output_file, checksum_path):
        log_progress(__modname__, 'ClinVar annotation already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['ngb_clinvar_ver'], f=self._log_file)
        return

    log_progress(__modname__, 'ClinVar annotation start', f=self._log_file)
    log_version(__modname__, self._sw['ngb_clinvar_ver'], f=self._log_file)

    annotate_cmd = ['python', self._sw['ngb_anno_clinvar']]
    annotate_cmd += ['--dbfile', self._sw['ngb_clinvar_db']]
    annotate_cmd += ['--infoVCF', self._sw['clinvar_compact_header']]
    annotate_cmd += ['--inVCF', input_file]
    annotate_cmd += ['--outVCF', tmp_vcf]
    run_command(__modname__, annotate_cmd, self._log_file)

    variation_cmd = ['python', self._sw['ngb_clinvar_variation']]
    variation_cmd += ['-d', self._sw['ngb_clinvar_ref']]
    variation_cmd += ['-o', output_file]
    variation_cmd.append(tmp_vcf)
    run_command(__modname__, variation_cmd, self._log_file)

    run_command_md5sum(__modname__, self._log_file, output_file, checksum_path)
    log_progress(__modname__, 'ClinVar annotation finished', f=self._log_file)
def run_annotation(self, db_name, target_vcf, target_vcf_ver, _info, _name, input_file, output_file):
    """Annotate ``input_file`` from ``target_vcf`` with ``SnpSift annotate``.

    The command output is captured in ``output_file`` and a checksum file
    ``<output_file>.md5`` is created; afterwards an ``rm -rf`` command is
    issued for the sample temp directory. When ``md5sum_check`` accepts the
    existing pair, only progress/version lines are logged.

    :param db_name: label used in the progress messages.
    :param target_vcf: annotation database VCF passed to SnpSift.
    :param target_vcf_ver: version record handed to ``log_version``.
    :param _info: value for ``-info``; the option is omitted when this is ``''``.
    :param _name: value for ``-name``; the option is omitted when this is ``''``.
    :param input_file: VCF to annotate.
    :param output_file: path of the annotated VCF to create.
    """
    checksum_path = '{0}.md5'.format(output_file)

    if md5sum_check(output_file, checksum_path):
        log_progress(__modname__, '{0} annotation already finished!!!'.format(db_name), f=self._log_file)
        log_version(__modname__, target_vcf_ver, f=self._log_file)
        return

    log_progress(__modname__, '{0} annotation start'.format(db_name), f=self._log_file)
    log_version(__modname__, target_vcf_ver, f=self._log_file)

    snpsift_cmd = [
        self._sw['java'],
        '-Xmx4g',
        '-XX:ParallelGCThreads={0}'.format(self._pe_core),
        '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
        '-jar', self._sw['snpsift'],
        'annotate',
    ]
    # Optional switches are added only for non-empty-string values.
    if _info != '':
        snpsift_cmd.extend(['-info', _info])
    if _name != '':
        snpsift_cmd.extend(['-name', _name])
    # Positional arguments: database VCF first, then the VCF to annotate.
    snpsift_cmd.extend([target_vcf, input_file])

    run_command_file_handle(__modname__, snpsift_cmd, self._log_file, 'w', output_file)
    run_command_md5sum(__modname__, self._log_file, output_file, checksum_path)

    # NOTE(review): the '*' is only expanded if run_command executes through
    # a shell; with a plain argv exec this would target a literal '*' entry.
    # Confirm against run_command.
    cleanup_cmd = ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)]
    run_command(__modname__, cleanup_cmd, self._log_file)

    log_progress(__modname__, '{0} annotation finished'.format(db_name), f=self._log_file)