Exemplo n.º 1
0
 def pileup_depth(self, pileup_depth):
     """Create the depth file at ``pileup_depth`` unless it is already valid.

     The step is skipped when ``md5sum_check`` accepts the output file
     together with its ``<pileup_depth>.md5`` companion. Otherwise the
     configured samtools executable is invoked with the ``depth``
     subcommand, restricted to ``self._target_bed`` and reading
     ``self._final_bam``; afterwards ``run_command_md5sum`` is called for
     the new output.

     :param pileup_depth: path of the depth output file.
     """
     checksum_path = '%s.md5' % pileup_depth

     # Guard clause: nothing to do when the existing output checks out.
     if md5sum_check(pileup_depth, checksum_path):
         log_progress(__modname__,
                      'Get Pileup Depth already finished!!!',
                      f=self._log_file)
         log_version(__modname__,
                     self._sw['samtools_ver'],
                     f=self._log_file)
         return

     log_progress(__modname__, 'Get Pileup Depth start', f=self._log_file)
     log_version(__modname__, self._sw['samtools_ver'], f=self._log_file)

     # Assemble the argument vector piece by piece, in the original order.
     depth_cmd = [self._sw['samtools'], 'depth']
     depth_cmd += ['-a', '-q', '0', '-Q', '1', '-d', '1000000']
     depth_cmd += ['-b', self._target_bed]
     depth_cmd += ['--reference', self._sw['hg19']]
     depth_cmd.append(self._final_bam)

     # The output path is handed over with mode 'w'; presumably the helper
     # redirects the command's stdout there -- confirm in
     # run_command_file_handle.
     run_command_file_handle(__modname__, depth_cmd, self._log_file, 'w',
                             pileup_depth)
     run_command_md5sum(__modname__, self._log_file, pileup_depth,
                        checksum_path)
     log_progress(__modname__, 'Get Pileup Depth finished', f=self._log_file)
Exemplo n.º 2
0
 def vcf_post_processing(self, input_file, refined_vcf):
     """Normalize and decompose ``input_file`` into ``refined_vcf`` with vt.

     Skipped when ``md5sum_check`` accepts ``refined_vcf`` and its
     ``.md5`` companion. Otherwise any existing ``refined_vcf`` is deleted,
     two vt command strings (``normalize`` then ``decompose``) are handed
     to ``run_command_pipe_file_handle``, and ``run_command_md5sum`` is
     called for the result.

     :param input_file: VCF passed to ``vt normalize``.
     :param refined_vcf: path of the output VCF.
     """
     checksum_path = '{0}.md5'.format(refined_vcf)

     if md5sum_check(refined_vcf, checksum_path):
         log_progress(__modname__,
                      'VCF post processing already finished!!!',
                      f=self._log_file)
         log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
         return

     log_progress(__modname__, 'VCF post processing start',
                  f=self._log_file)
     log_version(__modname__, self._sw['vt_ver'], f=self._log_file)

     # Drop a stale output left behind by an earlier, unverified run.
     if os.path.exists(refined_vcf):
         os.remove(refined_vcf)

     normalize_stage = '{0} normalize -r {1} {2}'.format(
         self._sw['vt'], self._sw['hg19'], input_file)
     decompose_stage = '{0} decompose -s -'.format(self._sw['vt'])

     # Presumably the helper chains the stages with pipes and writes the
     # last stage's stdout to refined_vcf -- confirm in
     # run_command_pipe_file_handle.
     run_command_pipe_file_handle(__modname__,
                                  [normalize_stage, decompose_stage],
                                  self._log_file, 'w', refined_vcf)
     run_command_md5sum(__modname__, self._log_file, refined_vcf,
                        checksum_path)
     log_progress(__modname__, 'VCF post processing finished',
                  f=self._log_file)
Exemplo n.º 3
0
 def remove_reference_info(self, input_file, remove_ref_vcf):
     """Write ``remove_ref_vcf`` from ``input_file`` via vcftools, grep and vt.

     Skipped when ``md5sum_check`` accepts ``remove_ref_vcf`` and its
     ``.md5`` companion. Otherwise any existing output is deleted and four
     command strings are handed to ``run_command_pipe_file_handle``:
     ``vcftools --recode --stdout``, two ``grep -v`` filters that drop
     lines matching ``0[/|]0`` and ``\\.[/|]\\.``, and ``vt sort -``.
     ``run_command_md5sum`` is then called for the result.

     :param input_file: VCF read by vcftools.
     :param remove_ref_vcf: path of the filtered output VCF.
     """
     remove_ref_vcf_md5 = '{0}.md5'.format(remove_ref_vcf)

     # remove only reference...
     if md5sum_check(remove_ref_vcf, remove_ref_vcf_md5):
         log_progress(__modname__,
                      'Remove only reference in VCF already finished!!!',
                      f=self._log_file)
         log_version(__modname__,
                     self._sw['vcftools_ver'],
                     f=self._log_file)
         log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
         return

     log_progress(__modname__,
                  'Remove only reference in VCF start',
                  f=self._log_file)
     log_version(__modname__,
                 self._sw['vcftools_ver'],
                 f=self._log_file)
     log_version(__modname__, self._sw['vt_ver'], f=self._log_file)

     # Drop a stale output left behind by an earlier, unverified run.
     if os.path.exists(remove_ref_vcf):
         os.remove(remove_ref_vcf)

     exec_cmd = [
         '{0} --vcf {1} --recode --stdout'.format(self._sw['vcftools'],
                                                  input_file),
         'grep -v "0[/|]0"',
         # Raw string: "\." is an invalid escape sequence in a normal
         # literal (warned about by the interpreter); the resulting text
         # is unchanged.
         r'grep -v "\.[/|]\."',
         '{0} sort -'.format(self._sw['vt']),
     ]
     # Presumably the helper chains the stages with pipes and writes the
     # last stage's stdout to remove_ref_vcf -- confirm in
     # run_command_pipe_file_handle.
     run_command_pipe_file_handle(__modname__, exec_cmd, self._log_file,
                                  'w', remove_ref_vcf)
     run_command_md5sum(__modname__, self._log_file, remove_ref_vcf,
                        remove_ref_vcf_md5)
     log_progress(__modname__,
                  'Remove only reference in VCF finished',
                  f=self._log_file)
Exemplo n.º 4
0
    def run_snpEff(self, input_file, output_file):
        """Annotate ``input_file`` with snpEff and filter the result.

        Skipped when ``md5sum_check`` accepts ``output_file`` and its
        ``.md5`` companion. Otherwise two commands run in sequence:

        1. the configured java executable runs the snpEff jar (``ann``
           against ``hg19ngb``) with a fixed set of ``-no`` exclusions; its
           output path is ``<sample>_snpeff_tmp.vcf`` in
           ``self._variant_dir``;
        2. ``bcftools view -i 'INFO/ANN!="."'`` reads that temporary file,
           with ``output_file`` as its output path.

        ``run_command_md5sum`` is then called for ``output_file``.

        :param input_file: VCF handed to snpEff.
        :param output_file: path of the final annotated VCF.
        """
        tmp_vcf_name = '{0}_snpeff_tmp.vcf'.format(self._sample_name)
        snpeff_tmp_out = join(self._variant_dir, tmp_vcf_name)
        checksum_path = '{0}.md5'.format(output_file)

        if md5sum_check(output_file, checksum_path):
            log_progress(__modname__,
                         'snpEff gene annotation already finished!!!',
                         f=self._log_file)
            log_version(__modname__, self._sw['snpeff_ver'], f=self._log_file)
            return

        log_progress(__modname__, 'snpEff gene annotation start',
                     f=self._log_file)
        log_version(__modname__, self._sw['snpeff_ver'], f=self._log_file)

        # Clear leftovers from an earlier run: temp file first, then output.
        for stale_path in (snpeff_tmp_out, output_file):
            if os.path.exists(stale_path):
                os.remove(stale_path)

        # Effect names passed to snpEff one by one, each behind a "-no" flag.
        excluded_effects = (
            'INTERGENIC',
            'INTERGENIC_CONSERVED',
            'INTRAGENIC',
            'RARE_AMINO_ACID',
            'TRANSCRIPT',
            'TRANSCRIPT_DELETED',
            'REGULATION',
            'NEXT_PROT',
            'PROTEIN_STRUCTURAL_INTERACTION_LOCUS',
            'PROTEIN_PROTEIN_INTERACTION_LOCUS',
        )
        annotate_cmd = [
            self._sw['java'],
            '-Xmx4g',
            '-XX:ParallelGCThreads={0}'.format(self._pe_core),
            '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
            '-jar',
            self._sw['snpeff'],
            'ann',
            'hg19ngb',
            '-no-downstream',
            '-no-upstream',
            '-noStats',
        ]
        for effect in excluded_effects:
            annotate_cmd.extend(('-no', effect))
        annotate_cmd.append(input_file)
        run_command_file_handle(__modname__, annotate_cmd, self._log_file,
                                'w', snpeff_tmp_out)

        filter_cmd = [self._sw['bcftools'], 'view']
        filter_cmd += ['-i', 'INFO/ANN!="."']
        filter_cmd.append(snpeff_tmp_out)
        run_command_file_handle(__modname__, filter_cmd, self._log_file,
                                'w', output_file)

        run_command_md5sum(__modname__, self._log_file, output_file,
                           checksum_path)
        log_progress(__modname__, 'snpEff gene annotation finished',
                     f=self._log_file)
Exemplo n.º 5
0
    def run_dbnsfp_annotation(self, input_file, output_file):
        """Annotate ``input_file`` with dbNSFP fields and post-process it.

        Skipped when ``md5sum_check`` accepts ``output_file`` and its
        ``.md5`` companion. Otherwise:

        1. the configured java executable runs the SnpSift jar with the
           ``dbnsfp`` subcommand and a fixed field list; its output path is
           ``<sample>_dbnsfp_tmp.vcf`` in ``self._variant_dir``;
        2. ``python <ngb_transcript_dbNSFP> -o output_file <tmp vcf>`` runs;
        3. ``run_command_md5sum`` is called for ``output_file``;
        4. ``rm -rf <self._sample_tmp_dir>/*`` is issued.

        :param input_file: VCF handed to SnpSift.
        :param output_file: path written by the post-processing script.
        """
        tmp_vcf_name = '{0}_dbnsfp_tmp.vcf'.format(self._sample_name)
        dbnsfp_tmp_vcf = join(self._variant_dir, tmp_vcf_name)
        checksum_path = '{0}.md5'.format(output_file)

        if md5sum_check(output_file, checksum_path):
            log_progress(__modname__,
                         'dbNSFP annotation already finished!!!',
                         f=self._log_file)
            log_version(__modname__,
                        self._sw['dbnsfp_db_ver'],
                        f=self._log_file)
            return

        log_progress(__modname__, 'dbNSFP annotation start',
                     f=self._log_file)
        log_version(__modname__, self._sw['dbnsfp_db_ver'],
                    f=self._log_file)

        # Value of SnpSift's "-f" option: one comma-separated string.
        dbnsfp_fields = ','.join((
            'aapos',
            'aapos_SIFT',
            'aapos_FATHMM',
            'Uniprot_acc',
            'Interpro_domain',
            'SIFT_pred',
            'SIFT_score',
            'LRT_pred',
            'MutationTaster_pred',
            'MutationTaster_score',
            'GERP++_NR',
            'GERP++_RS',
            'phastCons100way_vertebrate',
            'MutationAssessor_pred',
            'FATHMM_pred',
            'PROVEAN_pred',
            'MetaSVM_pred',
            'Polyphen2_HDIV_pred',
            'Polyphen2_HDIV_score',
            'Polyphen2_HVAR_pred',
            'Polyphen2_HVAR_score',
            'CADD_phred',
        ))
        snpsift_cmd = [
            self._sw['java'],
            '-Xmx4g',
            '-XX:ParallelGCThreads={0}'.format(self._pe_core),
            '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
            '-jar',
            self._sw['snpsift'],
            'dbnsfp',
            '-f',
            dbnsfp_fields,
            '-db',
            self._sw['dbnsfp_db'],
            input_file,
        ]
        run_command_file_handle(__modname__, snpsift_cmd, self._log_file,
                                'w', dbnsfp_tmp_vcf)

        transcript_cmd = ['python', self._sw['ngb_transcript_dbNSFP']]
        transcript_cmd += ['-o', output_file]
        transcript_cmd.append(dbnsfp_tmp_vcf)
        run_command(__modname__, transcript_cmd, self._log_file)

        run_command_md5sum(__modname__, self._log_file, output_file,
                           checksum_path)

        # NOTE(review): the trailing "/*" is only expanded if run_command
        # goes through a shell; without one, rm receives the literal
        # pattern and removes nothing -- confirm against run_command.
        cleanup_cmd = ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)]
        run_command(__modname__, cleanup_cmd, self._log_file)

        log_progress(__modname__, 'dbNSFP annotation finished',
                     f=self._log_file)
Exemplo n.º 6
0
 def generate_tdf_file(self, final_bam):
     """Create ``<final_bam>.tdf`` with igvtools unless it is already valid.

     Skipped when ``md5sum_check`` accepts the TDF file and its ``.md5``
     companion. Otherwise any existing TDF file is deleted, the configured
     igvtools executable is run as ``count <final_bam> <tdf> hg19``, and
     ``run_command_md5sum`` is called for the result.

     :param final_bam: path of the BAM file; the TDF path is derived from
         it by appending ``.tdf``.
     """
     tdf_path = '{0}.tdf'.format(final_bam)
     checksum_path = '{0}.md5'.format(tdf_path)

     if md5sum_check(tdf_path, checksum_path):
         log_progress(__modname__,
                      'TDF file generation already finished!!!',
                      f=self._log_file)
         log_version(__modname__, self._sw['igvtools_ver'],
                     f=self._log_file)
         return

     log_progress(__modname__, 'TDF file generation start',
                  f=self._log_file)
     log_version(__modname__, self._sw['igvtools_ver'], f=self._log_file)

     # Drop a stale output left behind by an earlier, unverified run.
     if os.path.exists(tdf_path):
         os.remove(tdf_path)

     count_cmd = [self._sw['igvtools'], 'count', final_bam, tdf_path, 'hg19']
     run_command(__modname__, count_cmd, self._log_file)
     run_command_md5sum(__modname__, self._log_file, tdf_path,
                        checksum_path)
     log_progress(__modname__, 'TDF file generation finished',
                  f=self._log_file)
Exemplo n.º 7
0
    def run_TST170_pipeline(self, TST170_completed_file_path):
        """Run the TruSightTumor170 docker image and locate its output.

        The status log file is overwritten with a fixed status/progress
        record. If ``TST170_completed_file_path`` does not exist yet, the
        ``ngb_tst170:1.0.0.0`` image is started through ``docker run`` with
        ``self._run_dir``, ``self._TST170_Resources`` and
        ``self._output_dir`` mounted as ``/data``, ``/genomes`` and
        ``/analysis``.

        Afterwards ``self._output_dir`` is scanned for entries whose name
        starts with ``TruSightTumor170_Analysis_``. When the selected entry
        contains ``Summary.tsv``, the completion marker file is written.

        :param TST170_completed_file_path: path of the marker file whose
            existence means the pipeline has already run.
        :return: path of the selected analysis directory, or ``None`` when
            no matching entry exists. The path is returned even if
            ``Summary.tsv`` is missing.
        """
        with open(self._status_log_file, 'w') as f:
            f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 5')
        log_progress(__modname__, 'Run the TruSightTumor170 pipeline', f=self._log_file)

        if os.path.exists(TST170_completed_file_path):
            log_progress(__modname__, 'TruSightTumor170 pipeline already finished', f=self._log_file)
        else:
            log_progress(__modname__, 'TruSightTumor170 pipeline start', f=self._log_file)
            exec_cmd = [
                'docker', 'run',
                '-t',
                '--rm',
                '--name', self._run_id,
                '-v', '/etc/localtime:/etc/localtime',
                '-v', '{0}:/data'.format(self._run_dir),
                '-v', '{0}:/genomes'.format(self._TST170_Resources),
                '-v', '{0}:/analysis'.format(self._output_dir),
                'ngb_tst170:1.0.0.0'
            ]
            run_command(__modname__, exec_cmd, self._log_file)
            # Fixed pause after the container run; the reason is not visible
            # in this method.
            sleep(10)

        log_version(__modname__, 'software,TST170 Local App,v1.0,Oct.2017,http://sapac.support.illumina.com/downloads/trusight-tumor-170-local-app-documentation.html?langsel=/kr/,None', f=self._log_file)

        # check TST170 output directory
        # os.listdir() yields entries in arbitrary order, so sort the
        # matches and take the last name: the choice is then deterministic
        # when several analysis directories exist.
        # NOTE(review): presumably the name suffix is a timestamp, making
        # the last name the most recent run -- confirm.
        analysis_dirs = sorted(
            entry for entry in os.listdir(self._output_dir)
            if entry.startswith("TruSightTumor170_Analysis_"))
        tst170_dir = None
        if analysis_dirs:
            tst170_dir = os.path.join(self._output_dir, analysis_dirs[-1])

        if tst170_dir is not None and os.path.exists(os.path.join(tst170_dir, 'Summary.tsv')):
            with open(TST170_completed_file_path, 'w') as completed_file:
                completed_file.write('TruSightTumor170 pipeline finished')
            log_progress(__modname__, 'TruSightTumor170 pipeline finished', f=self._log_file)
        return tst170_dir
Exemplo n.º 8
0
    def run_clinvar_annotation(self, input_file, output_file):
        """Annotate ``input_file`` with ClinVar data in two script passes.

        Skipped when ``md5sum_check`` accepts ``output_file`` and its
        ``.md5`` companion. Otherwise:

        1. the ``ngb_anno_clinvar`` script is run with ``python``, reading
           ``input_file`` and writing ``<sample>_clinvar_tmp.vcf`` in
           ``self._variant_dir``;
        2. the ``ngb_clinvar_variation`` script is run on that temporary
           file with ``-o output_file``;
        3. ``run_command_md5sum`` is called for ``output_file``.

        :param input_file: VCF passed as ``--inVCF`` to the first script.
        :param output_file: path passed as ``-o`` to the second script.
        """
        tmp_vcf_name = '{0}_clinvar_tmp.vcf'.format(self._sample_name)
        clinvar_tmp_vcf = join(self._variant_dir, tmp_vcf_name)
        checksum_path = '{0}.md5'.format(output_file)

        if md5sum_check(output_file, checksum_path):
            log_progress(__modname__,
                         'ClinVar annotation already finished!!!',
                         f=self._log_file)
            log_version(__modname__,
                        self._sw['ngb_clinvar_ver'],
                        f=self._log_file)
            return

        log_progress(__modname__, 'ClinVar annotation start',
                     f=self._log_file)
        log_version(__modname__, self._sw['ngb_clinvar_ver'],
                    f=self._log_file)

        annotate_cmd = ['python', self._sw['ngb_anno_clinvar']]
        annotate_cmd += ['--dbfile', self._sw['ngb_clinvar_db']]
        annotate_cmd += ['--infoVCF', self._sw['clinvar_compact_header']]
        annotate_cmd += ['--inVCF', input_file]
        annotate_cmd += ['--outVCF', clinvar_tmp_vcf]
        run_command(__modname__, annotate_cmd, self._log_file)

        variation_cmd = ['python', self._sw['ngb_clinvar_variation']]
        variation_cmd += ['-d', self._sw['ngb_clinvar_ref']]
        variation_cmd += ['-o', output_file]
        variation_cmd.append(clinvar_tmp_vcf)
        run_command(__modname__, variation_cmd, self._log_file)

        run_command_md5sum(__modname__, self._log_file, output_file,
                           checksum_path)
        log_progress(__modname__, 'ClinVar annotation finished',
                     f=self._log_file)
Exemplo n.º 9
0
 def run_annotation(self, db_name, target_vcf, target_vcf_ver, _info, _name,
                    input_file, output_file):
     """Annotate ``input_file`` against ``target_vcf`` with SnpSift.

     Skipped when ``md5sum_check`` accepts ``output_file`` and its ``.md5``
     companion. Otherwise the configured java executable runs the SnpSift
     jar with the ``annotate`` subcommand, ``run_command_md5sum`` is called
     for ``output_file``, and ``rm -rf <self._sample_tmp_dir>/*`` is issued.

     :param db_name: label used in the progress messages.
     :param target_vcf: database VCF placed before ``input_file`` on the
         command line.
     :param target_vcf_ver: version record passed to ``log_version``.
     :param _info: value for ``-info``; the flag is omitted when this is
         the empty string.
     :param _name: value for ``-name``; the flag is omitted when this is
         the empty string.
     :param input_file: VCF to annotate.
     :param output_file: path of the annotated output.
     """
     checksum_path = '{0}.md5'.format(output_file)

     if md5sum_check(output_file, checksum_path):
         log_progress(__modname__,
                      '{0} annotation already finished!!!'.format(db_name),
                      f=self._log_file)
         log_version(__modname__, target_vcf_ver, f=self._log_file)
         return

     log_progress(__modname__,
                  '{0} annotation start'.format(db_name),
                  f=self._log_file)
     log_version(__modname__, target_vcf_ver, f=self._log_file)

     annotate_cmd = [
         self._sw['java'],
         '-Xmx4g',
         '-XX:ParallelGCThreads={0}'.format(self._pe_core),
         '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
         '-jar',
         self._sw['snpsift'],
         'annotate',
     ]
     # Optional flags: only an exact empty string suppresses a flag.
     for flag, value in (('-info', _info), ('-name', _name)):
         if value != '':
             annotate_cmd.extend([flag, value])
     annotate_cmd.extend([target_vcf, input_file])

     run_command_file_handle(__modname__, annotate_cmd, self._log_file, 'w',
                             output_file)
     run_command_md5sum(__modname__, self._log_file, output_file,
                        checksum_path)

     # NOTE(review): the trailing "/*" is only expanded if run_command goes
     # through a shell; without one, rm receives the literal pattern and
     # removes nothing -- confirm against run_command.
     cleanup_cmd = ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)]
     run_command(__modname__, cleanup_cmd, self._log_file)

     log_progress(__modname__,
                  '{0} annotation finished'.format(db_name),
                  f=self._log_file)