Example #1
    def annotate_gff(self, sampleID):

        cmd = '''
            set -eo pipefail
            echo annotate gff for {sampleID}: `date "+%F %T"`\n

            cd {analydir}/SV/{sampleID}/{sv_soft}

            sh {annovar} \\
                -t SVType \\
                {sampleID}.{sv_soft}.gff \\
                {sampleID}.{sv_soft}

            python {moduledir}/Varition/SV/sv_cnv_stat.py \\
                -i {sampleID}.{sv_soft}.hg19_multianno.xls \\
                -s {sampleID} \\
                -soft {sv_soft}

            echo annotate gff for {sampleID} `date "+%F %T"`
            '''.format(sampleID=sampleID, **self.__dict__)

        shell_path = '{analydir}/SV/{sampleID}/{sv_soft}/annotate_gff_{sampleID}.sh'.format(
            sampleID=sampleID, **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'annotate_gff'
        job_name = 'annotate_gff_{}'.format(sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        after_jobs = ['data_release', 'primary_report']
        utils.add_order(self.orders, job_name, after_jobs=after_jobs)
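
Every method in this collection follows the same pattern: render a shell command, write it out with utils.write_shell, register the job via utils.add_job, and declare its scheduling dependencies via utils.add_order. The snippet below is a purely hypothetical sketch of those two helpers (not the actual code from Leonguos/pipeline), assuming jobs is a list of job dicts and orders maps each job name to the set of jobs it must wait for:

    # Hypothetical sketch only; the real helpers in Leonguos/pipeline may differ.
    def add_job(jobs, now_point, startpoint, analysis_points, job_name, shell_path, queues, threads=1):
        # startpoint / analysis_points presumably decide whether this step still needs
        # to run when resuming a pipeline; here every job is simply recorded.
        jobs.append({
            'name': job_name,
            'point': now_point,
            'shell': shell_path,
            'queues': list(queues),
            'threads': threads,
        })

    def add_order(orders, job_name, before_jobs=None, after_jobs=None):
        # before_jobs must finish before job_name starts; job_name must finish
        # before each job listed in after_jobs.
        for before in before_jobs or []:
            orders.setdefault(job_name, set()).add(before)
        for after in after_jobs or []:
            orders.setdefault(after, set()).add(job_name)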
Example #2
    def breakdancer_config(self, sampleID):

        cmd = '''
            set -eo pipefail
            echo breakdancer config for {sampleID} start: `date "+%F %T"`\n

            cd {analydir}/SV/{sampleID}/breakdancer

            perl {soft_dir}/breakdancer/current/bam2cfg.pl \\
                -g -h -n 100000 \\
                {analydir}/Mapping/{sampleID}.{sampleID}/{sampleID}.final.bam \\
                > {sampleID}.breakdancer.cfg

            echo breakdancer config for {sampleID} done: `date "+%F %T"`
        '''.format(sampleID=sampleID, **self.__dict__)

        shell_path = '{analydir}/SV/{sampleID}/breakdancer/breakdancer_config_{sampleID}.sh'.format(
            sampleID=sampleID, **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'breakdancer_config'
        job_name = 'breakdancer_config_{}'.format(sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['final_bam_{sampleID}'.format(sampleID=sampleID)]
        utils.add_order(self.orders, job_name, before_jobs=before_jobs)
Example #3
    def phenolyzer(self):

        print '>  phenolyzer ...'
        # write shell
        if not self.args['disease_name']:
            print '[error] phenolyzer needs disease name in your sample_info'
            exit(1)

        cmd = '''
            set -eo pipefail
            echo phenolyzer start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/Network

            # Phenolyzer
            python {moduledir}/Phenolyzer/phenolyzer-0.1.5/phenolyzer_pipe4.7.py \\
                --dir {analydir} \\
                --disease "{disease_name}" \\
                --genelist {analydir}/Advance/{newjob}/IntegrateResult/total.candidate.gene.xls \\
                --job {newjob}

            # DisGeNet
            python {moduledir}/DisGeNet/disgenet.py \\
                --id '{disease_ids}' \\
                --glist {analydir}/Advance/{newjob}/IntegrateResult/total.candidate.gene.xls \\
                --out_dir .

            # Brief Result
            echo generate brief results

            python {ROOT_DIR}/modules/brief/text2excel.py \\
                {BriefResults}/Network/phenolyzer.xlsx \\
                {ROOT_DIR}/modules/brief/readme/phenolyzer.readme.xls \\
                AllGene_list.xls \\
                CandidateGene_list.xls \\
                CandidateGene_score.xls

            python {ROOT_DIR}/modules/brief/text2excel.py \\
                {BriefResults}/Network/disgenet.xlsx \\
                {ROOT_DIR}/modules/brief/readme/disgenet.readme.xls \\
                DisGeNet_shared_gene.xls

            echo phenolyzer done: `date "+%F %T"`
        '''.format(**self.__dict__)

        shell_path = '{analydir}/Advance/{newjob}/Network/phenolyzer.sh'.format(
            **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'phenolyzer'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['integrate_result']
        after_jobs = ['data_release']
        utils.add_order(self.orders, job_name, before_jobs=before_jobs, after_jobs=after_jobs)
Example #4
    def stat_uncover(self, patientID, sampleID):

        # based on sort.bam
        # print '  stat uncover...'
        # write shell
        if self.args['seqstrag'] != 'WGS':
            cmd = '''
                set -eo pipefail
                echo stat uncover for {sampleID} start: `date "+%F %T"`

                cd {analydir}/Alnstat/{sampleID}

                samtools-1.6 depth \\
                    -aa -q 0 -Q 0 \\
                    -b {TR} \\
                    {analydir}/Mapping/{patientID}.{sampleID}/{sampleID}.sort.bam |
                awk -F'\\t' '$3==0' |
                grep -vwf target_region.00.depth > target_region.0.depth

                python {moduledir}/Alnstat/uncover_pos_chr_pipe4.6.py \\
                    target_region.0.depth \\
                    {sampleID} \\
                    {sampleID}.uncovered_region.annovar.result.xls

                rm -f target_region.0.depth

                echo stat uncover for {sampleID} done: `date "+%F %T"`
            '''
        else:
            cmd = '''
                set -eo pipefail
                echo stat uncover for {sampleID} start: `date "+%F %T"`
            
                rm -f *.depth *.bed *.pdf* *.png

                echo stat uncover for {sampleID} done: `date "+%F %T"`
            '''

        cmd = cmd.format(patientID=patientID, sampleID=sampleID, **self.args)

        shell_path = '{analydir}/Alnstat/{sampleID}/stat_uncover_{sampleID}.sh'.format(
            analydir=self.analydir, sampleID=sampleID)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'stat_uncover'
        job_name = 'stat_uncover_{sampleID}'.format(sampleID=sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['stat_depth_{sampleID}'.format(sampleID=sampleID)]
        after_jobs = []
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #5
    def ibd(self):

        if self.args['seqstrag'] == 'WGS':
            region = '-r 1'
        else:
            region = '-R {TR}'.format(**self.__dict__)

        print '>   IBD'
        cmd = '''
            set -eo pipefail

            echo IBD start: `date "+%F %T"`

            cd ${Merged_vcf}/IBD

            # extract region
            bcftools-1.6 view \\
                ${region} \\
                ../VCF/snp.merged.vcf.gz |
            awk '$5!~/*/' > snp.merged.bed.vcf

            # extract sample_info
            awk -F '\\t' -v OFS='\\t' '$1!~/^#/{print $2, $2, $1, $2}' \\
                ${samp_info} \\
                > sample.ped

            # plink 
            plink --vcf snp.merged.bed.vcf --double-id --update-ids sample.ped --make-bed -out plink

            plink --bfile plink --genome -out all
            plink --bfile plink --genome  --rel-check -out family

            # result
            awk -v OFS='\\t' '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10}' all.genome > all.IBD.xls
            awk -v OFS='\\t' '{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10}' family.genome > family.IBD.xls

            ln -sf ${moduledir}/IBD/readme.txt IBD.readme.txt

            rm -f plink.* *.{log,nosex,genome} *.vcf *.ped

            echo IBD done: `date "+%F %T"`
        '''
        
        cmd = Template(cmd).safe_substitute(**dict(self.__dict__, **locals()))

        shell_path = '{Merged_vcf}/IBD/IBD.sh'.format(**self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'ibd'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['annotate_merged_snp']
        after_jobs = ['data_release']
        utils.add_order(self.orders, job_name, before_jobs=before_jobs, after_jobs=after_jobs)
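
Unlike most methods here, ibd renders its command with string.Template.safe_substitute instead of str.format, presumably because the embedded awk programs contain literal {...} blocks that str.format would try to interpret as replacement fields. A minimal standalone illustration of the difference (not taken from the pipeline):

    from string import Template

    cmd = "awk '{print $1}' ${infile} > out.txt"

    # str.format would fail on the awk braces:
    # cmd.format(infile='snp.vcf')  ->  KeyError: 'print $1'

    # Template only expands $name / ${name} placeholders and leaves the awk body alone;
    # safe_substitute also tolerates stray '$' such as the awk field '$1'.
    print(Template(cmd).safe_substitute(infile='snp.vcf'))
    # awk '{print $1}' snp.vcf > out.txt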
Example #6
File: hla.py Project: Leonguos/pipeline
    def hla_bwa_mem(self, sampleid, lane):

        # print '>  hla bwa mem ...'
        cmd = '''
            set -eo pipefail
            echo hla bwa mem and samtools sort for {sampleid} start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}

            fq1={analydir}/QC/{sampleid}/{sampleid}_{novoid}_{flowcell}_L{lane}_1.clean.fq
            fq2={analydir}/QC/{sampleid}/{sampleid}_{novoid}_{flowcell}_L{lane}_2.clean.fq
            if [ ! -f $fq1 ];then
                fq1=$fq1.gz
                fq2=$fq2.gz
            fi

            bwa mem \\
                -t 6 -M \\
                -R "@RG\\tID:{sampleid}_{novoid}_{flowcell}_L{lane}\\tSM:{sampleid}\\tLB:{sampleid}\\tPU:{novoid}_{flowcell}_L{lane}\\tPL:illumina\\tCN:novogene" \\
                {athlates_db_dir}/ref/hla_nclean.fasta \\
                $fq1 $fq2 |
            samtools-1.6 view \\
                -@ 5 -b -S -F 4 -t \\
                {athlates_db_dir}/ref/hla_nclean.fasta.fai |
            samtools-1.6 sort \\
                -@ 3 -m 2G \\
                -T {sampleid}_{novoid}_{flowcell}_L{lane}.tmp \\
                -o {sampleid}_{novoid}_{flowcell}_L{lane}.sort.bam

            echo hla bwa mem and samtools sort for {sampleid} done: `date "+%F %T"`
        '''.format(sampleid=sampleid, **dict(lane, **self.__dict__))

        shell_path = '{analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/hla_bwa_mem_{sampleid}_{novoid}_{flowcell}_L{lane}.sh'.format(
            sampleid=sampleid, **dict(lane, **self.__dict__))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'hla_bwa_mem'
        job_name = 'hla_bwa_mem_{sampleid}_{novoid}_{flowcell}_L{lane}'.format(
            sampleid=sampleid, **lane)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)
        # add order
        before_jobs = []
        if self.qc_status == 'waiting':
            before_jobs = [
                'qc_{sampleid}_{novoid}_{flowcell}_L{lane}'.format(
                    sampleid=sampleid, **lane)
            ]

        after_jobs = [
            'hla_sambamba_merge_{sampleid}'.format(sampleid=sampleid, **lane)
        ]
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #7
    def sentieon_markdup(self, patientID, sampleID):

        # print '  sentieon markdup...'
        # write shell
        markdup_threads = self.threads['markdup']

        cmd = '''
            set -eo pipefail            
            echo sentieon markdup for {sampleID} start: `date "+%F %T"`

            cd {analydir}/Mapping/{patientID}.{sampleID}

            sentieon driver \\
                -t {markdup_threads} \\
                -i {sampleID}.sort.bam \\
                --algo LocusCollector \\
                --fun score_info \\
                {sampleID}.score.txt

            sentieon driver \\
                -t {markdup_threads} \\
                -i {sampleID}.sort.bam \\
                --algo Dedup \\
                --score_info {sampleID}.score.txt \\
                --metrics {sampleID}.dedup.metrics.txt \\
                {sampleID}.nodup.bam

            echo sentieon markdup for {sampleID} done: `date "+%F %T"`
        '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/Mapping/{patientID}.{sampleID}/sentieon_markdup_{sampleID}.sh'.format(
            **dict(self.__dict__, **locals()))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'sentieon_markdup'
        job_name = 'sentieon_markdup_{sampleID}'.format(sampleID=sampleID)
        utils.add_job(self.jobs,
                      now_point,
                      self.args['startpoint'],
                      self.ANALYSIS_POINTS,
                      job_name,
                      shell_path,
                      self.sentieon_queues,
                      threads=markdup_threads)

        # add order
        before_jobs = [
            '{merge_soft}_merge_{sampleID}'.format(sampleID=sampleID,
                                                   **self.__dict__)
        ]
        after_jobs = 'mapping_check_{sampleID} stat_flag_{sampleID}'.format(
            sampleID=sampleID).split()
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #8
    def filter_cnv(self):

        print '>   filter_cnv'
        for sampleid in self.sample_infos:
            cmd = '''
                set -eo pipefail
                echo filter cnv for {sampleid} start: `date "+%F %T"`

                cd {FilterCNV}/{sampleid}

                python {moduledir}/Varition/Filter/filter_sv_cnv.py \\
                    --proj {analydir} \\
                    --overlap 0.7 \\
                    --sampleID {sampleid} \\
                    --outdir {FilterCNV} \\
                    --soft {cnv_soft} \\
                    --lib StringentLib,InclusiveLib,DGV.GoldStandard.July2015,DGV,CNVD

                # Brief Result
                echo generate brief results

                python {ROOT_DIR}/modules/brief/brief_anno.py \\
                    -i {sampleid}.LikelyDeleterious.CNV.xls \\
                    -O {BriefResults}/FilterCNV \\
                    -t sv_cnv

                python {ROOT_DIR}/modules/brief/text2excel.py \\
                    {BriefResults}/FilterCNV/{sampleid}.LikelyDeleterious.CNV.xlsx \\
                    {ROOT_DIR}/modules/brief/readme/filter_sv_cnv.readme.xls \\
                    {BriefResults}/FilterCNV/{sampleid}.LikelyDeleterious.CNV.brief.xls

                echo filter cnv for {sampleid} done: `date "+%F %T"`
            '''.format(sampleid=sampleid, **self.__dict__)

            shell_path = '{FilterCNV}/{sampleid}/filter_cnv_{sampleid}.sh'.format(
                sampleid=sampleid, **self.args)

            utils.write_shell(shell_path, cmd)

            # add job
            now_point = 'filter_cnv'
            job_name = 'filter_cnv_{}'.format(sampleid)
            utils.add_job(self.jobs, now_point, self.args['startpoint'],
                          self.ANALYSIS_POINTS, job_name, shell_path,
                          self.queues)

            # add order
            if self.cnv_soft == 'freec':
                cnv_last = 'freec_call_{}'.format(sampleid)
            elif self.cnv_soft == 'conifer':
                cnv_last = 'conifer_call'

            before_jobs = [cnv_last]
            after_jobs = ['data_release']
            utils.add_order(self.orders,
                            job_name,
                            before_jobs=before_jobs,
                            after_jobs=after_jobs)
Example #9
    def pathway(self):

        print '>  pathway ...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo pathway start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/Pathway

            python {moduledir}/Enrich_R/bin/stat_phyper_table.py \\
                -i {analydir}/Advance/{newjob}/IntegrateResult/total.candidate.gene.xls \\
                -o Pathway \\
                -f 1

            # extract gene
            awk -F '\\t' 'NR>1 && $5<0.05 {print8}' Pathway/Pathway_kegg.xls |
                tr '/' '\\n' |
                sort -u > KEGG.xls

            # extract path
            awk -F '\\t' 'NR>1 && $5<0.05 {print1}' Pathway/Pathway_kegg.xls |
                sort -u >> KEGG.xls

            python {moduledir}/KEGG/kegg_svg.py KEGG.xls

            # Brief Result
            echo generate brief results

            python {ROOT_DIR}/modules/brief/text2excel.py \\
                {BriefResults}/Pathway/pathway.xlsx \\
                {ROOT_DIR}/modules/brief/readme/pathway.readme.xls \\
                Pathway/Pathway_go_MF.xls \\
                Pathway/Pathway_go_BP.xls \\
                Pathway/Pathway_go_CC.xls \\
                Pathway/Pathway_kegg.xls

            echo pathway done: `date "+%F %T"`
        '''.format(
            print1='{print $1}',
            print8='{print $8}',
            **self.args)

        shell_path = '{analydir}/Advance/{newjob}/Pathway/pathway_enrichment.sh'.format(
            **self.args)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'pathway'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['integrate_result']
        after_jobs = ['data_release']
        utils.add_order(self.orders, job_name, before_jobs=before_jobs, after_jobs=after_jobs)
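
The pathway method solves the same literal-brace problem the other way round: the command is still rendered with str.format, but the awk bodies are supplied as the values print1='{print $1}' and print8='{print $8}', so the braces only appear after formatting and are never parsed as fields. A standalone illustration (not from the pipeline):

    cmd = "awk -F '\\t' 'NR>1 && $5<0.05 {print8}' Pathway_kegg.xls | sort -u > KEGG.xls"

    # {print8} is an ordinary format field whose value happens to be an awk block,
    # so the braces reach the shell untouched (format does not re-scan substituted values).
    print(cmd.format(print8='{print $8}'))
    # awk -F '\t' 'NR>1 && $5<0.05 {print $8}' Pathway_kegg.xls | sort -u > KEGG.xls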
Example #10
File: hla.py Project: Leonguos/pipeline
    def hla_sort_by_name(self, sampleid, gene):

        # print '  sort by name ...'
        cmd = '''
            set -eo pipefail
            echo hla sort by name for {sampleid} {gene} start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/gene/{gene}

            mkdir -p TMP

            for gene in {gene} non-{gene};do

                samtools-1.6 view \\
                    -b -L {athlates_db_dir}/bed/hla.$gene.bed \\
                    -o {sampleid}.$gene.bam \\
                    -@ 4 \\
                    ../../{sampleid}.nodup.bam

                (
                    samtools-1.6 view -H {sampleid}.$gene.bam
                    samtools-1.6 view {sampleid}.$gene.bam | sort -k1,1 -k3,3 -T TMP
                ) | samtools-1.6 view -bS -o {sampleid}.$gene.sort.bam -@ 4 -

                rm -f {sampleid}.$gene.bam

            done

            rm -rf TMP

            # rm -f ../../{sampleid}.sort.bam
            # rm -f ../../{sampleid}.nodup.bam

            echo hla sort by name for {sampleid} {gene} done: `date "+%F %T"`
        '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/gene/{gene}/hla_sort_by_name_{sampleid}_{gene}.sh'.format(
            **dict(self.__dict__, **locals()))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'hla_sort_by_name'
        job_name = 'hla_sort_by_name_{sampleid}_{gene}'.format(**locals())

        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['hla_picard_markdup_{sampleid}'.format(**locals())]
        after_jobs = []
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #11
    def freec_call(self, sampleID):

        seqtype = self.args['seqstrag'].split('_')[0]
        target = ''
        if seqtype != 'WGS':
            target = '\\\n{:16}--target {} '.format(' ', self.args['TR'])
        sex = 'XX' if self.sample_infos[sampleID]['sex'] == 'F' else 'XY'

        REF = 'hg19' if self.__dict__['ref'] == 'b37' else self.__dict__['ref']

        cmd = '''
            set -eo pipefail
            echo cnv call with freec for {sampleID} start: `date "+%F %T"`

            cd {analydir}/SV/{sampleID}/freec

            python {moduledir}/Varition/CNV/freec/freec_calling.py \\
                --type {seqtype} {target}\\
                --format BAM \\
                --loh 0 \\
                --contamination 0 \\
                --samName {sampleID} \\
                --sample {analydir}/Mapping/{sampleID}.{sampleID}/{sampleID}.final.bam \\
                --sex {sex} \\
                --ref {ref} \\
                --o .

            python {moduledir}/Varition/CNV/freec/Chr_CNV_freec_pipe4.5.py \\
                --inf ./{sampleID}.freec.{REF}_multianno.xls \\
                --ref {ref} \\
                --sample_info {samp_info}

            rm -f *cpn *txt

            echo cnv call with freec for {sampleID} done: `date "+%F %T"`
            '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/SV/{sampleID}/freec/freec_call_{sampleID}.sh'.format(
            sampleID=sampleID, **self.args)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'freec_call'
        job_name = 'freec_call_{}'.format(sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['final_bam_{sampleID}'.format(sampleID=sampleID)]
        after_jobs = ['primary_report']
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #12
    def sambamba_markdup(self, patientID, sampleID):

        # print '  sambamba merge...'
        # write shell
        markdup_threads = self.threads['markdup']

        cmd = '''
            set -eo pipefail
            echo sambamba markdup for {sampleID} start: `date "+%F %T"`

            cd {analydir}/Mapping/{patientID}.{sampleID}

            sambamba markdup \\
                -t {markdup_threads} \\
                --overflow-list-size=10000000 \\
                --tmpdir=tmp \\
                {sampleID}.sort.bam \\
                {sampleID}.nodup.bam

            rm -rf tmp

            echo sambamba markdup for {sampleID} done: `date "+%F %T"`
        '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/Mapping/{patientID}.{sampleID}/sambamba_markdup_{sampleID}.sh'.format(
            **dict(self.__dict__, **locals()))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'sambamba_markdup'
        job_name = 'sambamba_markdup_{sampleID}'.format(sampleID=sampleID)

        utils.add_job(self.jobs,
                      now_point,
                      self.args['startpoint'],
                      self.ANALYSIS_POINTS,
                      job_name,
                      shell_path,
                      self.queues,
                      threads=markdup_threads)

        # add order
        before_jobs = [
            '{merge_soft}_merge_{sampleID}'.format(sampleID=sampleID,
                                                   **self.__dict__)
        ]
        after_jobs = 'mapping_check_{sampleID} stat_flag_{sampleID}'.format(
            sampleID=sampleID).split()
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #13
    def gene_association(self):

        print '>  gene association ...'
        self.gene_as_filter()
        # write shell
        cmd = '''
            set -eo pipefail
            echo gene association start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/GeneAS

            for mtype in snp snp.indel;do
                python {moduledir}/Association/Burden/GetBurdenFre.py \\
                    -case $mtype.filter.noXY.V6Pad100.xls.gz \\
                    -control {moduledir}/{pad_100_stat} \\
                    -cr 0.95 \\
                    -nr 0.6 \\
                    -cc N \\
                    -Num 2827 \\
                    -out $mtype.burden.stat.xls
                
                rows=`wc -l $mtype.burden.stat.xls | cut -d' ' -f1`
                if [ $rows -eq 1 ];then
                    echo "[error] no data in $mtype.burden.stat.xls"
                    exit 1
                fi

                Rscript {moduledir}/Association/Burden/GeneFisherPlot.R \\
                    --infile $mtype.burden.stat.xls \\
                    --outpre $mtype.burden

                paste $mtype.burden.fisher.xls $mtype.burden.stat.samstat.xls > $mtype.burden.result.xls
            done

            echo gene association done: `date "+%F %T"`
        '''.format(**self.__dict__)



        shell_path = '{analydir}/Advance/{newjob}/GeneAS/gene_association.sh'.format(
            **self.args)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'gene_association'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['gene_as_filter']
        after_jobs = ['data_release']
        utils.add_order(self.orders, job_name, before_jobs=before_jobs, after_jobs=after_jobs)
Example #14
    def ppi(self):

        print '>  ppi ...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo ppi start: `date "+%F %T"`

            cd {PPI}

            echo 9606 > PPI_genes.xls

            cat {analydir}/Advance/{newjob}/IntegrateResult/total.candidate.gene.xls |
                tr ',' '\\n' |
                tr '\\n' '\\t' >> PPI_genes.xls

            echo -e "\\nall\\n20" >> PPI_genes.xls

            java -Xmx6G -jar {genemania_jar} QueryRunner \\
                --data {genemania_data} \\
                --out flat \\
                --results . \\
                PPI_genes.xls

            python {moduledir}/PPI/SplitPPI_Result.py .

            # Brief Result
            echo generate brief results

            python {ROOT_DIR}/modules/brief/text2excel.py \\
                {BriefResults}/PPI/PPI.xlsx \\
                {ROOT_DIR}/modules/brief/readme/ppi.readme.xls \\
                Gene_interactions.xls \\
                Gene_description.xls \\
                Networks.description.xls

            echo ppi done: `date "+%F %T"`
        '''.format(**self.__dict__)

        shell_path = '{PPI}/ppi.sh'.format(
            **self.args)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'ppi'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['integrate_result']
        after_jobs = ['data_release']
        utils.add_order(self.orders, job_name, before_jobs=before_jobs, after_jobs=after_jobs)
Example #15
    def mapping_check(self, patientID, sampleID):

        # print '  mapping check...'
        # write shell
        sex = self.sample_infos[sampleID]['sex']

        cmd = '''
            set -eo pipefail
            echo mapping check for sample {sampleID} start: `date "+%F %T"`

            python2 {moduledir}/QC/auto_check.py \\
                --qc_list {qc_list} \\
                --sampid {sampleID} \\
                --pwd {analydir} \\
                --check map \\
                --jobname {newjob} \\
                --seqstrag {seqstrag} \\
                --email {email} \\
                --PE {PE} \\
                --gender {sex} \\
                --dup {dup} \\
                --depth {depth}
            
            # remove clean data if mapping check passed
            if {rm_clean};then
                rm -f {analydir}/QC/{sampleID}/{sampleID}_*.clean.fq*
            fi

            echo mapping check for sample {sampleID} done: `date "+%F %T"`
        '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/Mapping/{patientID}.{sampleID}/mapping_check_{sampleID}.sh'.format(
            **dict(self.__dict__, **locals()))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'mapping_check'
        job_name = 'mapping_check_{sampleID}'.format(sampleID=sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = []
        after_jobs = [
            'final_bam_{sampleID}'.format(sampleID=sampleID), 'data_release'
        ]
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #16
    def crest_call(self, sampleID, chrom_list):

        for chrom in chrom_list:
            # 1 extract soft clip
            cmd = '''
                set -eo pipefail
                echo sv call with crest for {sampleID} start: `date "+%F %T"`\n

                # 1 Extract Softclip
                echo extract softclip...
                perl {soft_dir}/CREST/CREST/extractSClip.pl \\
                    -o {analydir}/SV/{sampleID}/crest/bychr \\
                    -i {analydir}/Mapping/{sampleID}.{sampleID}/{sampleID}.final.bam \\
                    -ref_genome {reffasta} \\
                    -r {chrom} \\
                    -p {sampleID}

                # 2 CREST Calling
                echo crest calling...
                perl {soft_dir}/CREST/crest_sv_calling_pipe4.6.pl \\
                    -outDir {analydir}/SV/{sampleID}/crest/bychr \\
                    -tumorBam {analydir}/Mapping/{sampleID}.{sampleID}/{sampleID}.final.bam \\
                    --min_one_side_reads 4 \\
                    -sampleID {sampleID}.{chrom} \\
                    -regionList {refbed} \\
                    -cover {analydir}/SV/{sampleID}/crest/bychr/{sampleID}.{chrom}.cover \\
                    -ref {reffasta} \\
                    -bit {reffasta2bit}

                echo sv call with crest for {sampleID} done: `date "+%F %T"`
            '''.format(sampleID=sampleID, chrom=chrom, **self.__dict__)

            shell_path = '{analydir}/SV/{sampleID}/crest/crest_call_chr_{chrom}_{sampleID}.sh'.format(
                sampleID=sampleID, chrom=chrom.strip('chr'), **self.__dict__)

            utils.write_shell(shell_path, cmd)

            # add job
            now_point = 'crest_call'
            job_name = 'crest_call_{}_{}'.format(chrom, sampleID)
            utils.add_job(self.jobs, now_point, self.args['startpoint'],
                          self.ANALYSIS_POINTS, job_name, shell_path,
                          self.queues)

            # add order
            before_jobs = ['final_bam_{sampleID}'.format(sampleID=sampleID)]
            after_jobs = ['crest_txt2gff_{sampleID}'.format(sampleID=sampleID)]
            utils.add_order(self.orders,
                            job_name,
                            before_jobs=before_jobs,
                            after_jobs=after_jobs)
Example #17
    def breakdancer_call(self, sampleID):

        cmd = '''
            set -eo pipefail
            echo breakdancer call for {sampleID} start: `date "+%F %T"`\n

            cd {analydir}/SV/{sampleID}/breakdancer

            {soft_dir}/breakdancer/current/breakdancer-max \\
                -h \\
                -d {sampleID}.breakdancer.SV-supporting \\
                -g {sampleID}.breakdancer.bed \\
                {sampleID}.breakdancer.cfg |
                grep -vwE 'hs37d5|GL000220' \\
                > {sampleID}.breakdancer.txt

            perl {moduledir}/Varition/SV/breakdancer/breakdancer_filter.pl \\
                -g {sex} \\
                -n 6 \\
                -a {sampleID}.breakdancer.txt \\
                > {sampleID}.breakdancer.flt.txt

            perl {moduledir}/Varition/SV/breakdancer/breakdancer_txt2gff.pl \\
                {sampleID}.breakdancer.flt.txt \\
                > {sampleID}.breakdancer.gff

            echo breakdancer call for {sampleID} done: `date "+%F %T"`
        '''.format(sampleID=sampleID,
                   sex=self.sample_infos[sampleID]['sex'],
                   **self.__dict__)

        shell_path = '{analydir}/SV/{sampleID}/breakdancer/breakdancer_call_{sampleID}.sh'.format(
            sampleID=sampleID, **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'breakdancer_call'
        job_name = 'breakdancer_call_{}'.format(sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = [
            'breakdancer_config_{sampleID}'.format(sampleID=sampleID)
        ]
        after_jobs = ['annotate_gff_{sampleID}'.format(sampleID=sampleID)]
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #18
    def crest_txt2gff(self, sampleID, chrom_list):

        auto_chrom_last = filter(lambda x: x.strip('chr').isdigit(),
                                 chrom_list)[-1]

        other_chrom = filter(lambda x: not x.strip('chr').isdigit(),
                             chrom_list)

        # print auto_chrom_last
        # print other_chrom

        crest_results = '{{1..%s},%s}' % (auto_chrom_last.strip(
            'chr'), ','.join(map(lambda x: x.strip('chr'), other_chrom)))

        if 'chr' in chrom_list[-1]:
            crest_results = 'chr' + crest_results

        # print crest_results

        cmd = '''
            set -eo pipefail
            echo convert sv results to gff for {sampleID} start: `date "+%F %T"`\n

            cd {analydir}/SV/{sampleID}/crest

            cat bychr/{sampleID}.{crest_results}.predSV.txt |
                grep -vw hs37d5 > {sampleID}.predSV.txt

            perl {moduledir}/Varition/SV/crest/crest_txt2gff.pl \\
                {sampleID}.predSV.txt > {sampleID}.crest.gff

            echo convert sv results to gff for {sampleID} done: `date "+%F %T"`
        '''.format(crest_results=crest_results,
                   sampleID=sampleID,
                   **self.__dict__)

        shell_path = '{analydir}/SV/{sampleID}/crest/crest_txt2gff_{sampleID}.sh'.format(
            sampleID=sampleID, **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'crest_txt2gff'
        job_name = 'crest_txt2gff_{}'.format(sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        after_jobs = ['annotate_gff_{sampleID}'.format(sampleID=sampleID)]
        utils.add_order(self.orders, job_name, after_jobs=after_jobs)
Example #19
    def samtools_call_hapmap(self, familyid, samples_with_data):

        vcf_list = '{analydir}/Advance/{newjob}/Linkage/{familyid}/vcf_{familyid}.list'.format(
            **dict(self.__dict__, **locals()))
        with utils.safe_open(vcf_list, 'w') as out:
            for sampleid in samples_with_data:
                out.write('{}.vcf\n'.format(sampleid))

        for sampleid in samples_with_data:
            print '>    samtools call hapmap for', sampleid

            cmd = '''
                set -eo pipefail
                echo samtools call hapmap for {sampleid} start: `date "+%F %T"`

                cd {analydir}/Advance/{newjob}/Linkage/{familyid}

                samtoolsv0.1.19 mpileup \\
                    -d 10000 -C 50 -D -S -m 2 -F 0.02 -q 13 -Q 13 \\
                    -gf {reffasta} \\
                    -l {moduledir}/Linkage/annotHapMap2L.txt \\
                    {analydir}/Mapping/{sampleid}.{sampleid}/{sampleid}.final.bam |
                bcftools_lh view \\
                    -cg -t 0.5 \\
                    -> {sampleid}.vcf

                echo samtools call hapmap for {sampleid} done: `date "+%F %T"`
            '''.format(**dict(self.__dict__, **locals()))

            shell_path = '{analydir}/Advance/{newjob}/Linkage/{familyid}/samtools_call_hapmap_{sampleid}.sh'.format(
                **dict(self.__dict__, **locals()))

            utils.write_shell(shell_path, cmd)

            # add job
            now_point = 'samtools_call_hapmap'
            job_name = 'samtools_call_hapmap_{sampleid}'.format(**locals())
            utils.add_job(self.jobs, now_point, self.args['startpoint'],
                          self.ANALYSIS_POINTS, job_name, shell_path,
                          self.queues)

            # add order
            before_jobs = ['final_bam_{sampleid}'.format(**locals())]
            after_jobs = ['linkdatagen_{familyid}'.format(**locals())]
            utils.add_order(self.orders,
                            job_name,
                            before_jobs=before_jobs,
                            after_jobs=after_jobs)
Example #20
    def gzip_md5_clean(self, sampleID, lane):

        # print '  gzip and md5sum clean data...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo Compress and md5sum clean for sample {sampleID} start: `date "+%F %T"`\n

            cd {analydir}/QC/{sampleID}

            fq1={sampleID}_{novoid}_{flowcell}_L{lane}_1.clean.fq
            fq2={sampleID}_{novoid}_{flowcell}_L{lane}_2.clean.fq

            for fq in $fq1 $fq2;do
                if [ -s $fq.gz -a ! -s $fq ];then
                    echo $fq has been compressed.
                else
                    pigz -p4 -f $fq
                fi
                md5sum $fq.gz | unix2dos > $fq.gz.MD5.txt
            done

            echo Compress and md5sum clean for sample {sampleID} done: `date "+%F %T"`
        '''.format(analydir=self.analydir, sampleID=sampleID, **lane)

        shell_path = '{analydir}/QC/{sampleID}/gzip_md5_clean_{sampleID}_{novoid}_{flowcell}_L{lane}.sh'.format(
            analydir=self.analydir, sampleID=sampleID, **lane)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'gzip_md5_clean'
        job_name = 'gzip_md5_clean_{sampleID}_{novoid}_{flowcell}_L{lane}'.format(
            sampleID=sampleID, **lane)

        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = [
            'qc_{sampleID}_{novoid}_{flowcell}_L{lane}'.format(
                sampleID=sampleID, **lane)
        ]
        after_jobs = ['data_release']
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #21
File: hla.py Project: Leonguos/pipeline
    def hla_athlates_typing(self, sampleid, gene):

        # print '  athlates typing ...'
        cmd = '''
            set -eo pipefail
            echo hla athlates typing for {sampleid} {gene} start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/gene/{gene}

            typing \\
                -hd 2 \\
                -msa {athlates_db_dir}/msa/{gene}_nuc.txt \\
                -bam {sampleid}.{gene}.sort.bam \\
                -exlbam {sampleid}.non-{gene}.sort.bam \\
                -o {sampleid}.{gene}

            rm -f *bam

            # link result
            mkdir -p {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/result

            cd {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/result

            ln -sf ../{sampleid}/gene/{gene}/{sampleid}.{gene}.typing.txt .

            echo hla athlates typing for {sampleid} {gene} done: `date "+%F %T"`
        '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/gene/{gene}/hla_athlates_typing_{sampleid}_{gene}.sh'.format(
            **dict(self.__dict__, **locals()))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'hla_athlates_typing'
        job_name = 'hla_athlates_typing_{sampleid}_{gene}'.format(**locals())

        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['hla_sort_by_name_{sampleid}_{gene}'.format(**locals())]
        after_jobs = ['data_release']
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #22
File: hla.py Project: Leonguos/pipeline
    def hla_sambamba_markdup(self, sampleid):

        # print '  sambamba merge...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo sambamba markdup for {sampleid} start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}

            sambamba markdup \\
                -t 5 \\
                --overflow-list-size=10000000 \\
                --tmpdir=tmp \\
                {sampleid}.sort.bam \\
                {sampleid}.nodup.bam

            rm -rf tmp

            echo sambamba markdup for {sampleid} done: `date "+%F %T"`
        '''.format(sampleid=sampleid, **self.__dict__)

        shell_path = '{analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/hla_sambamba_markdup_{sampleid}.sh'.format(
            sampleid=sampleid, **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'hla_sambamba_markdup'
        job_name = 'hla_sambamba_markdup_{sampleid}'.format(sampleid=sampleid)

        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = [
            'hla_sambamba_merge_{sampleid}'.format(sampleid=sampleid)
        ]
        after_jobs = []
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #23
File: hla.py Project: Leonguos/pipeline
    def hla_picard_markdup(self, sampleid):

        # print '  picard markdup ...'
        cmd = '''
            set -eo pipefail
            echo picard markdup for {sampleid} start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}

            java1.8.0 -Xmx5g -jar {picard_jar} \\
                MarkDuplicates \\
                TMP_DIR=TMP \\
                INPUT={sampleid}.sort.bam \\
                OUTPUT={sampleid}.nodup.bam \\
                METRICS_FILE={sampleid}.nodup.metrics.txt \\
                CREATE_INDEX=true \\
                ASSUME_SORTED=true

            echo picard markdup for {sampleid} done: `date "+%F %T"`
        '''.format(sampleid=sampleid, **self.__dict__)

        shell_path = '{analydir}/Advance/{newjob}/HLA/ATHLATES_typing/{sampleid}/hla_picard_markdup_{sampleid}.sh'.format(
            sampleid=sampleid, **self.__dict__)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'hla_picard_markdup'
        job_name = 'hla_picard_markdup_{sampleid}'.format(sampleid=sampleid)

        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = [
            'hla_sambamba_merge_{sampleid}'.format(sampleid=sampleid)
        ]
        after_jobs = []
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #24
    def disease(self, disease_id):

        print '>  disease analysis ...'
        # write shell
        array = ','.join(map(str, self.args['analy_array']))
        cmd = '''
            set -eo pipefail
            echo disease analysis start: `date "+%F %T"`

            cd {Disease}

            python {moduledir}/Disease/disease.py \\
                -i {IntegrateResult}/Integrate.candidate.xls \\
                -o Integrate.disease.xls \\
                -id {disease_id} \\
                -enc utf8

            python {ROOT_DIR}/modules/brief/text2excel.py \\
                {BriefResults}/Disease/Integrate.disease.xlsx \\
                Integrate.disease.xls

            echo disease analysis done: `date "+%F %T"`
        '''.format(**dict(self.args, **locals()))

        shell_path = '{Disease}/disease_analysis.sh'.format(**self.args)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'disease_analysis'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['integrate_result']
        after_jobs = ['data_release']
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #25
    def combine_stat(self, patientID, sampleID):

        # print '  combine stat result...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo combine stat result for {sampleID} start: `date "+%F %T"`\n
            cd {analydir}/Alnstat/{sampleID}\n
            python {moduledir}/Alnstat/combine_pipe4.6.py \\
                information.xlsx \\
                {sampleID}.flagstat \\
                {sampleID} \\
                {seqstrag} \\
                > {sampleID}_mapping_coverage.txt\n
            echo combine stat result for {sampleID} done: `date "+%F %T"`
        '''.format(patientID=patientID, sampleID=sampleID, **self.args)

        shell_path = '{analydir}/Alnstat/{sampleID}/combine_stat_{sampleID}.sh'.format(
            analydir=self.analydir, sampleID=sampleID)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'combine_stat'
        job_name = 'combine_stat_{sampleID}'.format(sampleID=sampleID)

        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)
        # add order
        before_jobs = 'stat_depth_{sampleID} stat_flag_{sampleID}'.format(
            sampleID=sampleID).split()

        after_jobs = [
            'mapping_check_{sampleID}'.format(sampleID=sampleID),
            'mapping_report'
        ]
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #26
    def lumpy_call(self, sampleID):

        sex = self.sample_infos[sampleID]['sex']

        cmd = '''
            set -eo pipefail

            echo lumpy call for {sampleID} start: `date "+%F %T"`\n

            cd {analydir}/SV/{sampleID}/lumpy

            python {moduledir}/Varition/SV/lumpy/lumpy.py \\
                -b {analydir}/Mapping/{sampleID}.{sampleID}/{sampleID}.final.bam \\
                -r {ref} \\
                -o {sampleID}

            rm -f *bam*

            echo lumpy call for {sampleID} done: `date "+%F %T"`
        '''.format(**dict(self.__dict__, **locals()))

        shell_path = '{analydir}/SV/{sampleID}/lumpy/lumpy_call_{sampleID}.sh'.format(
            **dict(self.__dict__, **locals()))

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'lumpy_call'
        job_name = 'lumpy_call_{}'.format(sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['final_bam_{sampleID}'.format(sampleID=sampleID)]
        after_jobs = ['annotate_gff_{sampleID}'.format(sampleID=sampleID)]
        utils.add_order(self.orders,
                        job_name,
                        before_jobs=before_jobs,
                        after_jobs=after_jobs)
Example #27
    def qc_check(self, sampleID):

        # print '  qc check...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo qc check for sample {sampleID} start: `date "+%F %T"`

            python2 {moduledir}/QC/auto_check.py \\
                --qc_list {qc_list} \\
                --sampid {sampleID} \\
                --pwd {analydir} \\
                --check qc \\
                --jobname {newjob} \\
                --seqstrag {seqstrag} \\
                --email {email} \\
                --PE {PE} \\
                --q20 {Q20} \\
                --q30 {Q30} \\
                --error {error} \\
                --raw {rawdata}
            
            echo qc check for sample {sampleID} done: `date "+%F %T"`
        '''.format(sampleID=sampleID, **self.args)

        shell_path = '{analydir}/QC/{sampleID}/qc_check_{sampleID}.sh'.format(
            analydir=self.analydir, sampleID=sampleID)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = 'qc_check'
        job_name = 'qc_check_{sampleID}'.format(sampleID=sampleID)
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        after_jobs = ['qc_report']
        utils.add_order(self.orders, job_name, after_jobs=after_jobs)
Example #28
    def site_association(self):

        print '>  site association ...'
        # write shell
        cmd = '''
            set -eo pipefail
            echo site association start: `date "+%F %T"`

            cd {analydir}/Advance/{newjob}/SiteAS

            python {moduledir}/Association/site_AS/v3/SiteAS.py \\
                --pwd . \\
                --infile ../Merged_vcf/VCF/snp.merged.annovar.hg19_multianno.xls.gz \\
                --type allele \\
                --db NOVO \\
                --out AS
            
            sh AS_siteAS_Allele_NOVO.sh

            echo site association done: `date "+%F %T"`
        '''.format(**self.args)

        shell_path = '{analydir}/Advance/{newjob}/SiteAS/site_association.sh'.format(
            **self.args)

        utils.write_shell(shell_path, cmd)

        # add job
        now_point = job_name = 'site_association'
        utils.add_job(self.jobs, now_point, self.args['startpoint'],
                      self.ANALYSIS_POINTS, job_name, shell_path, self.queues)

        # add order
        before_jobs = ['annotate_merged_snp', 'annotate_merged_indel']
        after_jobs = ['data_release']
        utils.add_order(self.orders, job_name, before_jobs=before_jobs, after_jobs=after_jobs)
Example #29
def json_api():
    '''
    This endpoint exposes the law-search API and returns the result as JSON.
    The returned data is structured as:
    {task: {order: [law_number, sketch of laws, complete laws],
            order: [law_number, sketch of laws, complete laws],
            ...}
    }
    '''
    try:
        word = request.args['info']
        law_lst = MySearch().search_word(word)  # a list of (number, content) tuples
        o_law_list = add_order(law_lst)  # number the tuples and convert them to the API structure
        return jsonify({'task': o_law_list}), 200
    except Exception, e:
        print e
        abort(400)
        return
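
For reference, a successful response following the structure described in the docstring above might look like this (the values are invented purely for illustration):

    {
      "task": {
        "1": ["Article 12", "short summary of the provision", "full text of the provision"],
        "2": ["Article 47", "short summary of the provision", "full text of the provision"]
      }
    }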
Example #30
def submit_order_handler(update, context):
    chat = utils.get_chat(context, update)
    chat_id = chat.effective_chat.id
    logger.info(f'submit_order_handler -> {context.user_data}')
    order_id = utils.add_order(context.user_data, chat_id)
    context.user_data.update(
        order_id=order_id)
    # 1. Send Order Info to admins chat
    utils.send_message_to_admin(
        context.bot,
        f'{utils.generate_full_order_info(context.user_data, chat_id)} \n\n'
        f'`User_id: {chat_id}` \n'
        f'`Order_id: {order_id}` \n',
        True,
        chat_id)
    # 2. Send notification to user
    update.message.reply_text(
        utils.generate_order_confirmation(
            context.user_data
        ),
        reply_markup=utils.get_start_kb()
    )
    done(update, context)
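
Note that utils.add_order in this last example is unrelated to the pipeline helper of the same name above: here it persists a Telegram-bot order built from context.user_data and returns the new order's id. A purely hypothetical sketch of such a helper, assuming orders live in a simple in-memory dict:

    import itertools

    _orders = {}
    _next_id = itertools.count(1)

    def add_order(user_data, chat_id):
        # Hypothetical: store the order payload under a fresh id and hand the id back.
        order_id = next(_next_id)
        _orders[order_id] = {'chat_id': chat_id, 'order': dict(user_data)}
        return order_id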