Пример #1
0
def connect():
    """Open a connection to the PostgreSQL database server.

    Connection parameters are obtained from ``config()`` and passed as
    keyword arguments to ``psycopg2.connect``.

    Returns:
        The connection object on success, or ``None`` when the connection
        attempt fails (the error is reported through ``print_color``).
    """
    # Read connection parameters
    params = config()

    # Connect to the PostgreSQL server
    print_color("Connecting to the PostgreSQL database...", "HEADER")
    try:
        return psycopg2.connect(**params)
    except Exception as error:
        # Keep the best-effort contract (callers get None on failure) but say
        # why it failed.  The previous ``return`` inside ``finally`` discarded
        # the exception silently and also swallowed KeyboardInterrupt and
        # SystemExit, which now propagate normally.
        print_color(error, "FAIL")
        return None
Пример #2
0
def run_commands(conn, commands):
    """Execute SQL commands in order on ``conn`` and commit the changes.

    Args:
        conn: Open database connection providing ``cursor()`` and
            ``commit()``.
        commands: Iterable of SQL statements; each one is passed to
            ``cursor.execute`` in iteration order.

    Raises:
        Whatever ``cursor.execute`` or ``conn.commit`` raises.  When a command
        fails this function does not call ``commit``.
    """
    # Create a cursor
    cur = conn.cursor()
    try:
        print_color("Running commands...", "WARNING")

        # Run commands one by one
        for command in commands:
            cur.execute(command)

        print_color("Commands ran successfully!", "OKGREEN")
    finally:
        # Close communication with the PostgreSQL database server, even when
        # one of the commands raised (the cursor used to leak in that case).
        cur.close()

    # Commit the changes
    conn.commit()
Пример #3
0
    def start(self):
        """Run the integration step, then the optional disease analysis.

        When ``self.args['disease']`` is ``'Y'``, every id in the
        ``;``-separated ``self.args['disease_ids']`` is tried in order.  The
        first id with a matching background file under
        ``{disease_db}/Disease_BackGround`` is handed to ``self.disease`` and
        the search stops.  If no id matches, a warning is printed instead.
        """
        self.integrate()

        # Guard clause: nothing else to do unless disease analysis is enabled
        if self.args['disease'] != 'Y':
            return

        background = '{disease_db}/Disease_BackGround/{disease_id}__*txt'
        for disease_id in self.args['disease_ids'].split(';'):
            pattern = background.format(
                disease_db=self.__dict__['disease_db'],
                disease_id=disease_id)
            if not glob.glob(pattern):
                continue
            utils.print_color('> Disease Analysis', 'white')
            self.disease(disease_id)
            break
        else:
            # Reached only when the loop finished without a ``break``
            print('[warn] disease analysis will not to do, cause the disease "{}" not in database yet'.format(
                self.args['disease_name']))
def _log_graph():
    """Write the graph of a fresh session to ``/tmp/interpreter``.

    A ``tf.summary.FileWriter`` is created for the session graph and closed
    again, then the absolute path of the log directory is printed.
    """
    path = '/tmp/interpreter'
    with tf.Session() as sess:
        # NOTE(review): this only creates the initializer op; it is never
        # passed to sess.run().  Kept as in the original -- confirm intent.
        tf.global_variables_initializer()
        writer = tf.summary.FileWriter(path, sess.graph)
        # The writer was previously never closed; close it so the pending
        # graph event is flushed to disk before the path is reported.
        writer.close()
        ut.print_color(os.path.abspath(path), color=33)
Пример #5
0
            'cars',
            'columns': [{
                'name': 'id',
                'type': 'BIGSERIAL',
                'constraints': ['PRIMARY KEY']
            }, {
                'name': 'make',
                'type': 'VARCHAR(50)',
                'constraints': ['NOT NULL']
            }, {
                'name': 'model',
                'type': 'VARCHAR(50)',
                'constraints': ['NOT NULL']
            }, {
                'name': 'year',
                'type': 'INTERVAL YEAR',
                'constraints': ['NOT NULL']
            }]
        }]

        drop_cmds = parse_commands(conn, drop_pairings)
        create_cmds = parse_commands(conn, create_pairings)

        run_commands(conn, drop_cmds)
        run_commands(conn, create_cmds)
        # ------------------------------------------------------

        disconnect(conn)
    except (Exception, psycopg2.DatabaseError) as error:
        print_color(error, "FAIL")
Пример #6
0
    def start(self):
        """Release the results of every enabled analysis, then write the readme.

        The directory names and paths in ``dir_map`` are first published as
        instance attributes.  Each step enabled in ``self.ANALY_DICT`` then
        calls its ``release_*`` method and records an entry in
        ``self.context``:

        * top-level sections (raw/clean/bam data, primary result, filter
          analysis, final result, appendix) receive the next Chinese ordinal
          from ``dir_counter``;
        * advance results receive ``{'name': ..., 'counter': ...}`` where
          ``counter`` is the value of ``self.final_result_counter`` at that
          moment.

        ``self.make_readme()`` is called last.
        """

        # # tree of release directory
        # {job}/ReleaseResult
        # ├── Data
        # │   ├── BamData
        # │   └── RawData
        # ├── FinalResult
        # │   ├── 1.FilterDB-...
        # │   ├── 2. ...
        # ├── PrimaryAnalysis
        # │   ├── FilterAnalysis
        # │   └── SampleVariation
        # └── Readme

        # Ordinals handed out to the top-level sections in release order
        dir_counter = iter('一 二 三 四 五 六 七 八 九 十'.split())

        dir_map = {
            'CandidateGene': 'CandidateGene-候选基因列表',
            'FilterDB': 'FilterDB-突变位点筛选',
            'ACMG': 'ACMG-突变位点有害性分类',
            'FilterSV_CNV': 'FilterSV_CNV-结构变异有害性分析',
            'Noncoding': 'Noncoding-非编码区突变位点筛选',
            'ModelF': 'ModelF-基于家系样本筛选',
            'Share': 'Share-共有突变基因筛选',
            'Denovo': 'Denovo-新生突变筛选',
            'Linkage': 'Linkage-连锁分析',
            'ROH': 'ROH-纯合子区域分析',
            'Network': 'Network-候选基因相关性排序',
            'Pathway': 'Pathway-候选基因富集分析',
            'PPI': 'PPI-蛋白相互作用分析',
            'SiteAS': 'SiteAS-基于位点关联分析',
            'GeneAS': 'GeneAS-基于基因的关联分析',
            # This key used to be listed twice with the same value
            'BriefResults':
            '{analydir}/Advance/{newjob}/BriefResults'.format(**self.__dict__),
            'Data':
            '{outdir}/ReleaseResult/Data'.format(**self.__dict__),
            'PrimaryAnalysis':
            '{outdir}/ReleaseResult/PrimaryAnalysis'.format(**self.__dict__),
            'FinalResult':
            '{outdir}/ReleaseResult/FinalResult'.format(**self.__dict__),
            'Readme':
            '{outdir}/ReleaseResult/Readme/'.format(**self.__dict__),
        }
        self.__dict__.update(**dir_map)

        def claim_section(key):
            """Give ``key`` the next ordinal unless it already holds one."""
            if key not in self.context:
                # Builtin next() instead of the Python-2-only .next() method
                self.context[key] = next(dir_counter)

        def record_result(context_key, dir_key):
            """Store the name and current counter of one advance result."""
            self.context[context_key] = {
                'name': '{}'.format(self.__dict__[dir_key]),
                'counter': self.final_result_counter,
            }

        utils.print_color('release {} samples:\n{}'.format(
            len(self.qc_lists), self.qc_lists.keys()))

        # RawData (always released)
        self.release_fastq('raw')
        self.context['raw_data'] = next(dir_counter)

        # QC
        if self.ANALY_DICT['quality_control_keep_clean']:
            self.release_fastq('clean')
            self.context['clean_data'] = next(dir_counter)

        # Mapping
        if self.ANALY_DICT['mapping']:
            self.release_mapping()
            self.context['bam_data'] = next(dir_counter)

        # SNP/INDEL
        if self.ANALY_DICT['snpindel_call']:
            self.release_snp_indel('snp')
            self.release_snp_indel('indel')
            claim_section('primary_result')
            self.context['snpindel_result'] = True

        # SV
        if self.ANALY_DICT['sv_call']:
            self.release_sv_cnv('sv')
            claim_section('primary_result')
            self.context['sv_result'] = True

        # CNV
        if self.ANALY_DICT['cnv_call']:
            self.release_sv_cnv('cnv')
            claim_section('primary_result')
            self.context['cnv_result'] = True

        # Circos
        if self.ANALY_DICT['cnv_call_freec'] and self.ANALY_DICT['sv_call']:
            self.release_circos()
            self.context['circos_result'] = True

        # Advance: candidate gene list.  The counter is recorded here without
        # being incremented first, exactly as before.
        if any(
            (self.ANALY_DICT['filter_acmg'], self.ANALY_DICT['filter_model'],
             self.ANALY_DICT['share_compare'], self.ANALY_DICT['denovo'])):
            self.release_candidate_gene()
            record_result('candidate_gene', 'CandidateGene')

        # FilterDB
        if self.ANALY_DICT['filter_db'] or self.ANALY_DICT['filter_acmg']:
            self.final_result_counter += 1
            self.release_filter_db('snp')
            self.release_filter_db('indel')
            claim_section('filter_analysis')
            record_result('filterdb', 'FilterDB')

        # ACMG
        if self.ANALY_DICT['filter_acmg']:
            self.final_result_counter += 1
            self.release_acmg()
            record_result('filter_acmg', 'ACMG')

        # FilterSV/FilterCNV (context is recorded before the releases run)
        if self.ANALY_DICT['filter_sv'] or self.ANALY_DICT['filter_cnv']:
            self.final_result_counter += 1
            claim_section('filter_analysis')
            record_result('filter_sv_cnv', 'FilterSV_CNV')
            if self.ANALY_DICT['filter_sv']:
                self.release_filter_sv_cnv('sv')
            if self.ANALY_DICT['filter_cnv']:
                self.release_filter_sv_cnv('cnv')

        # Noncoding
        if self.ANALY_DICT['filter_noncoding']:
            self.final_result_counter += 1
            self.release_filter_noncoding()
            record_result('filter_noncoding', 'Noncoding')

        # ModelF (skipped when share_compare is enabled)
        if self.ANALY_DICT[
                'filter_model'] and not self.ANALY_DICT['share_compare']:
            self.final_result_counter += 1
            self.release_filter_model()
            record_result('filter_model', 'ModelF')

        # Share
        if self.ANALY_DICT['share_compare']:
            self.final_result_counter += 1
            self.release_share_compare()
            record_result('share_compare', 'Share')

        # Denovo (context is recorded before the releases run)
        if self.ANALY_DICT['denovo']:
            self.final_result_counter += 1
            record_result('denovo', 'Denovo')
            if any((self.ANALY_DICT['denovo_samtools'],
                    self.ANALY_DICT['denovo_triodenovo'],
                    self.ANALY_DICT['denovo_denovogear'])):
                self.release_denovo()

            if self.ANALY_DICT['denovo_sv']:
                self.release_denovo_sv_cnv('sv')

            if self.ANALY_DICT['denovo_cnv']:
                self.release_denovo_sv_cnv('cnv')

        # Linkage
        if self.ANALY_DICT['linkage']:
            self.final_result_counter += 1
            self.release_linkage()
            record_result('linkage', 'Linkage')

        # ROH
        if self.ANALY_DICT['roh']:
            self.final_result_counter += 1
            self.release_roh()
            record_result('roh', 'ROH')

        # Network
        if self.ANALY_DICT['phenolyzer']:
            self.final_result_counter += 1
            self.release_network()
            record_result('network', 'Network')

        # Pathway
        if self.ANALY_DICT['pathway']:
            self.final_result_counter += 1
            self.release_pathway()
            record_result('pathway', 'Pathway')

        # PPI
        if self.ANALY_DICT['ppi']:
            self.final_result_counter += 1
            self.release_ppi()
            record_result('ppi', 'PPI')

        # SiteAS
        if self.ANALY_DICT['site_association']:
            self.final_result_counter += 1
            # self.release_site_as()
            record_result('site_as', 'SiteAS')

        # GeneAS
        if self.ANALY_DICT['gene_association']:
            self.final_result_counter += 1
            # self.release_gene_as()
            record_result('gene_as', 'GeneAS')

        self.context['final_result'] = next(dir_counter)
        self.context['appendix'] = next(dir_counter)

        # Readme
        self.make_readme()
Пример #7
0
    def start(self):
        """Validate the inputs and schedule every requested analysis step.

        In order, this method:

        1. expands the ``advance_dirs`` path templates into ``self.args``;
        2. derives the qc/mapping status, the analysis dict, the softwares
           and the queues, storing them in ``self.args``;
        3. checks the analysis array, the input files and the target region;
        4. validates ``self.startpoint`` and calls ``exit(1)`` when it is not
           a key of ``config.ANALYSIS_POINTS``;
        5. builds a ``utils.Project`` and extracts the sample information;
        6. for WGS, overrides the memory entries of
           ``config.ANALYSIS_POINTS``;
        7. starts each analysis step whose ``ANALY_DICT`` flag is set, then
           always ``Result`` and ``Report``;
        8. writes the collected jobs with ``utils.write_job``.
        """

        # temp
        # Path templates of the advance-analysis directories; placeholders
        # are filled from self.args in the loop below.
        advance_dirs = {
            'Merged_vcf': '{analydir}/Advance/{newjob}/Merged_vcf',
            'ACMG': '{analydir}/Advance/{newjob}/ACMG',
            'FilterSV': '{analydir}/Advance/{newjob}/FilterSV',
            'FilterCNV': '{analydir}/Advance/{newjob}/FilterCNV',
            'Noncoding': '{analydir}/Advance/{newjob}/Noncoding',
            'ModelF': '{analydir}/Advance/{newjob}/ModelF',
            'Share': '{analydir}/Advance/{newjob}/Share',
            'Denovo': '{analydir}/Advance/{newjob}/Denovo',
            'Linkage': '{analydir}/Advance/{newjob}/Linkage',
            'ROH': '{analydir}/Advance/{newjob}/ROH',
            'Network': '{analydir}/Advance/{newjob}/Network',
            'Pathway': '{analydir}/Advance/{newjob}/Pathway',
            'PPI': '{analydir}/Advance/{newjob}/PPI',
            'HLA': '{analydir}/Advance/{newjob}/HLA',
            'SiteAS': '{analydir}/Advance/{newjob}/SiteAS',
            'GeneAS': '{analydir}/Advance/{newjob}/GeneAS',
            'IntegrateResult': '{analydir}/Advance/{newjob}/IntegrateResult',
            'Disease': '{analydir}/Advance/{newjob}/Disease',
            'BriefResults': '{analydir}/Advance/{newjob}/BriefResults',
        }

        # Store each expanded path in self.args under its directory key
        for k, v in advance_dirs.iteritems():
            self.args.update({k: v.format(**self.args)})

        # print self.args['SiteAS']
        # exit()

        # print self.analy_array
        print 'hello, {}'.format(self.username)

        # Require rawdata or not
        # NOTE(review): self.startpoint is used here before it is validated
        # further down -- confirm utils.get_status tolerates an unknown value.
        qc_status = utils.get_status('qc', self.startpoint,
                                     config.ANALYSIS_POINTS)
        mapping_status = utils.get_status('bwa_mem', self.startpoint,
                                          config.ANALYSIS_POINTS)

        print 'qc status:', qc_status
        print 'mapping status:', mapping_status

        # Flags consulted below to decide which analysis steps to start
        ANALY_DICT = utils.get_analysis_dict(self.analy_array,
                                             config.ANALYSIS_CODE)
        self.args.update({'ANALY_DICT': ANALY_DICT})
        # print ANALY_DICT.keys();exit()

        softwares = utils.get_softwares(self.analy_array,
                                        self.args['ANALY_DICT'], self.args,
                                        self.seqstrag)
        # pprint(softwares);exit()
        self.args.update({'softwares': softwares})

        # check inputs
        self.queues = utils.check_queues(self.queues, self.username)
        self.args.update({'queues': self.queues})

        # use sentieon specific queues if needed
        if 'sentieon' in softwares.values():
            print 'add sentieon_queues'
            # Default to the regular queues; use the configured sentieon
            # queues only when the option exists and the check returns some.
            sentieon_queues = self.queues
            if config.CONFIG.has_option('resource', 'sentieon_queues'):
                sentieon_queues = config.CONFIG.get(
                    'resource', 'sentieon_queues').split(',')
                sentieon_queues = utils.check_queues(sentieon_queues,
                                                     self.username)
                if not sentieon_queues:
                    sentieon_queues = self.queues
            self.args.update({'sentieon_queues': sentieon_queues})

        # print self.args['sentieon_queues'];exit()
        # print sentieon_queues;exit()

        utils.check_analy_array(self.seqstrag, self.analy_array,
                                config.ANALYSIS_CODE)
        utils.check_files(self.pn, self.samp_info, self.samp_list)
        newTR = utils.check_target_region(config.CONFIG, self.seqstrag,
                                          self.refgenome, self.rawTR)
        self.args.update({'TR': newTR})

        print 'analysis items:'
        for analysis_code in self.analy_array:
            print utils.color_text(
                '{:4}  {}'.format(analysis_code,
                                  config.ANALYSIS_CODE[analysis_code][0]),
                'yellow')

        # Analysis start point
        if self.startpoint:
            if self.startpoint in config.ANALYSIS_POINTS:
                print 'start point: {}'.format(
                    utils.color_text(self.startpoint))
            else:
                print '[error] invalid startpoint: {}'.format(
                    utils.color_text(self.startpoint))

                # Suggest the first element of process.extractOne's result
                # for the rejected start point
                print 'maybe you want to choose: {}'.format(
                    utils.color_text(
                        process.extractOne(self.startpoint,
                                           config.ANALYSIS_POINTS.keys())[0],
                        'cyan'))

                print 'available startpoints are as follows:\n  {}'.format(
                    '  '.join(config.ANALYSIS_POINTS.keys()))
                exit(1)

        # True when the highest requested analysis code is above 6.1
        is_advance = max(self.analy_array) > 6.1
        project = utils.Project(self.analydir, self.samp_info,
                                self.samp_info_done, self.samp_list,
                                self.qc_list, qc_status, mapping_status,
                                is_advance)

        # Extract sample_info
        print 'extract sample informations...'

        fenqi, tissue, disease_name, sample_infos, sample_infos_all, sample_done = project.get_sample_infos(
            self.samp_list, self.samp_info, self.samp_info_done, is_advance)

        database = '{}/project/DisGeNet.json'.format(
            config.CONFIG.get('software', 'soft_dir'))
        disease_ids = utils.get_disease_id(disease_name, database)
        self.args.update({
            'disease_name': disease_name,
            'disease_ids': disease_ids,
        })

        # Samples that are not listed in sample_done
        sample_infos_waiting = {
            sampleid: infos
            for sampleid, infos in sample_infos.iteritems()
            if sampleid not in sample_done
        }
        self.args.update({'sample_infos_waiting': sample_infos_waiting})
        # print sample_infos_waiting
        # exit()

        # print 'fenqi:', fenqi
        # print 'tissue:', tissue
        # exit()

        # NOTE(review): no call parentheses -- this binds the attribute
        # itself.  If get_sample_lists is a plain method rather than a
        # property, sample_lists is a bound method here; confirm.
        sample_lists = project.get_sample_lists
        # print sample_lists
        # print sample_infos.keys()
        # print sample_infos_all.keys()
        # for sample in sample_infos:
        #     print sample, sample_infos[sample]['familyid']
        # exit()

        if mapping_status == 'waiting':
            sample_lists = project.update_qc_list()

        print '  report number: {}'.format(utils.color_text(fenqi))
        if disease_name:
            print '  disease name: {}'.format(utils.color_text(disease_name))
            print '  disease id: {}'.format(utils.color_text(disease_ids))
        if tissue:
            print '  tissue: {}'.format(utils.color_text(tissue))
        print '  samples ({}): {}'.format(
            len(sample_infos), utils.color_text(sample_infos.keys()))

        if sample_done:
            print '  samples done({}): {}'.format(
                len(sample_done), utils.color_text(sample_done))

        # Update qc_list and extract sample_list
        # print 'update qc_list...'
        # print json.dumps(sample_lists, indent=2)

        # set memory according seqstrag
        print 'set analysis memory...'
        if self.seqstrag == 'WGS':
            print 'upate memory for WGS...'
            # Overwrites element 0 of the matching config.ANALYSIS_POINTS
            # entries in place with the WGS memory value
            for analysis, memory in config.ANALYSIS_MEM_WGS.items():
                if analysis in config.ANALYSIS_POINTS:
                    config.ANALYSIS_POINTS[analysis][0] = memory
        # exit()

        # ===========================================================
        # ===========================================================
        print '>>> pipeline start...'

        mutation_soft, sv_soft, cnv_soft, denovo_soft = [
            softwares[each] for each in ('mutation', 'sv', 'cnv', 'denovo')
        ]

        print '  mutation_soft:{}, sv_soft:{}, cnv_soft:{}, denovo_soft:{}'.format(
            mutation_soft, sv_soft, cnv_soft, denovo_soft)

        # QC (only while the qc status is still 'waiting')
        if ANALY_DICT['quality_control'] and qc_status == 'waiting':
            utils.print_color('> QC', 'white')
            QC(self.args, self.jobs, self.orders, sample_lists, config).start()

        # Mapping
        if ANALY_DICT['mapping']:
            utils.print_color('> Mapping', 'white')
            Mapping(self.args, self.jobs, self.orders, sample_lists,
                    sample_infos, config, qc_status, mapping_status).start()

        # Mutation
        if ANALY_DICT['snpindel_call']:
            utils.print_color('> Mutation', 'white')
            Mutation(self.args, self.jobs, self.orders, sample_lists,
                     sample_infos, config).start()

        # SV
        if ANALY_DICT['sv_call']:
            utils.print_color('> SV', 'white')
            SV(self.args, self.jobs, self.orders, sample_infos, config).start()

        # CNV
        if ANALY_DICT['cnv_call']:
            utils.print_color('> CNV', 'white')
            CNV(self.args, self.jobs, self.orders, sample_infos,
                config).start()

        # FilterDB
        if ANALY_DICT['filter']:
            utils.print_color('> FilterDB', 'white')
            FilterDB(self.args, self.jobs, self.orders, mutation_soft, sv_soft,
                     cnv_soft, sample_infos, config, disease_name, tissue,
                     ANALY_DICT).start()

        # ModelF
        if ANALY_DICT['filter_model']:
            utils.print_color('> Model', 'white')
            FilterModel(self.args, self.jobs, self.orders, mutation_soft,
                        sv_soft, cnv_soft, sample_infos, config).start()

        # Denovo
        if ANALY_DICT['denovo']:
            utils.print_color('> Denovo', 'white')
            Denovo(self.args, self.jobs, self.orders, mutation_soft, sv_soft,
                   cnv_soft, denovo_soft, sample_infos, config,
                   ANALY_DICT).start()

        # Linkage (receives sample_infos_all rather than sample_infos)
        if ANALY_DICT['linkage']:
            utils.print_color('> Linkage', 'white')
            Linkage(self.args, self.jobs, self.orders, mutation_soft, sv_soft,
                    cnv_soft, denovo_soft, sample_infos_all, config,
                    ANALY_DICT).start()

        # IntegrateResult (started when any of the listed analyses is enabled)
        if any(ANALY_DICT[analysis] for analysis in
               ['filter', 'filter_model', 'denovo', 'phenolyzer']):
            utils.print_color('> IntegrateResult', 'white')
            IntegrateResult(self.args, self.jobs, self.orders, config).start()

        # ROH
        if ANALY_DICT['roh']:
            utils.print_color('> ROH', 'white')
            ROH(self.args, self.jobs, self.orders, sample_infos, mutation_soft,
                config).start()

        # OTHER
        # One shared instance serves the IBD, Phenolyzer, Pathway and PPI
        # steps below
        other = Other(self.args, self.jobs, self.orders, config, disease_name)

        # IBD (needs more than one waiting sample)
        if any(ANALY_DICT[each]
               for each in ['filter_model', 'linkage', 'denovo'
                            ]) and len(sample_infos_waiting) > 1:
            utils.print_color('> IBD', 'white')
            other.ibd()

        # Network
        if ANALY_DICT['phenolyzer']:
            utils.print_color('> Phenolyzer', 'white')
            other.phenolyzer()

        # Pathway
        if ANALY_DICT['pathway']:
            utils.print_color('> Pathway', 'white')
            other.pathway()

        # PPI
        if ANALY_DICT['ppi']:
            utils.print_color('> PPI', 'white')
            other.ppi()

        # SiteAS
        if ANALY_DICT['site_association']:
            utils.print_color('> SiteAS', 'white')
            Association(self.args, self.jobs, self.orders,
                        config).site_association()

        # GeneAS
        if ANALY_DICT['gene_association']:
            utils.print_color('> GeneAS', 'white')
            Association(self.args, self.jobs, self.orders,
                        config).gene_association()

        # HLA
        if ANALY_DICT['hla']:
            utils.print_color('> HLA', 'white')
            HLA(self.args, self.jobs, self.orders, sample_lists, sample_infos,
                config, qc_status).start()

        # result and report (always started, regardless of ANALY_DICT)
        utils.print_color('> Result', 'white')
        Result(self.args, self.jobs, self.orders, config).start()

        utils.print_color('> Report', 'white')
        Report(self.args, self.jobs, self.orders, config).start()

        # job summary
        print 'lenght of jobs waiting/total: {}/{}'.format(
            len([job for job in self.jobs if job.get('status') == 'waiting']),
            len(self.jobs))

        utils.write_job(self.analydir, self.newjob, self.jobs, self.orders)

        print '{:-^80}'.format(' all done ')