def calculateOverlappingStatForOneCombo(self, db, phenotype_method_id, call_method_id, analysis_method_id_ls,
                                        no_of_top_snps=1000, association_overlapping_type=None, commit=False,
                                        results_directory=None):
    """
    Count the SNPs shared by the top-ranked results of several analysis methods and save that count.

    For every id in analysis_method_id_ls, the first ResultsMethod matching
    (phenotype_method_id, call_method_id, analysis_method_id) is fetched, and the snps_id values of its
    Results rows with rank <= no_of_top_snps are collected into a set. Sets are cached in
    self.results_id2snp_id_set, keyed by the ResultsMethod id. If fewer than no_of_top_snps rows are
    present, ResultsMethod2Results.rm2result() is called with min_rank/max_rank covering the missing ranks
    and the rows are queried again. The size of the intersection of all sets is stored in a new
    Stock_250kDB.AssociationOverlappingStat entry, which is saved and flushed on db.session
    (this method itself never calls commit on the session).

    Arguments:
        db: database object; db.session is used, and db is handed to DrawSNPRegion.getSNPInfo() and to
            rm2result() as db_250k.
        phenotype_method_id, call_method_id: filter values for ResultsMethod; also stored in the new entry.
        analysis_method_id_ls: analysis method ids whose top SNPs are intersected.
        no_of_top_snps: rank cutoff; every result must contribute exactly this many rows.
        association_overlapping_type: assigned to the new entry's overlapping_type attribute.
        commit, results_directory: forwarded unchanged to rm2result().

    Returns None in all cases. Nothing is saved, and an error is written to stderr, when
        - no ResultsMethod matches one of the analysis methods,
        - a result does not end up with exactly no_of_top_snps rows,
        - analysis_method_id_ls is empty.

    2012.3.23 pass argument db_250k to ResultsMethod2Results.rm2result()
    2009-11-2
    """
    sys.stderr.write("Calculating overlapping stat for phenotype %s and combo %s ...\n" % (
        phenotype_method_id, repr(analysis_method_id_ls),))
    session = db.session
    snp_id_set_ls = []
    for analysis_method_id in analysis_method_id_ls:
        rm = Stock_250kDB.ResultsMethod.query.filter_by(phenotype_method_id=phenotype_method_id).\
            filter_by(call_method_id=call_method_id).filter_by(analysis_method_id=analysis_method_id).first()
        if rm is None:
            # .first() yields None when nothing matches; report it instead of failing on rm.id
            sys.stderr.write("Error: no ResultsMethod found for phenotype_method_id %s, call_method_id %s, "
                             "analysis_method_id %s.\n" % (phenotype_method_id, call_method_id,
                                                           analysis_method_id))
            return
        if rm.id in self.results_id2snp_id_set:
            snp_id_set = self.results_id2snp_id_set.get(rm.id)
        else:
            association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
                filter(Stock_250kDB.Results.rank <= no_of_top_snps)
            no_of_association_entries = association_entries.count()
            if no_of_association_entries < no_of_top_snps:
                # not enough rows yet: request the ranks that are still missing, then query again
                min_rank = no_of_association_entries + 1
                max_rank = no_of_top_snps
                if self.snp_info is None:
                    # fetched on first use and kept on the instance for later calls
                    self.snp_info = DrawSNPRegion.getSNPInfo(db)
                ResultsMethod2Results.rm2result(session, rm, self.snp_info, min_rank=min_rank,
                                                max_rank=max_rank, commit=commit,
                                                results_directory=results_directory, db_250k=db)
                association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
                    filter(Stock_250kDB.Results.rank <= no_of_top_snps)
                no_of_association_entries = association_entries.count()
            if no_of_association_entries != no_of_top_snps:
                sys.stderr.write(
                    "Error: The number of SNPs %s from Result %s (analysis_method_id %s) doesn't match the no_of_top_snps %s.\n" %
                    (no_of_association_entries, rm.id, rm.analysis_method_id, no_of_top_snps))
                return
            snp_id_set = set()
            for entry in association_entries:
                snp_id_set.add(entry.snps_id)
            self.results_id2snp_id_set[rm.id] = snp_id_set
        snp_id_set_ls.append(snp_id_set)

    if not snp_id_set_ls:
        # an empty combo has nothing to intersect; avoid indexing into an empty list
        sys.stderr.write("Error: analysis_method_id_ls is empty. No overlapping stat calculated.\n")
        return

    overlapping_snp_id_set = snp_id_set_ls[0]
    for snp_id_set in snp_id_set_ls[1:]:
        overlapping_snp_id_set = overlapping_snp_id_set & snp_id_set
    no_of_overlapping_snps = len(overlapping_snp_id_set)

    entry = Stock_250kDB.AssociationOverlappingStat(phenotype_method_id=phenotype_method_id,
                                                    call_method_id=call_method_id,
                                                    no_of_top_snps=no_of_top_snps,
                                                    no_of_overlapping_snps=no_of_overlapping_snps)
    entry.overlapping_type = association_overlapping_type
    session.save(entry)
    session.flush()
    sys.stderr.write("%s overlapping SNPs out of %s results. Done.\n" % (no_of_overlapping_snps, len(snp_id_set_ls)))
def calculateOverlappingStatForOneCombo(self, db, phenotype_method_id, call_method_id, analysis_method_id_ls,
                                        no_of_top_snps=1000, association_overlapping_type=None, commit=False,
                                        results_directory=None):
    """
    Count the SNPs shared by the top-ranked results of several analysis methods and save that count.

    For every id in analysis_method_id_ls, the first ResultsMethod matching
    (phenotype_method_id, call_method_id, analysis_method_id) is fetched, and the snps_id values of its
    Results rows with rank <= no_of_top_snps are collected into a set. Sets are cached in
    self.results_id2snp_id_set, keyed by the ResultsMethod id. If fewer than no_of_top_snps rows are
    present, ResultsMethod2Results.rm2result() is called with min_rank/max_rank covering the missing ranks
    and the rows are queried again. The size of the intersection of all sets is stored in a new
    Stock_250kDB.AssociationOverlappingStat entry, which is saved and flushed on db.session
    (this method itself never calls commit on the session).

    Arguments:
        db: database object; db.session is used, and db is handed to DrawSNPRegion.getSNPInfo().
        phenotype_method_id, call_method_id: filter values for ResultsMethod; also stored in the new entry.
        analysis_method_id_ls: analysis method ids whose top SNPs are intersected.
        no_of_top_snps: rank cutoff; every result must contribute exactly this many rows.
        association_overlapping_type: assigned to the new entry's overlapping_type attribute.
        commit, results_directory: forwarded unchanged to rm2result().

    Returns None in all cases. Nothing is saved, and an error is written to stderr, when
        - no ResultsMethod matches one of the analysis methods,
        - a result does not end up with exactly no_of_top_snps rows,
        - analysis_method_id_ls is empty.

    2009-11-2
    """
    sys.stderr.write("Calculating overlapping stat for phenotype %s and combo %s ...\n" % (
        phenotype_method_id, repr(analysis_method_id_ls),))
    session = db.session
    snp_id_set_ls = []
    for analysis_method_id in analysis_method_id_ls:
        rm = Stock_250kDB.ResultsMethod.query.filter_by(phenotype_method_id=phenotype_method_id).\
            filter_by(call_method_id=call_method_id).filter_by(analysis_method_id=analysis_method_id).first()
        if rm is None:
            # .first() yields None when nothing matches; report it instead of failing on rm.id
            sys.stderr.write("Error: no ResultsMethod found for phenotype_method_id %s, call_method_id %s, "
                             "analysis_method_id %s.\n" % (phenotype_method_id, call_method_id,
                                                           analysis_method_id))
            return
        if rm.id in self.results_id2snp_id_set:
            snp_id_set = self.results_id2snp_id_set.get(rm.id)
        else:
            association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
                filter(Stock_250kDB.Results.rank <= no_of_top_snps)
            no_of_association_entries = association_entries.count()
            if no_of_association_entries < no_of_top_snps:
                # not enough rows yet: request the ranks that are still missing, then query again
                min_rank = no_of_association_entries + 1
                max_rank = no_of_top_snps
                if self.snp_info is None:
                    # fetched on first use and kept on the instance for later calls
                    self.snp_info = DrawSNPRegion.getSNPInfo(db)
                ResultsMethod2Results.rm2result(session, rm, self.snp_info, min_rank=min_rank,
                                                max_rank=max_rank, commit=commit,
                                                results_directory=results_directory)
                association_entries = Stock_250kDB.Results.query.filter_by(results_id=rm.id).\
                    filter(Stock_250kDB.Results.rank <= no_of_top_snps)
                no_of_association_entries = association_entries.count()
            if no_of_association_entries != no_of_top_snps:
                sys.stderr.write("Error: The number of SNPs %s from Result %s (analysis_method_id %s) doesn't match the no_of_top_snps %s.\n" % (no_of_association_entries, rm.id, rm.analysis_method_id, no_of_top_snps))
                return
            snp_id_set = set()
            for entry in association_entries:
                snp_id_set.add(entry.snps_id)
            self.results_id2snp_id_set[rm.id] = snp_id_set
        snp_id_set_ls.append(snp_id_set)

    if not snp_id_set_ls:
        # an empty combo has nothing to intersect; avoid indexing into an empty list
        sys.stderr.write("Error: analysis_method_id_ls is empty. No overlapping stat calculated.\n")
        return

    overlapping_snp_id_set = snp_id_set_ls[0]
    for snp_id_set in snp_id_set_ls[1:]:
        overlapping_snp_id_set = overlapping_snp_id_set & snp_id_set
    no_of_overlapping_snps = len(overlapping_snp_id_set)

    entry = Stock_250kDB.AssociationOverlappingStat(phenotype_method_id=phenotype_method_id,
                                                    call_method_id=call_method_id,
                                                    no_of_top_snps=no_of_top_snps,
                                                    no_of_overlapping_snps=no_of_overlapping_snps)
    entry.overlapping_type = association_overlapping_type
    session.save(entry)
    session.flush()
    sys.stderr.write("%s overlapping SNPs out of %s results. Done.\n" % (no_of_overlapping_snps, len(snp_id_set_ls)))
def run(self):
    """
    Open the database, load the SNP context / gene annotation / SNP info inputs and
    hand them to self._constructSNPAnnotation(); flush and commit only if self.commit is set.

    Drops into pdb first when self.debug is true. Returns None.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    make_db = Stock_250kDB.Stock_250kDB
    db = make_db(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db.setup(create_tables=False)
    session = db.session

    # inputs are gathered in this fixed order before the annotation step
    context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(
        self.snps_context_picklef, self.min_distance, self.get_closest)
    annotation = DrawSNPRegion.dealWithGeneAnnotation(self.gene_annotation_picklef)
    snp_data = DrawSNPRegion.getSNPInfo(db)
    short_name2id = self.getSNPAnnotationShortName2id()

    self._constructSNPAnnotation(session, snp_data, context_wrapper, annotation, short_name2id)

    if not self.commit:
        return
    session.flush()
    session.commit()
def run(self):
    """
    Open the database, load the SNP context / gene annotation / SNP info inputs and
    hand them to self._constructSNPAnnotation(); flush and commit only if self.commit is set.

    The gene annotation is requested with tax_id=self.tax_id and cls_with_db_args=self.
    Drops into pdb first when self.debug is true. Returns None.
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    make_db = Stock_250kDB.Stock_250kDB
    db = make_db(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db.setup(create_tables=False)
    session = db.session

    # inputs are gathered in this fixed order before the annotation step
    context_wrapper = GeneListRankTest.dealWithSnpsContextWrapper(
        self.snps_context_picklef, self.min_distance, self.get_closest)
    annotation = DrawSNPRegion.dealWithGeneAnnotation(
        self.gene_annotation_picklef, tax_id=self.tax_id, cls_with_db_args=self)
    snp_data = DrawSNPRegion.getSNPInfo(db)
    short_name2id = self.getSNPAnnotationShortName2id()

    self._constructSNPAnnotation(session, snp_data, context_wrapper, annotation, short_name2id)

    if not self.commit:
        return
    session.flush()
    session.commit()
def run(self):
    """
    Run self.rm2result() on every ResultsMethod of self.call_method_id whose analysis_method_id
    is in self.analysis_method_id_ls; commit the session only if self.commit is set.

    Drops into pdb first when self.debug is true. Returns None.

    2009-6-10
        set Results.beta = getattr(data_obj, 'beta1', None)
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    make_db = Stock_250kDB.Stock_250kDB
    db = make_db(
        drivername=self.drivername,
        username=self.db_user,
        password=self.db_passwd,
        hostname=self.hostname,
        database=self.dbname,
        schema=self.schema,
    )
    db.setup(create_tables=False)
    session = db.session
    session.begin()

    snp_info = DrawSNPRegion.getSNPInfo(db)

    # narrow by call method first, then by the allowed analysis methods
    by_call_method = Stock_250kDB.ResultsMethod.query.filter_by(call_method_id=self.call_method_id)
    analysis_method_filter = Stock_250kDB.ResultsMethod.analysis_method_id.in_(self.analysis_method_id_ls)
    selected_results_methods = by_call_method.filter(analysis_method_filter)

    for rm in selected_results_methods:
        self.rm2result(
            session,
            rm,
            snp_info,
            max_rank=self.max_rank,
            commit=self.commit,
            results_directory=self.results_directory,
        )

    if not self.commit:
        return
    session.commit()
def run(self): """ 2008-12-08 if the plot under configuration is already in db, abort only if the program is gonna commit the database transaction. 2008-10-19 save figures in database if commit """ if self.debug: import pdb pdb.set_trace() db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user, password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema) db.setup(create_tables=False) session = db.session #session.begin() if self.results_type==1: ResultsClass = Stock_250kDB.ResultsMethod snps_context_wrapper = self.dealWithSnpsContextWrapper(self.snps_context_picklef, self.min_distance, self.get_closest) elif self.results_type==2: ResultsClass = Stock_250kDB.ResultsByGene else: sys.stderr.write("Invalid results type : %s.\n"%self.results_type) return None hist_type = self.getHistType(self.call_method_id, self.min_distance, self.get_closest, self.min_MAF, \ self.allow_two_sample_overlapping, self.results_type, self.null_distribution_type_id) candidate_gene_list = self.getGeneList(self.list_type_id) if len(candidate_gene_list)<self.min_sample_size: sys.stderr.write("Candidate gene list of %s too small: %s.\n"%(self.list_type_id, len(candidate_gene_list))) sys.exit(4) #candidate_gene_list = [] #2009-01-12 just to plot the histogram of pvalue candidate_gene_set = Set(candidate_gene_list) list_type = Stock_250kDB.GeneListType.get(self.list_type_id) if list_type is None: sys.exit(3) phenotype_id2results_id_ls = self.getResultsIDLs(db, ResultsClass, self.results_type, self.phenotype_id_ls, \ self.min_distance, self.get_closest, self.min_MAF, self.call_method_id) param_data = PassingData(results_directory=self.results_directory, candidate_gene_list=candidate_gene_list, \ min_MAF=self.min_MAF, allow_two_sample_overlapping=self.allow_two_sample_overlapping, need_the_value=1, \ do_log10_transformation=False) #need_the_value means to get the pvalue/score #force no log10 transformation. 
otherwise, transformation based on analysis_method if self.null_distribution_type_id==2 or self.null_distribution_type_id==3: #gw-looping or random gene list snp_info = DrawSNPRegion.getSNPInfo(db) candidate_gene_snp_index_ls = self.get_candidate_gene_snp_index_ls(candidate_gene_set, snp_info.chr_pos_ls, snps_context_wrapper) no_of_snps = len(snp_info.chr_pos_ls) no_of_permutations = no_of_snps/len(candidate_gene_snp_index_ls) + 1 param_data.chr_pos2index = snp_info.chr_pos2index #pass to getGenomeWideResultFromFile if self.null_distribution_type_id==2: non_candidate_gene_snp_index_ls = self.get_non_candidate_gene_snp_index_ls_by_permutation(candidate_gene_snp_index_ls, no_of_snps, no_of_permutations) elif self.null_distribution_type_id == 3: gene_id_ls = get_total_gene_ls(db.metadata.bind) no_of_candidate_genes = len(candidate_gene_set) non_candidate_gene_snp_index_ls = numpy.zeros(0, numpy.int) while len(non_candidate_gene_snp_index_ls)<no_of_snps: non_candidate_gene_set = Set(random.sample(gene_id_ls, no_of_candidate_genes)) _non_candidate_gene_snp_index_ls = self.get_candidate_gene_snp_index_ls(non_candidate_gene_set, snp_info.chr_pos_ls, snps_context_wrapper) non_candidate_gene_snp_index_ls = numpy.hstack((non_candidate_gene_snp_index_ls, _non_candidate_gene_snp_index_ls)) for phenotype_id, results_id_ls in phenotype_id2results_id_ls.iteritems(): if hist_type.id: #hist_type already in database rows = Stock_250kDB.ScoreRankHistogram.query.filter_by(phenotype_method_id=phenotype_id).\ filter_by(list_type_id=self.list_type_id).filter_by(hist_type_id=hist_type.id) if rows.count()>0 and self.commit: #2008-12-08 only skip if the database transaction is gonna commit. row = rows.first() sys.stderr.write("Histogram already in database. 
id=%s, phenotype_id=%s, list_type_id=%s, hist_type_id=%s.\n"%\ (row.id, row.phenotype_method_id, row.list_type_id, row.hist_type_id)) continue phenotype_method = Stock_250kDB.PhenotypeMethod.get(phenotype_id) if not phenotype_method: continue score_rank_data_ls = [] sys.stderr.write("Checking phenotype %s (%s) on list_type %s (%s) ...\n"%\ (phenotype_method.id, phenotype_method.short_name, list_type.id, list_type.short_name)) for results_id in results_id_ls: try: rm = ResultsClass.get(results_id) score_rank_data = None if self.null_distribution_type_id==1: if self.results_type==1: permData = self.prepareDataForPermutationRankTest(rm, snps_context_wrapper, param_data) if not permData: continue score_rank_data = PassingData(candidate_score_ls=permData.candidate_gene_snp_value_ls, \ candidate_rank_ls=permData.candidate_gene_snp_rank_ls,\ non_candidate_score_ls=permData.non_candidate_gene_snp_value_ls, non_candidate_rank_ls=permData.non_candidate_gene_snp_rank_ls,\ analysis_method=rm.analysis_method) del permData elif self.results_type==2: score_rank_data = self.getScoreRankFromRBG(rm, candidate_gene_set, self.results_directory) elif self.null_distribution_type_id==2 or self.null_distribution_type_id==3: genome_wide_result = self.getResultMethodContent(rm, param_data.results_directory, param_data.min_MAF, pdata=param_data) if not genome_wide_result: continue score_rank_data = self.getScoreRankFromPermIndexLs(genome_wide_result, candidate_gene_snp_index_ls, non_candidate_gene_snp_index_ls) if score_rank_data: score_rank_data.analysis_method = rm.analysis_method if score_rank_data: score_rank_data_ls.append(score_rank_data) except: sys.stderr.write("Exception happened for results_id=%s, phenotype_id=%s.\n"%(results_id, phenotype_id)) traceback.print_exc() sys.stderr.write('%s.\n'%repr(sys.exc_info())) continue if score_rank_data_ls: score_png_data, score_svg_data = self.plotHistForOnePhenotype(phenotype_method, list_type, score_rank_data_ls, self.output_dir, 
data_type='score', commit=self.commit) rank_png_data, rank_svg_data = self.plotHistForOnePhenotype(phenotype_method, list_type, score_rank_data_ls, self.output_dir, data_type='rank', commit=self.commit) if self.commit: score_rank_hist = Stock_250kDB.ScoreRankHistogram(phenotype_method_id=phenotype_id, list_type_id=list_type.id) score_rank_hist.hist_type = hist_type score_rank_hist.score_hist = score_png_data.getvalue() score_rank_hist.score_hist_svg = score_svg_data.getvalue() score_rank_hist.rank_hist = rank_png_data.getvalue() score_rank_hist.rank_hist_svg = rank_svg_data.getvalue() session.save(score_rank_hist) session.flush() del score_png_data, score_svg_data, rank_png_data, rank_svg_data """