Exemplo n.º 1
0
	def run(self):
		"""
		Build the results-method data matrix, then write it out and/or draw it.

		Steps, in order:
		1. enter pdb when self.debug is set;
		2. connect to the database described by the instance's connection attributes;
		3. call getResultsMethodIDInfo(), getResultsMethodID2GeneSet() and getDataMatrix()
		   to obtain the matrix;
		4. exit with status 3 if SNPData.isDataMatrixEmpty() reports an empty matrix;
		5. write the matrix to self.output_fname when that is set;
		6. when self.fig_fname is set, draw the matrix (the same labels are used for
		   rows and columns) next to a continuous legend and save the image.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		connection_kwargs = dict(drivername=self.drivername, username=self.db_user,
			password=self.db_passwd, hostname=self.hostname,
			database=self.dbname, schema=self.schema)
		db = Stock_250kDB(**connection_kwargs)
		db.setup()
		session = db.session	# not referenced again in this method

		id_info = self.getResultsMethodIDInfo(db, self.call_method_id_ls, self.min_distance,
			self.get_closest, self.min_MAF)
		gene_sets = self.getResultsMethodID2GeneSet(db, id_info, self.results_directory, self.max_rank)
		rdata = self.getDataMatrix(gene_sets, id_info)

		# one label list serves as row labels, column labels and (prefixed) file header
		labels = id_info.results_method_id_label_ls
		header = ['', ''] + labels
		category_list = id_info.results_method_id_ls

		if SNPData.isDataMatrixEmpty(rdata.data_matrix):
			sys.stderr.write("Nothing fetched from database.\n")
			sys.exit(3)

		if self.output_fname:
			write_data_matrix(rdata.data_matrix, self.output_fname, header, labels, category_list)

		if not self.fig_fname:
			return

		font = get_font(self.font_path, font_size=self.font_size)

		def score2color(x):
			# colour scale spans the matrix's own min/max values
			return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

		legend_image = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks,
			score2color, font)
		matrix_image = drawMatrix(rdata.data_matrix, score2color, labels, labels,
			with_grid=1, font=font)
		combined_image = combineTwoImages(matrix_image, legend_image, font=font)
		combined_image.save(self.fig_fname)
Exemplo n.º 2
0
	def run(self):
		"""
		Draw the pathway diagram with genes coloured by score and save it as PNG and SVG.

		Steps, in order:
		1. enter pdb when self.debug is set;
		2. connect to the Stock_250kDB database (no table creation);
		3. fetch the gene_id -> gene_symbol mapping for tax id 3702 through a
		   short-lived MySQLdb connection, which is closed as soon as the mapping
		   has been fetched;
		4. fetch per-gene scores via get_results_by_gene() and exit with status 3
		   right away when none come back (before any further query or min/max work);
		5. lay out one matplotlib axes per pathway plus 'Flowering', legend and a
		   transparent cover axes used for drawing edges;
		6. draw the pathway edges first, then the pathway boxes, the final
		   meristem -> flowering edge, the title and the score legend;
		7. save '<output_fname_prefix>.png' (dpi=600) and '<output_fname_prefix>.svg'.

		Side effect: self.get_closest is overwritten with 0.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
			password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)
		session = db.session	# not referenced again in this method

		import MySQLdb
		from pymodule.utils import get_gene_id2gene_symbol
		# NOTE: the host is hard-coded here rather than taken from self.hostname
		mysql_conn = MySQLdb.connect(db=self.dbname, host='banyan.usc.edu', user=self.db_user, passwd=self.db_passwd)
		try:
			mysql_curs = mysql_conn.cursor()
			gene_id2gene_symbol = get_gene_id2gene_symbol(mysql_curs, 3702, table='genome.gene', upper_case_gene_symbol=0)	#3702 is At's tax id
		finally:
			# the cursor is not used past this point, so release the connection now
			mysql_conn.close()

		self.get_closest = 0	# overrides whatever value the instance held
		gene_id2score = self.get_results_by_gene(self.call_method_id, self.analysis_method_id, self.phenotype_method_id,
			self.min_distance, self.get_closest, self.min_MAF, self.results_directory, self.score_is_rank)

		# fail fast: without scores there is nothing to colour and no min/max to compute
		if not gene_id2score:
			sys.stderr.write("gene_id2score is empty. exit.\n")
			sys.exit(3)

		pathway_id2gene_id_ls = self.get_pathway_id2gene_id_ls()
		min_score, max_score = self.get_min_max_score(pathway_id2gene_id_ls, gene_id2score)
		value2color_func = lambda x: Value2Color.value2HSLcolor(x, min_score, max_score)

		# ---- axes layout; every rectangle is [x_offset, y_offset, width, height] ----
		# The creation order below is kept exactly as before.
		gap = 0.05

		auto_x, auto_y, auto_w, auto_h = 0.05, 0.7, 0.3, 0.2
		axe_auto = pylab.axes([auto_x, auto_y, auto_w, auto_h])

		# to the right of axe_auto, three quarters of its width
		vern_x = auto_x + auto_w + gap
		vern_w = auto_w*0.75
		axe_vern = pylab.axes([vern_x, auto_y, vern_w, auto_h])

		# to the right of axe_vern, same size as axe_auto
		light_x = vern_x + vern_w + gap
		axe_light = pylab.axes([light_x, auto_y, auto_w, auto_h])

		flc_act_h = 0.2
		axe_flc_act = pylab.axes([0.05, 0.45, 0.3, flc_act_h])

		axe_repr = pylab.axes([0.05, 0.1, 0.3, 0.2])

		# a third of the height of axe_flc_act
		flc_y, flc_w, flc_h = 0.5, 0.1, flc_act_h/3
		axe_flc = pylab.axes([0.4, flc_y, flc_w, flc_h])

		# same row and size as axe_flc
		co_x = 0.6
		axe_co = pylab.axes([co_x, flc_y, flc_w, flc_h])

		# to the right of axe_co
		axe_gibbe = pylab.axes([co_x + flc_w + gap, flc_y, auto_w*0.75, flc_h])

		int_x, int_w, int_h = 0.4, 0.3, 0.05
		axe_int = pylab.axes([int_x, 0.3, int_w, int_h])

		# same size as axe_int
		meri_x = 0.4
		axe_meri = pylab.axes([meri_x, 0.1, int_w, int_h])

		# 'Flowering' is not in pathway_id2axe, so its label and ticks are handled here
		axe_flower = pylab.axes([meri_x, 0.01, int_w, 0.05])
		axe_flower.text(0.5, 0.5, 'Flowering',
			horizontalalignment='center', verticalalignment='center', size=8)
		axe_flower.set_xticks([])
		axe_flower.set_yticks([])

		# frameless axes that receives the score legend drawn by drawLegend()
		axe_score_legend = pylab.axes([int_x + int_w + 0.1, 0.01, 0.1, 0.3], frameon=False)
		axe_score_legend.set_xticks([])
		axe_score_legend.set_yticks([])

		# frameless axes spanning almost the whole figure; edges are drawn on it
		axe_cover = pylab.axes([0.03, 0.01, 0.95, 0.95], frameon=False)
		axe_cover.set_xticks([])
		axe_cover.set_yticks([])

		pathway_id2axe = {1: axe_flc_act, 2: axe_repr, 3: axe_light, 4: axe_gibbe, 5: axe_vern,
			6: axe_int, 7: axe_meri, 8: axe_flc, 9: axe_co, 10: axe_auto}

		#draw edges first to avoid the edges overwriting the pathway labels
		for pathway_edge in Stock_250kDB.FTPathwayRelationship.query.all():
			axe_start = pathway_id2axe[pathway_edge.pathway1_id]
			axe_stop = pathway_id2axe[pathway_edge.pathway2_id]
			# the cover's limits are pinned to [0, 1] both before and after each edge
			axe_cover.set_xlim([0, 1])
			axe_cover.set_ylim([0, 1])
			self.draw_edge_between_pathways(axe_cover, axe_start, axe_stop,
				edge_type=pathway_edge.relationship_type_id,
				arrow_start_point_loc=pathway_edge.arrow_start_point_loc,
				arrow_end_point_loc=pathway_edge.arrow_end_point_loc)
			axe_cover.set_xlim([0, 1])
			axe_cover.set_ylim([0, 1])

		# items() instead of the Python-2-only iteritems(); the dict has ten entries
		for pathway_id, axe in pathway_id2axe.items():
			axe.set_xticks([])
			axe.set_yticks([])
			pathway = Stock_250kDB.FTPathway.get(pathway_id)
			axe.title.set_text(pathway.short_name)
			gene_id_ls = pathway_id2gene_id_ls.get(pathway_id)
			if gene_id_ls:
				self.draw_pathway_box(gene_id_ls, axe, gene_id2gene_symbol, gene_id2score, value2color_func,
					x_span=1., y_span=1., gene_label_size=8)

		#draw the final edge from axe_meri to axe_flower
		self.draw_edge_between_pathways(axe_cover, axe_meri, axe_flower, edge_type=0,
			arrow_start_point_loc='bottom middle', arrow_end_point_loc='top middle')

		phenotype_method = Stock_250kDB.PhenotypeMethod.get(self.phenotype_method_id)
		analysis_method = Stock_250kDB.AnalysisMethod.get(self.analysis_method_id)
		axe_cover.title.set_text('%s by %s'%(phenotype_method.short_name, analysis_method.short_name))
		axe_cover.set_xlim([0, 1])
		axe_cover.set_ylim([0, 1])

		self.drawLegend(axe_score_legend, value2color_func, min_score, max_score, no_of_bands=20,
			score_is_rank=self.score_is_rank)

		png_output_fname = '%s.png'%self.output_fname_prefix
		pylab.savefig(png_output_fname, dpi=600)
		pylab.savefig('%s.svg'%self.output_fname_prefix)
Exemplo n.º 3
0
	def run(self):
		"""
		Assemble the strain-by-target data matrix, then write it out and/or draw it.

		Outline:
		- optionally enter pdb (self.debug) and connect to the database;
		- build the SQL that looks up row (strain) information. With QC_method_id 4
		  only the ecotype/site/address/country/strain tables are joined; for any
		  other QC method the QCCrossMatch table is joined too and filtered by
		  qc_method_id, min_no_of_non_NAs and max_mismatch_rate;
		- obtain strain info either from plate info (how_to_group_strains 2 or 3)
		  or directly from the query, restricted to the ids read from
		  self.input_fname when that is given;
		- obtain target (column) info, fetch the matrix from file or database and
		  pass it through markDataMatrixBoundary();
		- exit with status 3 on an empty matrix; otherwise write self.output_fname
		  and/or draw self.fig_fname when they are set.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
			password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)
		session = db.session	# not referenced again in this method

		#how to order strains.
		order_by_sentence = " order by c.longitude, c.latitude, e.longitude, e.latitude, e.nativename "

		# The '%s' left inside common_where_condition is filled in later with the
		# join clause that is specific to the strain or the target query.
		no_cross_match = self.QC_method_id == 4
		if no_cross_match:
			sql_table_str = "from %s e, %s s, %s a, %s c" % (StockDB.Ecotype.table.name,
				StockDB.Site.table.name, StockDB.Address.table.name, StockDB.Country.table.name)
			common_where_condition = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s " + order_by_sentence
			strain_id_column = "st.id as strainid"
			strain_join = " and e.id=st.ecotypeid"
		else:
			sql_table_str = "from %s q, %s e, %s s, %s a, %s c" % (StockDB.QCCrossMatch.table.name,
				StockDB.Ecotype.table.name, StockDB.Site.table.name, StockDB.Address.table.name,
				StockDB.Country.table.name)
			qc_filter = " and q.qc_method_id=%s and q.no_of_non_NA_pairs>=%s and q.mismatch_rate<=%s " % \
				(self.QC_method_id, self.min_no_of_non_NAs, self.max_mismatch_rate)
			common_where_condition = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s" + qc_filter + order_by_sentence
			strain_id_column = "q.strainid"
			strain_join = " and e.id=st.ecotypeid and st.id=q.strainid"

		strain_where_condition = common_where_condition % strain_join
		strain_id_info_query = "select distinct %s, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s" % \
			(strain_id_column, sql_table_str, StockDB.Strain.table.name, strain_where_condition)

		group_by_plate = self.how_to_group_strains == 2 or self.how_to_group_strains == 3
		if group_by_plate:
			plate_info = self.alignStrainsAccordingToSeqPlate(db)
			id_set_data = PassingData()
			id_set_data.strain_id_set = None
			id_set_data.target_id_set = None
			strain_id_info = self.getStrainInfoGivenPlateInfo(db, plate_info, strain_id_info_query, strain_id_set=None)
		else:
			if self.input_fname:
				id_set_data = self.getStrainidTargetidFromFile(db, self.QC_method_id, self.input_fname,
					self.max_mismatch_rate, self.min_no_of_non_NAs)
			else:
				id_set_data = PassingData()
				id_set_data.strain_id_set = None
				id_set_data.target_id_set = None
			strain_id_info = self.getStrainIDInfo(db, strain_id_info_query, id_set_data.strain_id_set)

		if not no_cross_match:
			target_where_condition = common_where_condition % " and e.id=q.target_id"
			target_id_info_query = "select distinct e.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s %s" % \
				(sql_table_str, target_where_condition)
			target_id_info = self.getStrainIDInfo(db, target_id_info_query)
		elif self.how_to_group_strains == 3:
			#2008-09-15 column strain id is in country, strain-longitude order
			target_id_info = self.getStrainIDInfo(db, strain_id_info_query, id_set_data.strain_id_set)
		else:
			# rows and columns share the same strain info
			target_id_info = strain_id_info

		if self.input_fname:
			rdata = self.get_data_matrixFromFile(db, strain_id_info, target_id_info, self.QC_method_id,
				self.input_fname, self.max_mismatch_rate, self.min_no_of_non_NAs)
		else:
			rdata = self.get_data_matrix(db, strain_id_info, target_id_info, self.QC_method_id,
				self.max_mismatch_rate, self.min_no_of_non_NAs)

		rdata.data_matrix = self.markDataMatrixBoundary(rdata.data_matrix, strain_id_info, target_id_info)

		header = ['strain info', ''] + target_id_info.strain_label_ls
		row_labels = strain_id_info.strain_label_ls
		category_list = [1] * len(row_labels)

		if SNPData.isDataMatrixEmpty(rdata.data_matrix):
			sys.stderr.write("Nothing fetched from database.\n")
			sys.exit(3)

		if self.output_fname:
			write_data_matrix(rdata.data_matrix, self.output_fname, header, row_labels, category_list)

		if not self.fig_fname:
			return

		font = get_font(self.font_path, font_size=self.font_size)

		def score2color(x):
			# colour scale spans the matrix's own min/max values
			return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

		legend_image = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks,
			score2color, font)
		matrix_image = drawMatrix(rdata.data_matrix, score2color, strain_id_info.strain_label_ls,
			target_id_info.strain_label_ls, with_grid=1, font=font)
		combined_image = combineTwoImages(matrix_image, legend_image, font=font)
		combined_image.save(self.fig_fname)
Exemplo n.º 4
0
    def run(self):
        """
        Assemble the strain-by-target data matrix, then write it out and/or draw it.

        Outline:
        - optionally enter pdb (self.debug) and connect to the database;
        - build the SQL that looks up row (strain) information. With QC_method_id 4
          only the ecotype/site/address/country/strain tables are joined; for any
          other QC method the QCCrossMatch table is joined too and filtered by
          qc_method_id, min_no_of_non_NAs and max_mismatch_rate;
        - obtain strain info either from plate info (how_to_group_strains 2 or 3)
          or directly from the query, restricted to the ids read from
          self.input_fname when that is given;
        - obtain target (column) info, fetch the matrix from file or database and
          pass it through markDataMatrixBoundary();
        - exit with status 3 on an empty matrix; otherwise write self.output_fname
          and/or draw self.fig_fname when they are set.
        """
        if self.debug:
            import pdb
            pdb.set_trace()

        db = StockDB.StockDB(drivername=self.drivername, username=self.db_user,
                             password=self.db_passwd, hostname=self.hostname,
                             database=self.dbname, schema=self.schema)
        db.setup(create_tables=False)
        session = db.session  # not referenced again in this method

        #how to order strains.
        order_by_sentence = " order by c.longitude, c.latitude, e.longitude, e.latitude, e.nativename "

        # The '%s' left inside common_where_condition is filled in later with the
        # join clause that is specific to the strain or the target query.
        no_cross_match = self.QC_method_id == 4
        if no_cross_match:
            sql_table_str = "from %s e, %s s, %s a, %s c" % (
                StockDB.Ecotype.table.name, StockDB.Site.table.name,
                StockDB.Address.table.name, StockDB.Country.table.name)
            common_where_condition = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s " + order_by_sentence
            strain_id_column = "st.id as strainid"
            strain_join = " and e.id=st.ecotypeid"
        else:
            sql_table_str = "from %s q, %s e, %s s, %s a, %s c" % (
                StockDB.QCCrossMatch.table.name, StockDB.Ecotype.table.name,
                StockDB.Site.table.name, StockDB.Address.table.name,
                StockDB.Country.table.name)
            qc_filter = " and q.qc_method_id=%s and q.no_of_non_NA_pairs>=%s and q.mismatch_rate<=%s " % (
                self.QC_method_id, self.min_no_of_non_NAs, self.max_mismatch_rate)
            common_where_condition = "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id %s" + qc_filter + order_by_sentence
            strain_id_column = "q.strainid"
            strain_join = " and e.id=st.ecotypeid and st.id=q.strainid"

        strain_where_condition = common_where_condition % strain_join
        strain_id_info_query = "select distinct %s, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s, %s st %s" % (
            strain_id_column, sql_table_str, StockDB.Strain.table.name,
            strain_where_condition)

        group_by_plate = self.how_to_group_strains == 2 or self.how_to_group_strains == 3
        if group_by_plate:
            plate_info = self.alignStrainsAccordingToSeqPlate(db)
            id_set_data = PassingData()
            id_set_data.strain_id_set = None
            id_set_data.target_id_set = None
            strain_id_info = self.getStrainInfoGivenPlateInfo(
                db, plate_info, strain_id_info_query, strain_id_set=None)
        else:
            if self.input_fname:
                id_set_data = self.getStrainidTargetidFromFile(
                    db, self.QC_method_id, self.input_fname,
                    self.max_mismatch_rate, self.min_no_of_non_NAs)
            else:
                id_set_data = PassingData()
                id_set_data.strain_id_set = None
                id_set_data.target_id_set = None
            strain_id_info = self.getStrainIDInfo(db, strain_id_info_query,
                                                  id_set_data.strain_id_set)

        if not no_cross_match:
            target_where_condition = common_where_condition % " and e.id=q.target_id"
            target_id_info_query = "select distinct e.id as strainid, e.id as ecotypeid, e.nativename, s.name as sitename, c.abbr %s %s" % (
                sql_table_str, target_where_condition)
            target_id_info = self.getStrainIDInfo(db, target_id_info_query)
        elif self.how_to_group_strains == 3:
            #2008-09-15 column strain id is in country, strain-longitude order
            target_id_info = self.getStrainIDInfo(
                db, strain_id_info_query, id_set_data.strain_id_set)
        else:
            # rows and columns share the same strain info
            target_id_info = strain_id_info

        if self.input_fname:
            rdata = self.get_data_matrixFromFile(
                db, strain_id_info, target_id_info, self.QC_method_id,
                self.input_fname, self.max_mismatch_rate,
                self.min_no_of_non_NAs)
        else:
            rdata = self.get_data_matrix(
                db, strain_id_info, target_id_info, self.QC_method_id,
                self.max_mismatch_rate, self.min_no_of_non_NAs)

        rdata.data_matrix = self.markDataMatrixBoundary(
            rdata.data_matrix, strain_id_info, target_id_info)

        header = ['strain info', ''] + target_id_info.strain_label_ls
        row_labels = strain_id_info.strain_label_ls
        category_list = [1] * len(row_labels)

        if SNPData.isDataMatrixEmpty(rdata.data_matrix):
            sys.stderr.write("Nothing fetched from database.\n")
            sys.exit(3)

        if self.output_fname:
            write_data_matrix(rdata.data_matrix, self.output_fname, header,
                              row_labels, category_list)

        if not self.fig_fname:
            return

        font = get_font(self.font_path, font_size=self.font_size)

        def score2color(x):
            # colour scale spans the matrix's own min/max values
            return Value2Color.value2HSLcolor(x, rdata.min_value,
                                              rdata.max_value)

        legend_image = drawContinousLegend(rdata.min_value, rdata.max_value,
                                           self.no_of_ticks, score2color, font)
        matrix_image = drawMatrix(rdata.data_matrix, score2color,
                                  strain_id_info.strain_label_ls,
                                  target_id_info.strain_label_ls,
                                  with_grid=1, font=font)
        combined_image = combineTwoImages(matrix_image, legend_image, font=font)
        combined_image.save(self.fig_fname)
Exemplo n.º 5
0
	def run(self):
		"""
		Build the (list type, analysis method) x phenotype matrix and write and/or draw it.

		Outline:
		- optionally enter pdb (self.debug) and connect to the database;
		- pick the test-result table from self.test_result_type (1, 2 or 3); any
		  other value is reported on stderr and the process exits with status 2;
		- compose the shared "tables ... where ..." SQL fragment, appending the
		  optional call-method / analysis-method / super-type / test-type filters
		  and the per-result-type filter;
		- collect list-type, analysis-method and phenotype info, fetch the matrix
		  and pass it through markDataMatrixBoundary();
		- exit with status 3 on an empty matrix; otherwise write self.output_fname
		  and/or draw self.fig_fname when they are set.
		"""
		if self.debug:
			import pdb
			pdb.set_trace()

		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
			password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup()
		session = db.session	# not referenced again in this method

		result_type = self.test_result_type
		if result_type == 1:
			test_result_class_table = CandidateGeneRankSumTestResult.table.name
			# NOTE: the mapped table name is immediately replaced by this fixed name
			test_result_class_table = 'candidate_gene_rank_sum_test_result_2008_09_15'
		elif result_type == 2:
			test_result_class_table = CandidateGeneTopSNPTest.table.name
		elif result_type == 3:
			test_result_class_table = Stock_250kDB.CandidateGeneRankSumTestResultMethod.table.name
		else:
			sys.stderr.write(" test_result_type %s not supported.\n" % (result_type))
			sys.exit(2)

		#the condition for min_MAF is tricky because of the floating precision.
		if result_type == 2:
			# type 2 results hang off ResultsByGene, which in turn points at ResultsMethod
			where_template = ("%s r, %s rg, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null and r.id=rg.results_method_id "
				"\t\t\t\tand c.results_id=rg.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001")
			where_condition = where_template % (ResultsMethod.table.name, ResultsByGene.table.name,
				test_result_class_table, GeneListType.table.name,
				self.get_closest, self.min_distance, self.min_MAF)
		else:
			# types 1 and 3 use the same clause: results reference ResultsMethod directly
			where_template = ("%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null "
				"\t\t\t\tand c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001")
			where_condition = where_template % (ResultsMethod.table.name, test_result_class_table,
				GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)

		# optional filters, appended only when the corresponding option is truthy
		optional_filters = [
			(self.call_method_id_ls, " and r.call_method_id in (%s)"),
			(self.analysis_method_id_ls, " and r.analysis_method_id in (%s)"),
			(self.super_type_id, " and g.super_type_id=%s"),
			(self.test_type, " and c.test_type=%s"),
		]
		for option_value, clause in optional_filters:
			if option_value:
				where_condition += clause % option_value

		if result_type == 1:
			where_condition += " and c.max_pvalue_per_gene=%s" % (self.max_pvalue_per_gene)
		elif result_type == 2:
			where_condition += " and c.no_of_top_snps=%s" % (self.no_of_top_snps)

		list_type_id_ls = self.getListTypeInfo(db, where_condition)
		analysis_method_id_ls = self.getAnalysisMethodInfo(db, where_condition)
		row_info = self.orderListTypeAnalysisMethodID(list_type_id_ls, analysis_method_id_ls)
		phenotype_info = self.getPhenotypeInfo(db, where_condition)
		rdata = self.get_data_matrix(db, phenotype_info, row_info, where_condition)

		rdata.data_matrix = self.markDataMatrixBoundary(rdata.data_matrix, phenotype_info, row_info)

		header = ['list_type_analysis_method', ''] + phenotype_info.phenotype_method_label_ls
		row_labels = row_info.list_type_analysis_method_label_ls
		category_list = row_info.list_type_id_analysis_method_id_ls

		if SNPData.isDataMatrixEmpty(rdata.data_matrix):
			sys.stderr.write("Nothing fetched from database.\n")
			sys.exit(3)

		if self.output_fname:
			write_data_matrix(rdata.data_matrix, self.output_fname, header, row_labels, category_list)

		if not self.fig_fname:
			return

		font = get_font(self.font_path, font_size=self.font_size)

		def score2color(x):
			# colour scale spans the matrix's own min/max values
			return Value2Color.value2HSLcolor(x, rdata.min_value, rdata.max_value)

		legend_image = drawContinousLegend(rdata.min_value, rdata.max_value, self.no_of_ticks,
			score2color, font)
		matrix_image = drawMatrix(rdata.data_matrix, score2color,
			row_info.list_type_analysis_method_label_ls,
			phenotype_info.phenotype_method_label_ls, with_grid=1, font=font)
		combined_image = combineTwoImages(matrix_image, legend_image, font=font)
		combined_image.save(self.fig_fname)
Exemplo n.º 6
0
    def run(self):
        """
        Draw the pathway diagram with genes coloured by score and save it as PNG and SVG.

        Steps, in order:
        1. enter pdb when self.debug is set;
        2. connect to the Stock_250kDB database (no table creation);
        3. fetch the gene_id -> gene_symbol mapping for tax id 3702 through a
           short-lived MySQLdb connection, which is closed as soon as the mapping
           has been fetched;
        4. fetch per-gene scores via get_results_by_gene() and exit with status 3
           right away when none come back (before any further query or min/max work);
        5. lay out one matplotlib axes per pathway plus 'Flowering', legend and a
           transparent cover axes used for drawing edges;
        6. draw the pathway edges first, then the pathway boxes, the final
           meristem -> flowering edge, the title and the score legend;
        7. save '<output_fname_prefix>.png' (dpi=600) and '<output_fname_prefix>.svg'.

        Side effect: self.get_closest is overwritten with 0.
        """
        if self.debug:
            import pdb
            pdb.set_trace()
        db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
                                       username=self.db_user,
                                       password=self.db_passwd,
                                       hostname=self.hostname,
                                       database=self.dbname,
                                       schema=self.schema)
        db.setup(create_tables=False)
        session = db.session  # not referenced again in this method

        import MySQLdb
        from pymodule.utils import get_gene_id2gene_symbol
        # NOTE: the host is hard-coded here rather than taken from self.hostname
        mysql_conn = MySQLdb.connect(db=self.dbname,
                                     host='banyan.usc.edu',
                                     user=self.db_user,
                                     passwd=self.db_passwd)
        try:
            mysql_curs = mysql_conn.cursor()
            gene_id2gene_symbol = get_gene_id2gene_symbol(
                mysql_curs, 3702, table='genome.gene',
                upper_case_gene_symbol=0)  #3702 is At's tax id
        finally:
            # the cursor is not used past this point, so release the connection now
            mysql_conn.close()

        self.get_closest = 0  # overrides whatever value the instance held
        gene_id2score = self.get_results_by_gene(
            self.call_method_id, self.analysis_method_id,
            self.phenotype_method_id, self.min_distance, self.get_closest,
            self.min_MAF, self.results_directory, self.score_is_rank)

        # fail fast: without scores there is nothing to colour and no min/max to compute
        if not gene_id2score:
            sys.stderr.write("gene_id2score is empty. exit.\n")
            sys.exit(3)

        pathway_id2gene_id_ls = self.get_pathway_id2gene_id_ls()
        min_score, max_score = self.get_min_max_score(pathway_id2gene_id_ls,
                                                      gene_id2score)
        value2color_func = lambda x: Value2Color.value2HSLcolor(
            x, min_score, max_score)

        # ---- axes layout; every rectangle is [x_offset, y_offset, width, height] ----
        # The creation order below is kept exactly as before.
        gap = 0.05

        auto_x, auto_y, auto_w, auto_h = 0.05, 0.7, 0.3, 0.2
        axe_auto = pylab.axes([auto_x, auto_y, auto_w, auto_h])

        # to the right of axe_auto, three quarters of its width
        vern_x = auto_x + auto_w + gap
        vern_w = auto_w * 0.75
        axe_vern = pylab.axes([vern_x, auto_y, vern_w, auto_h])

        # to the right of axe_vern, same size as axe_auto
        light_x = vern_x + vern_w + gap
        axe_light = pylab.axes([light_x, auto_y, auto_w, auto_h])

        flc_act_h = 0.2
        axe_flc_act = pylab.axes([0.05, 0.45, 0.3, flc_act_h])

        axe_repr = pylab.axes([0.05, 0.1, 0.3, 0.2])

        # a third of the height of axe_flc_act
        flc_y, flc_w, flc_h = 0.5, 0.1, flc_act_h / 3
        axe_flc = pylab.axes([0.4, flc_y, flc_w, flc_h])

        # same row and size as axe_flc
        co_x = 0.6
        axe_co = pylab.axes([co_x, flc_y, flc_w, flc_h])

        # to the right of axe_co
        axe_gibbe = pylab.axes(
            [co_x + flc_w + gap, flc_y, auto_w * 0.75, flc_h])

        int_x, int_w, int_h = 0.4, 0.3, 0.05
        axe_int = pylab.axes([int_x, 0.3, int_w, int_h])

        # same size as axe_int
        meri_x = 0.4
        axe_meri = pylab.axes([meri_x, 0.1, int_w, int_h])

        # 'Flowering' is not in pathway_id2axe, so its label and ticks are handled here
        axe_flower = pylab.axes([meri_x, 0.01, int_w, 0.05])
        axe_flower.text(0.5, 0.5, 'Flowering',
                        horizontalalignment='center',
                        verticalalignment='center',
                        size=8)
        axe_flower.set_xticks([])
        axe_flower.set_yticks([])

        # frameless axes that receives the score legend drawn by drawLegend()
        axe_score_legend = pylab.axes([int_x + int_w + 0.1, 0.01, 0.1, 0.3],
                                      frameon=False)
        axe_score_legend.set_xticks([])
        axe_score_legend.set_yticks([])

        # frameless axes spanning almost the whole figure; edges are drawn on it
        axe_cover = pylab.axes([0.03, 0.01, 0.95, 0.95], frameon=False)
        axe_cover.set_xticks([])
        axe_cover.set_yticks([])

        pathway_id2axe = {
            1: axe_flc_act,
            2: axe_repr,
            3: axe_light,
            4: axe_gibbe,
            5: axe_vern,
            6: axe_int,
            7: axe_meri,
            8: axe_flc,
            9: axe_co,
            10: axe_auto,
        }

        #draw edges first to avoid the edges overwriting the pathway labels
        for pathway_edge in Stock_250kDB.FTPathwayRelationship.query.all():
            axe_start = pathway_id2axe[pathway_edge.pathway1_id]
            axe_stop = pathway_id2axe[pathway_edge.pathway2_id]
            # the cover's limits are pinned to [0, 1] both before and after each edge
            axe_cover.set_xlim([0, 1])
            axe_cover.set_ylim([0, 1])
            self.draw_edge_between_pathways(
                axe_cover, axe_start, axe_stop,
                edge_type=pathway_edge.relationship_type_id,
                arrow_start_point_loc=pathway_edge.arrow_start_point_loc,
                arrow_end_point_loc=pathway_edge.arrow_end_point_loc)
            axe_cover.set_xlim([0, 1])
            axe_cover.set_ylim([0, 1])

        # items() instead of the Python-2-only iteritems(); the dict has ten entries
        for pathway_id, axe in pathway_id2axe.items():
            axe.set_xticks([])
            axe.set_yticks([])
            pathway = Stock_250kDB.FTPathway.get(pathway_id)
            axe.title.set_text(pathway.short_name)
            gene_id_ls = pathway_id2gene_id_ls.get(pathway_id)
            if gene_id_ls:
                self.draw_pathway_box(gene_id_ls, axe, gene_id2gene_symbol,
                                      gene_id2score, value2color_func,
                                      x_span=1., y_span=1.,
                                      gene_label_size=8)

        #draw the final edge from axe_meri to axe_flower
        self.draw_edge_between_pathways(axe_cover, axe_meri, axe_flower,
                                        edge_type=0,
                                        arrow_start_point_loc='bottom middle',
                                        arrow_end_point_loc='top middle')

        phenotype_method = Stock_250kDB.PhenotypeMethod.get(
            self.phenotype_method_id)
        analysis_method = Stock_250kDB.AnalysisMethod.get(
            self.analysis_method_id)
        axe_cover.title.set_text(
            '%s by %s' %
            (phenotype_method.short_name, analysis_method.short_name))
        axe_cover.set_xlim([0, 1])
        axe_cover.set_ylim([0, 1])

        self.drawLegend(axe_score_legend,
                        value2color_func,
                        min_score,
                        max_score,
                        no_of_bands=20,
                        score_is_rank=self.score_is_rank)

        png_output_fname = '%s.png' % self.output_fname_prefix
        pylab.savefig(png_output_fname, dpi=600)
        pylab.savefig('%s.svg' % self.output_fname_prefix)
    def run(self):
        if self.debug:
            import pdb
            pdb.set_trace()
        db = Stock_250kDB.Stock_250kDB(drivername=self.drivername,
                                       username=self.db_user,
                                       password=self.db_passwd,
                                       hostname=self.hostname,
                                       database=self.dbname,
                                       schema=self.schema)
        db.setup()
        session = db.session

        if self.test_result_type == 1:
            test_result_class_table = CandidateGeneRankSumTestResult.table.name
            test_result_class_table = 'candidate_gene_rank_sum_test_result_2008_09_15'
        elif self.test_result_type == 2:
            test_result_class_table = CandidateGeneTopSNPTest.table.name
        elif self.test_result_type == 3:
            test_result_class_table = Stock_250kDB.CandidateGeneRankSumTestResultMethod.table.name
        else:
            sys.stderr.write(" test_result_type %s not supported.\n" %
                             (self.test_result_type))
            sys.exit(2)

        #the condition for min_MAF is tricky because of the floating precision.
        if self.test_result_type == 1:
            where_condition = "%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null \
				and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"         \
             %(ResultsMethod.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
        elif self.test_result_type == 2:
            where_condition = "%s r, %s rg, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null and r.id=rg.results_method_id \
				and c.results_id=rg.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"         \
             %(ResultsMethod.table.name, ResultsByGene.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
        elif self.test_result_type == 3:
            where_condition = "%s r, %s c, %s g where g.id=c.list_type_id and r.analysis_method_id is not null \
				and c.results_id=r.id and c.get_closest=%s and c.min_distance=%s and abs(c.min_MAF-%s)<0.00001"         \
             %(ResultsMethod.table.name, test_result_class_table, GeneListType.table.name, self.get_closest, self.min_distance, self.min_MAF)
        if self.call_method_id_ls:
            where_condition += " and r.call_method_id in (%s)" % self.call_method_id_ls

        if self.analysis_method_id_ls:
            where_condition += " and r.analysis_method_id in (%s)" % self.analysis_method_id_ls
        if self.super_type_id:
            where_condition += " and g.super_type_id=%s" % self.super_type_id

        if self.test_type:
            where_condition += " and c.test_type=%s" % self.test_type

        if self.test_result_type == 1:
            pass
            where_condition += " and c.max_pvalue_per_gene=%s" % (
                self.max_pvalue_per_gene)
        elif self.test_result_type == 2:
            where_condition += " and c.no_of_top_snps=%s" % (
                self.no_of_top_snps)

        list_type_id_ls = self.getListTypeInfo(db, where_condition)
        analysis_method_id_ls = self.getAnalysisMethodInfo(db, where_condition)
        list_type_analysis_method_info = self.orderListTypeAnalysisMethodID(
            list_type_id_ls, analysis_method_id_ls)
        phenotype_info = self.getPhenotypeInfo(db, where_condition)
        rdata = self.get_data_matrix(db, phenotype_info,
                                     list_type_analysis_method_info,
                                     where_condition)

        rdata.data_matrix = self.markDataMatrixBoundary(
            rdata.data_matrix, phenotype_info, list_type_analysis_method_info)

        header = ['list_type_analysis_method', ''
                  ] + phenotype_info.phenotype_method_label_ls
        strain_acc_list = list_type_analysis_method_info.list_type_analysis_method_label_ls
        category_list = list_type_analysis_method_info.list_type_id_analysis_method_id_ls
        if SNPData.isDataMatrixEmpty(rdata.data_matrix):
            sys.stderr.write("Nothing fetched from database.\n")
            sys.exit(3)
        if self.output_fname:
            write_data_matrix(rdata.data_matrix, self.output_fname, header,
                              strain_acc_list, category_list)

        if self.fig_fname:
            font = get_font(self.font_path,
                            font_size=self.font_size)  #2008-08-01
            value2color_func = lambda x: Value2Color.value2HSLcolor(
                x, rdata.min_value, rdata.max_value)
            im_legend = drawContinousLegend(rdata.min_value, rdata.max_value,
                                            self.no_of_ticks, value2color_func,
                                            font)
            #im.save('%s_legend.png'%self.fig_fname_prefix)
            im = drawMatrix(rdata.data_matrix, value2color_func, list_type_analysis_method_info.list_type_analysis_method_label_ls,\
               phenotype_info.phenotype_method_label_ls, with_grid=1, font=font)
            im = combineTwoImages(im, im_legend, font=font)
            im.save(self.fig_fname)