Exemplo n.º 1
0
	def test_getEcotypeInfo(self):
		"""
		Connect to the stock_250k database and call getEcotypeInfo() under pdb.

		The debugger is entered unconditionally right before the call, so this
		test is meant to be run interactively.
		"""
		from common import getEcotypeInfo
		#StockDB has to be imported/setup, otherwise StockDB.Ecotype.table is None in getEcotypeInfo()
		import StockDB, Stock_250kDB
		import pdb
		connection_kwargs = dict(drivername='mysql', username='******', password='',
								hostname='papaya.usc.edu', database='stock_250k', schema=None)
		#which database class is connected doesn't matter as long as StockDB is imported;
		#StockDB.StockDB(**connection_kwargs) would work as well.
		stock_db = Stock_250kDB.Stock_250kDB(**connection_kwargs)
		stock_db.setup(create_tables=False)
		pdb.set_trace()
		getEcotypeInfo(stock_db)
Exemplo n.º 2
0
 def test_getEcotypeInfo(self):
     """
     Interactive check of getEcotypeInfo() against the stock_250k database.

     Opens a Stock_250kDB connection (tables are not created), drops into
     pdb unconditionally and then calls getEcotypeInfo() with the connection.
     """
     from common import getEcotypeInfo
     #StockDB has to be imported/setup, otherwise StockDB.Ecotype.table is None in getEcotypeInfo()
     import StockDB, Stock_250kDB
     import pdb

     #which database class is connected doesn't matter as long as StockDB is imported;
     #StockDB.StockDB(...) with the same arguments would work as well.
     stock_db = Stock_250kDB.Stock_250kDB(
         drivername='mysql',
         username='******',
         password='',
         hostname='papaya.usc.edu',
         database='stock_250k',
         schema=None,
     )
     stock_db.setup(create_tables=False)

     pdb.set_trace()
     getEcotypeInfo(stock_db)
Exemplo n.º 3
0
	def run(self):
		"""
		Draw strains in PCA space linked to a geographic map, colored by phenotype, and save the figure.

		Steps, as visible in this method:
			1. connect to the database and read the SNP matrix from self.input_fname;
			2. if both eigen vector/value files are given, load the PCs and the explained variance from them;
				otherwise randomly down-sample to at most 10000 SNP columns, convert alleles to indices
				and pass explained_var=None, T=None on to getStrainID2PCAPosInfo();
			3. read the phenotype matrix from self.phenotype_fname and put its rows into SNP-matrix order;
			4. lay out the axes (phenotype colorbar, strain map, strain PCA and a cover axes spanning both)
				and call self.drawStrainPCA();
			5. save <output_fname_prefix>.png (dpi=400) and <output_fname_prefix>.svg,
				then call self.plotLatLonPhenVsPC().
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		db.setup(create_tables=False)
		session = db.session	#NOTE(review): not used again in this method
		
		snpData = SNPData(input_fname=self.input_fname, turn_into_integer=1, turn_into_array=1, ignore_2nd_column=1)
		
		
		if self.eigen_vector_fname and self.eigen_value_fname:
			#pre-computed PCA: fraction of variance explained = eigen value / sum of all eigen values
			eigen_value_ls = self.getEigenValueFromFile(self.eigen_value_fname)
			eigen_value_ls = numpy.array(eigen_value_ls)
			explained_var = eigen_value_ls/numpy.sum(eigen_value_ls)
			PC_data = self.getPCFromFile(self.eigen_vector_fname)
			PC_matrix = PC_data.PC_matrix
		else:
			max_no_of_snps = 10000
			if len(snpData.col_id_ls)>max_no_of_snps:	#2008-12-01 randomly pick max_no_of_snps SNPs
				picked_col_index_ls = random.sample(range(len(snpData.col_id_ls)), max_no_of_snps)
				new_col_id_ls = [snpData.col_id_ls[i] for i in picked_col_index_ls]
				newSnpData = SNPData(row_id_ls=snpData.row_id_ls, col_id_ls=new_col_id_ls, strain_acc_list=snpData.strain_acc_list,\
								category_list=snpData.category_list)
				newSnpData.data_matrix = snpData.data_matrix[:, picked_col_index_ls]
				snpData = newSnpData
		
			#allele_index2allele_ls is not used again in this method
			snpData, allele_index2allele_ls = snpData.convertSNPAllele2Index()
			#no pre-computed PCA available; None is handed to getStrainID2PCAPosInfo() below
			explained_var = None
			PC_matrix = None
		
		header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(self.phenotype_fname, turn_into_integer=0)
		phenData = SNPData(header=header_phen, strain_acc_list=snpData.strain_acc_list, data_matrix=data_matrix_phen)	#row label is that of the SNP matrix, because the phenotype matrix is going to be re-ordered in that way
		phenData.data_matrix = Kruskal_Wallis.get_phenotype_matrix_in_data_matrix_order(snpData.row_id_ls, strain_acc_list_phen, phenData.data_matrix)	#tricky, using strain_acc_list_phen
		
		#first column returned for self.phenotype_method_id
		phenotype_col_index = self.findOutWhichPhenotypeColumn(phenData, Set([self.phenotype_method_id]))[0]
		
		
		ecotype_info = getEcotypeInfo(db, self.country_order_type)
		
		#the offset below decides where the label of strains/snps should start in axe_snp_matrix
		#2008-11-14 only for PlotGroupOfSNPs.py. you can set it to 1 because we don't draw axe_snp_matrix here.
		snp_id_label_y_offset = 0.95
		StrainID2PCAPosInfo = self.getStrainID2PCAPosInfo(snpData, pca_range=[0,1], snp_id_label_y_offset=snp_id_label_y_offset, explained_var=explained_var, T=PC_matrix)
		
		#layout of the axes; values are passed to pylab.axes() as [left, bottom, width, height]
		#NOTE(review): axe_y_offset3, axe_x_offset3 and axe_width3 are computed but not used in this method
		axe_y_offset1 = 0.03
		axe_height1 = 0.45	#height of axe_chromosome, twice height of axe_map_phenotype_legend
		axe_y_offset2 = axe_y_offset1+axe_height1
		axe_height2 = 0.5	#height of axe_strain_pca, axe_snp_matrix, axe_map
		axe_y_offset3 = axe_y_offset2+axe_height2
		
		axe_x_offset1 = 0.05
		axe_width1 = 0.8	#width of axe_strain_pca
		axe_x_offset2 = axe_x_offset1 + 0.02 + axe_width1
		axe_width2 = 0.05	#width of axe_chromosome, axe_snp_matrix, axe_snp_pca
		axe_x_offset3 = axe_x_offset2 + axe_width2
		axe_width3 = 0.02	#width of axe_phenotype
		
		phenotype_method = Stock_250kDB.PhenotypeMethod.get(self.phenotype_method_id)
		
		#color scale of the phenotype: widen [min, max] by 10% of the range on both ends
		phenotype_cmap = mpl.cm.jet
		max_phenotype = numpy.nanmax(phenData.data_matrix[:,phenotype_col_index])	#nanmax ignores the nan elements
		min_phenotype = numpy.nanmin(phenData.data_matrix[:,phenotype_col_index])	#nanmin ignores the nan elements
		phenotype_gap = max_phenotype - min_phenotype
		phenotype_jitter = phenotype_gap/10.
		phenotype_norm = mpl.colors.Normalize(vmin=min_phenotype-phenotype_jitter, vmax=max_phenotype+phenotype_jitter)
		axe_map_phenotype_legend = pylab.axes([axe_x_offset2, axe_y_offset1, axe_width2, 0.3], frameon=False)
		cb = mpl.colorbar.ColorbarBase(axe_map_phenotype_legend, cmap=phenotype_cmap,
									norm=phenotype_norm,
									orientation='vertical')
		cb.set_label('Legend Of Phenotype %s %s'%(phenotype_method.id, phenotype_method.short_name))
		
		#map on top, PCA at the bottom, both in the same column
		axe_strain_map = pylab.axes([axe_x_offset1, axe_y_offset2, axe_width1, axe_height2], frameon=False)
		axe_strain_pca = pylab.axes([axe_x_offset1, axe_y_offset1, axe_width1, axe_height1], frameon=False)
		axe_strain_map_pca_cover = pylab.axes([axe_x_offset1, axe_y_offset1, axe_width1, axe_height1+axe_height2], frameon=False, \
											sharex=axe_strain_pca)	#cover both axe_strain_map and axe_strain_pca
		axe_strain_map_pca_cover.set_yticks([])
		axe_strain_pca_xlim = [-0.05,1.05]
		axe_strain_pca_ylim = [0, 1.05]
		axe_strain_pca.set_xlim(axe_strain_pca_xlim)
		axe_strain_pca.set_ylim(axe_strain_pca_ylim)
		#upper y limit of the cover axes = ratio of its height to the height of axe_strain_pca
		axe_strain_map_pca_cover_ylim = [0, (axe_height1+axe_height2)/axe_height1]	#set it accordingly
		axe_strain_map_pca_cover.set_ylim(axe_strain_map_pca_cover_ylim)
				
		axe_strain_pca.grid(True, alpha=0.3)
		axe_strain_pca.set_xticks([])
		axe_strain_pca.set_yticks([])
		axe_strain_pca_legend = None	#no pca legend
		self.drawStrainPCA(axe_strain_pca, axe_strain_map, axe_strain_map_pca_cover, axe_strain_pca_legend, StrainID2PCAPosInfo, \
						ecotype_info, phenData, \
					phenotype_col_index, phenotype_cmap, phenotype_norm, rightmost_x_value=axe_strain_pca_xlim[1],\
					strain_color_type=2, pca2map_line_color=None, ecotype_width_on_map=10,\
					draw_lines_to_axe_snp_matrix = False, strain_size_on_axe_strain_pca=14, pic_area=self.pic_area,\
					map_pca_line_alpha=0.2, map_pca_linewidth=0.2)	#customize a couple of things
		
		#apply the same limits again after drawing
		#NOTE(review): presumably because drawing may change them -- confirm
		axe_strain_pca.set_xlim(axe_strain_pca_xlim)
		axe_strain_pca.set_ylim(axe_strain_pca_ylim)
		axe_strain_map_pca_cover.set_ylim(axe_strain_map_pca_cover_ylim)
		
		png_output_fname = '%s.png'%self.output_fname_prefix
		pylab.savefig(png_output_fname, dpi=400)
		pylab.savefig('%s.svg'%self.output_fname_prefix)
		
		self.plotLatLonPhenVsPC(ecotype_info, StrainID2PCAPosInfo, phenData, phenotype_col_index, phenotype_cmap, phenotype_norm, 
						self.output_fname_prefix, commit=self.commit)
Exemplo n.º 4
0
    def test_drawMap(self):
        """
        Draw up to 200 ecotypes on a world map and link each to a second axes.

        Steps:
          1. connect to the stock_250k database and fetch ecotype info via
             getEcotypeInfo();
          2. draw a Basemap (projection='mill') on axe_map;
          3. for every selected ecotype that has both latitude and longitude,
             scatter it on axe_map and draw a dashed line on axe_chromosome
             from (0, y_pos) to the ecotype's position, converting the point
             between the two axes through canvas coordinates;
          4. save the figure to /tmp/map.png (dpi=600) and /tmp/map.svg.

        Progress and warnings go to stderr; the axis limits are echoed to
        stdout. Fewer than 200 ecotypes in the database is handled: only the
        available ones are iterated.
        """
        sys.stderr.write("Drawing graph on a map ...\n")
        import StockDB, Stock_250kDB  #StockDB has to be setup otherwise, StockDB.Ecotype.table is None in getEcotypeInfo()
        hostname = 'papaya.usc.edu'
        dbname = 'stock_250k'
        db_user = '******'
        db_passwd = ''
        drivername = 'mysql'
        schema = None
        db = Stock_250kDB.Stock_250kDB(drivername=drivername,
                                       username=db_user,
                                       password=db_passwd,
                                       hostname=hostname,
                                       database=dbname,
                                       schema=schema)
        #doesn't matter which database to connect as far as StockDB is imported
        db.setup(create_tables=False)
        from common import getEcotypeInfo
        ecotype_info = getEcotypeInfo(db)

        try:
            from matplotlib.toolkits.basemap import Basemap
        except ImportError:
            #basemap is also distributed under the mpl_toolkits namespace
            from mpl_toolkits.basemap import Basemap
        import pylab
        from matplotlib import rcParams
        rcParams['font.size'] = 6
        rcParams['legend.fontsize'] = 6
        #rcParams['text.fontsize'] is deprecated. font.size is used instead
        rcParams['axes.labelsize'] = 4
        rcParams['axes.titlesize'] = 8
        rcParams['xtick.labelsize'] = 4
        rcParams['ytick.labelsize'] = 4

        pylab.clf()

        #axes rectangles are [left, bottom, width, height]
        axe_map = pylab.axes([0.5, 0.02, 0.4, 0.8], frameon=False)
        axe_map.set_title("Global Arabidopsis Ecotype Distribution")
        axe_map.set_xlabel('ecotype is drawn as circles.')
        axe_map.set_ylabel('strains')
        #[llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat]
        pic_area = [-140, -40, 140, 70]
        m = Basemap(llcrnrlon=pic_area[0], llcrnrlat=pic_area[1],
                    urcrnrlon=pic_area[2], urcrnrlat=pic_area[3],
                    resolution='l', projection='mill', ax=axe_map)
        #disabled alternatives: m.drawcoastlines(), m.bluemarble()
        m.drawparallels(pylab.arange(-90, 90, 30),
                        labels=[1, 1, 0, 1],
                        size=4,
                        linewidth=0.1)
        m.drawmeridians(pylab.arange(-180, 180, 30),
                        labels=[1, 1, 0, 1],
                        size=4,
                        linewidth=0.1)
        m.fillcontinents()
        m.drawcountries(linewidth=0.1)
        #disabled alternatives: m.drawstates(), m.drawlsmask(...)

        #remember the limits set up by the map so they can be restored after plotting
        xlim = axe_map.get_xlim()
        ylim = axe_map.get_ylim()
        sys.stdout.write("xlim: %s\n" % (xlim,))
        sys.stdout.write("ylim: %s\n" % (ylim,))

        output_fname_prefix = '/tmp/map'

        sys.stdout.write("ylim: %s\n" % (axe_map.get_ylim(),))
        axe_map.set_xlim(xlim)
        axe_map.set_ylim(ylim)

        #overlaps axe_map; the dashed lines from x=0 to each ecotype are drawn on it
        axe_chromosome = pylab.axes([0.05, 0.02, 0.8, 0.8], frameon=False)
        axe_chromosome.set_title("chromosome")

        #fix the two transformations before doing cross-axe drawings
        axe_map.transData.freeze()  # eval the lazy objects
        axe_map.transAxes.freeze()
        axe_chromosome.transData.freeze()  # eval the lazy objects
        axe_chromosome.transAxes.freeze()
        no_of_ecotypes = 200
        ecotype_id_ls = list(ecotype_info.ecotype_id2ecotype_obj.keys())[:no_of_ecotypes]
        no_of_ecotypes_drawn = 0
        #iterate over what is actually available: indexing 0..199 raised IndexError
        #whenever fewer than no_of_ecotypes ecotypes were fetched.
        for i, ecotype_id in enumerate(ecotype_id_ls):
            #start of the line on axe_chromosome's y-axis; spacing stays 1/no_of_ecotypes
            y_pos = i / float(no_of_ecotypes)
            ecotype_obj = ecotype_info.ecotype_id2ecotype_obj.get(ecotype_id)
            if ecotype_obj:
                lat, lon = ecotype_obj.latitude, ecotype_obj.longitude
            else:
                sys.stderr.write(
                    "Warning: Ecotype %s not in ecotype_info (fetched from stock db).\n"
                    % ecotype_id)
                continue
            if lat and lon:
                x, y = m(lon, lat)  #map projection coordinates
                #reset the limits before each scatter, as the original code does
                axe_map.set_xlim(xlim)
                axe_map.set_ylim(ylim)
                axe_map.scatter([x], [y],
                                s=5,
                                linewidth=0,
                                facecolor='r',
                                alpha=0.2,
                                zorder=10)
                #axe_map data coordinates -> canvas -> axe_chromosome data coordinates
                canvas_x, canvas_y = axe_map.transData.xy_tup((x, y))
                axe_chromosome_xy = axe_chromosome.transData.inverse_xy_tup(
                    (canvas_x, canvas_y))
                axe_chromosome.plot([0, axe_chromosome_xy[0]],
                                    [y_pos, axe_chromosome_xy[1]],
                                    linestyle='--',
                                    alpha=0.2,
                                    linewidth=0.2)
                no_of_ecotypes_drawn += 1
        #release two transformations
        axe_map.transData.thaw()  # eval the lazy objects
        axe_map.transAxes.thaw()
        axe_chromosome.transData.thaw()  # eval the lazy objects
        axe_chromosome.transAxes.thaw()
        #put both axes back to the limits used for the cross-axe drawing
        axe_map.set_xlim(xlim)
        axe_map.set_ylim(ylim)
        axe_chromosome.set_xlim([0, 1])
        axe_chromosome.set_ylim([0, 1])
        if output_fname_prefix:
            pylab.savefig('%s.png' % output_fname_prefix, dpi=600)
            pylab.savefig('%s.svg' % output_fname_prefix)
        sys.stderr.write("%s ecotypes drawn. Done.\n" % (no_of_ecotypes_drawn))
Exemplo n.º 5
0
	def test_drawMap(self):
		"""
		Draw up to 200 ecotypes on a world map and link each to a second axes.

		Steps:
			1. connect to the stock_250k database and fetch ecotype info via getEcotypeInfo();
			2. draw a Basemap (projection='mill') on axe_map;
			3. for every selected ecotype that has both latitude and longitude, scatter it on
				axe_map and draw a dashed line on axe_chromosome from (0, y_pos) to the ecotype's
				position, converting the point between the two axes through canvas coordinates;
			4. save the figure to /tmp/map.png (dpi=600) and /tmp/map.svg.

		Progress and warnings go to stderr; the axis limits are echoed to stdout.
		Fewer than 200 ecotypes in the database is handled: only the available ones are iterated.
		"""
		sys.stderr.write("Drawing graph on a map ...\n")
		import StockDB, Stock_250kDB	#StockDB has to be setup otherwise, StockDB.Ecotype.table is None in getEcotypeInfo()
		hostname='papaya.usc.edu'
		dbname='stock_250k'
		db_user='******'
		db_passwd = ''
		drivername='mysql'
		schema = None
		db = Stock_250kDB.Stock_250kDB(drivername=drivername, username=db_user,
						password=db_passwd, hostname=hostname, database=dbname, schema=schema)
		#doesn't matter which database to connect as far as StockDB is imported
		db.setup(create_tables=False)
		from common import getEcotypeInfo
		ecotype_info = getEcotypeInfo(db)
		
		try:
			from matplotlib.toolkits.basemap import Basemap
		except ImportError:
			#basemap is also distributed under the mpl_toolkits namespace
			from mpl_toolkits.basemap import Basemap
		import pylab
		from matplotlib import rcParams
		rcParams['font.size'] = 6
		rcParams['legend.fontsize'] = 6
		#rcParams['text.fontsize'] is deprecated. font.size is used instead
		rcParams['axes.labelsize'] = 4
		rcParams['axes.titlesize'] = 8
		rcParams['xtick.labelsize'] = 4
		rcParams['ytick.labelsize'] = 4
		
		pylab.clf()
		
		#axes rectangles are [left, bottom, width, height]
		axe_map = pylab.axes([0.5, 0.02, 0.4, 0.8], frameon=False)
		axe_map.set_title("Global Arabidopsis Ecotype Distribution")
		axe_map.set_xlabel('ecotype is drawn as circles.')
		axe_map.set_ylabel('strains')
		#[llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat]
		pic_area=[-140,-40,140,70]
		m = Basemap(llcrnrlon=pic_area[0], llcrnrlat=pic_area[1], urcrnrlon=pic_area[2], urcrnrlat=pic_area[3],
				resolution='l', projection='mill', ax=axe_map)
		#disabled alternatives: m.drawcoastlines(), m.bluemarble()
		m.drawparallels(pylab.arange(-90,90,30), labels=[1,1,0,1], size=4, linewidth=0.1)
		m.drawmeridians(pylab.arange(-180,180,30), labels=[1,1,0,1], size=4, linewidth=0.1)
		m.fillcontinents()
		m.drawcountries(linewidth=0.1)
		#disabled alternatives: m.drawstates(), m.drawlsmask(...)
		
		#remember the limits set up by the map so they can be restored after plotting
		xlim = axe_map.get_xlim()
		ylim = axe_map.get_ylim()
		sys.stdout.write("xlim: %s\n"%(xlim,))
		sys.stdout.write("ylim: %s\n"%(ylim,))
		
		output_fname_prefix = '/tmp/map'
		
		sys.stdout.write("ylim: %s\n"%(axe_map.get_ylim(),))
		axe_map.set_xlim(xlim)
		axe_map.set_ylim(ylim)
		
		#overlaps axe_map; the dashed lines from x=0 to each ecotype are drawn on it
		axe_chromosome = pylab.axes([0.05, 0.02, 0.8, 0.8], frameon=False)
		axe_chromosome.set_title("chromosome")
		
		#fix the two transformations before doing cross-axe drawings
		axe_map.transData.freeze()  # eval the lazy objects
		axe_map.transAxes.freeze()
		axe_chromosome.transData.freeze()  # eval the lazy objects
		axe_chromosome.transAxes.freeze()
		no_of_ecotypes = 200
		ecotype_id_ls = list(ecotype_info.ecotype_id2ecotype_obj.keys())[:no_of_ecotypes]
		no_of_ecotypes_drawn = 0
		#iterate over what is actually available: indexing 0..199 raised IndexError
		#whenever fewer than no_of_ecotypes ecotypes were fetched.
		for i, ecotype_id in enumerate(ecotype_id_ls):
			#start of the line on axe_chromosome's y-axis; spacing stays 1/no_of_ecotypes
			y_pos = i/float(no_of_ecotypes)
			ecotype_obj = ecotype_info.ecotype_id2ecotype_obj.get(ecotype_id)
			if ecotype_obj:
				lat, lon = ecotype_obj.latitude, ecotype_obj.longitude
			else:
				sys.stderr.write("Warning: Ecotype %s not in ecotype_info (fetched from stock db).\n"%ecotype_id)
				continue
			if lat and lon:
				x, y = m(lon, lat)	#map projection coordinates
				#reset the limits before each scatter, as the original code does
				axe_map.set_xlim(xlim)
				axe_map.set_ylim(ylim)
				axe_map.scatter([x],[y], s=5, linewidth=0, facecolor='r', alpha=0.2, zorder=10)
				#axe_map data coordinates -> canvas -> axe_chromosome data coordinates
				canvas_x, canvas_y = axe_map.transData.xy_tup((x,y))
				axe_chromosome_xy = axe_chromosome.transData.inverse_xy_tup((canvas_x,canvas_y))
				axe_chromosome.plot([0,axe_chromosome_xy[0]], [y_pos, axe_chromosome_xy[1]], linestyle='--', alpha=0.2, linewidth=0.2)
				no_of_ecotypes_drawn += 1
		#release two transformations
		axe_map.transData.thaw()  # eval the lazy objects
		axe_map.transAxes.thaw()
		axe_chromosome.transData.thaw()  # eval the lazy objects
		axe_chromosome.transAxes.thaw()
		#put both axes back to the limits used for the cross-axe drawing
		axe_map.set_xlim(xlim)
		axe_map.set_ylim(ylim)
		axe_chromosome.set_xlim([0,1])
		axe_chromosome.set_ylim([0,1])
		if output_fname_prefix:
			pylab.savefig('%s.png'%output_fname_prefix, dpi=600)
			pylab.savefig('%s.svg'%output_fname_prefix)
		sys.stderr.write("%s ecotypes drawn. Done.\n"%(no_of_ecotypes_drawn))