def funcion(dato, opciones):
    """Evaluate the normal distribution through R as requested by *opciones*.

    Each optional key of *opciones* triggers one task:

    * ``"Cuantiles"``: ``qnorm`` of ``"Probabilidad"``; result stored under
      ``"cuantiles"``.
    * ``"Probabilidades"``: ``pnorm`` of ``"Valores"``; result stored under
      ``"probabilidades"``.
    * ``u"Gráfica"``: PNG plot of the density (``"Tipografica"`` equal to
      ``"Densidad"``) or of the cumulative probability (any other value);
      the path of the image is stored under ``"ruta"``.

    Every task reads its own ``"Media"`` and ``u"Desviación"`` entries.
    *dato* is accepted for interface compatibility and is not used.

    Returns the dictionary holding the results of the requested tasks.
    """
    from rpy import r
    diccionario = {}
    if "Cuantiles" in opciones:
        cuantiles = opciones["Cuantiles"]
        # Any direction other than 'izquierda' selects the upper tail.
        sentido = cuantiles[u"DirecciónCola"] == 'izquierda'
        diccionario["cuantiles"] = r.qnorm(
            [float(cuantiles["Probabilidad"])],
            mean=float(cuantiles["Media"]),
            sd=float(cuantiles[u"Desviación"]),
            lower_tail=sentido)
    if "Probabilidades" in opciones:
        probabilidades = opciones["Probabilidades"]
        sentido = probabilidades[u"DirecciónCola"] == 'izquierda'
        diccionario["probabilidades"] = r.pnorm(
            [float(probabilidades["Valores"])],
            mean=float(probabilidades["Media"]),
            sd=float(probabilidades[u"Desviación"]),
            lower_tail=sentido)
    if u"Gráfica" in opciones:
        import os
        import tempfile
        grafica = opciones[u"Gráfica"]
        media = float(grafica["Media"])
        desviacion = float(grafica[u"Desviación"])
        # mkstemp reserves a unique file name; a random numeric suffix could
        # collide with (or be guessed before) another run.
        # TODO: take the temporary directory from the configuration.
        descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
        os.close(descriptor)
        diccionario["ruta"] = nombrefichero
        if grafica["Tipografica"] == "Densidad":
            etiquetay = "Densidad"
            mifuncion = r.dnorm
        else:
            etiquetay = "Probabilidad acumulada"
            mifuncion = r.pnorm
        r.png(nombrefichero)
        try:
            # Cover the same +/-3.291 standard deviations as before, but
            # centred on the requested mean instead of always on 0.
            lista = r.seq(media - 3.291 * desviacion,
                          media + 3.291 * desviacion, length=100)
            r.plot(lista, mifuncion(lista, mean=media, sd=desviacion),
                   xlab="x", ylab=etiquetay,
                   # The title reports the parameters actually plotted
                   # (it used to claim mu = 0, sigma = 1 unconditionally).
                   main=r.expression(r.paste("Normal Distribution: ",
                                             "mu", " = %g, " % media,
                                             "sigma", " = %g" % desviacion)),
                   type="l")
            r.abline(h=0, col="gray")
        finally:
            # Always release the graphics device, even if plotting failed.
            r.dev_off()
    return diccionario
def scatter_plot(self, gene_id_list, output_fname='/tmp/scatter_plot.ps'):
    """
    02-15-06
        1st gene is regarded as X, all others genes are treated as Y

    Genes missing from ``self.gene_id2expr_array`` are reported on stderr
    and skipped. At least two known genes are required (one X, one Y);
    otherwise nothing is drawn and None is returned.

    Returns *output_fname* when the PostScript file was written, else None.
    """
    vector_list = []
    #gene_id_list may contain some inexistent genes
    real_gene_id_list = []
    for gene_id in gene_id_list:
        if gene_id in self.gene_id2expr_array:
            real_gene_id_list.append(gene_id)
            vector_list.append(self.gene_id2expr_array[gene_id])
        else:
            sys.stderr.write("%s doesn't appear in the dataset\n"%(gene_id))
    if len(real_gene_id_list) < 2:
        # A single gene has nothing to be plotted against; the previous code
        # failed with IndexError on vector_list[1] after opening the device.
        return None
    r.postscript("%s"%output_fname)
    try:
        axis_range = self.get_min_max(vector_list)
        # Counting starts from the 1st (X) gene itself, so the first Y curve
        # uses colour 2.
        no_of_curves = 2
        r.plot(vector_list[0], vector_list[1], xlab='value of %s'%real_gene_id_list[0],
               xlim=axis_range, ylim=axis_range,
               ylab='other genes values', col=no_of_curves)
        for i in range(2, len(vector_list)):
            no_of_curves += 1
            r.points(vector_list[0], vector_list[i], col=no_of_curves)
        # Label with the genes actually plotted so labels and colours stay
        # aligned when some requested genes were missing.
        r.legend(axis_range[1], axis_range[1], real_gene_id_list,
                 col=list(range(1, no_of_curves+1)), lty=1, xjust=1)
    finally:
        r.dev_off()
    return output_fname
def plot_boxplot_r(self, filename=None, n_highest_weights=1, n_highest_weights_for_quantity=1, weight_threshold=None, logy=False):
    """Draw one boxplot per entry of ``self.values_from_mr`` and mark selected points.

    For every ``(var, values)`` pair the boxplot itself is produced by
    ``plot_one_boxplot_r``; afterwards points are overlaid with ``r.points``:

    * yellow: columns returned by
      ``get_index_of_component_weights_over_threshold`` (only when
      *weight_threshold* is given),
    * blue: columns returned by ``get_index_of_weights_over_threshold``
      (only when *weight_threshold* is given),
    * green: columns returned by ``get_index_of_n_highest_component_weights``
      (when *n_highest_weights_for_quantity* > 0),
    * red: columns returned by ``get_index_of_n_highest_weights``
      (when *n_highest_weights* > 0).

    If *filename* is given, a PDF device is opened on it before plotting and
    closed at the end. If *logy* is true, the string ``'y'`` is passed to
    ``plot_one_boxplot_r`` (presumably selecting a log-scaled y axis --
    confirm against that helper).
    """
    from rpy import r

    logstring = ''
    if logy:
        logstring='y'

    if filename is not None:
        r.pdf(file=filename)

    for var, values in self.values_from_mr.iteritems():
        plot_one_boxplot_r(values, var, logstring)
        # Work on a 2-D view: a 1-D array becomes a single row.
        if values.ndim == 1:
            v = resize(values, (1, values.size))
        else:
            v = values
        ivar = self.get_index_for_quantity(var)
        if weight_threshold is not None:
            for i in range(0, v.shape[0]):
                # NOTE(review): neither index lookup depends on i, and the row
                # slice i:(i+iw.size) spans iw.size rows -- confirm this is
                # the intended selection.
                iw = self.get_index_of_component_weights_over_threshold(ivar, weight_threshold)
                if iw.size > 0:
                    r.points(i+1, v[i:(i+iw.size),iw], col='yellow', cex=0.5)
                iw = self.get_index_of_weights_over_threshold(weight_threshold)
                if iw.size > 0:
                    r.points(i+1, v[i:(i+iw.size),iw], col='blue', cex=0.5)
        if n_highest_weights_for_quantity > 0:
            # x position i+1 is the 1-based index of row i.
            for i in range(0, v.shape[0]):
                r.points(i+1, v[i,self.get_index_of_n_highest_component_weights(ivar, n_highest_weights_for_quantity)], col='green', cex=0.5)
        if n_highest_weights > 0:
            for i in range(0, v.shape[0]):
                r.points(i+1, v[i,self.get_index_of_n_highest_weights(n_highest_weights)], col='red', cex=0.5)

    if filename is not None:
        r.dev_off()
def main():
    """Draw a scatter plot of two columns of a tab-separated file into a PDF.

    Command line arguments: 1 = input file, 2 = output PDF, 3 and 4 =
    1-based column numbers, 5 = plot title, 6 = x label, 7 = y label.

    Empty lines, lines starting with ``#`` and lines whose selected columns
    are missing or not numeric are skipped; the value ``NA`` (any case) is
    read as NaN. The plot is drawn only when the number of skipped lines is
    smaller than the index of the last line read.
    """
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        columns = int(sys.argv[3]) - 1, int(sys.argv[4]) - 1
    except (IndexError, ValueError):
        stop_err("Columns not specified, your query does not contain a column of numerical data.")
    title = sys.argv[5]
    xlab = sys.argv[6]
    ylab = sys.argv[7]

    matrix = []
    skipped_lines = 0
    i = 0
    infile = open(in_fname)
    try:
        for i, line in enumerate(infile):
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                skipped_lines += 1
                continue
            fields = line.split("\t")
            row = []
            try:
                for column in columns:
                    val = fields[column]
                    if val.lower() == "na":
                        row.append(float("nan"))
                    else:
                        row.append(float(val))
            except (IndexError, ValueError):
                # Column missing on this line, or its value is not a number.
                skipped_lines += 1
                continue
            matrix.append(row)
    finally:
        infile.close()

    if skipped_lines < i:
        try:
            r.pdf(out_fname, 8, 8)
            r.plot(array(matrix), type="p", main=title, xlab=xlab, ylab=ylab, col="blue", pch=19)
            r.dev_off()
        except Exception:
            # sys.exc_info() keeps this valid on interpreters with and
            # without the "except ... as" syntax.
            stop_err("%s" % str(sys.exc_info()[1]))
def drawCpuUsage(d, xat, xlbs, pic):
    """Plot the CPU usage samples *d* (percent values) into the PNG file *pic*.

    d    -- sequence of usage values; must not be empty (``max(d)`` is taken)
    xat  -- x positions of the x-axis ticks
    xlbs -- labels for those ticks
    pic  -- path of the PNG file to write

    Samples above 40 are annotated with their value, and the maximum is
    written above the plot (red when it exceeds 50, green otherwise).
    """
    pch = 22        # point symbol: square
    lty = 1         # solid line
    col = "blue"    # line colour
    # plot type, see http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/plot.html
    ltype = "o"
    ylim = (0, 100)  # y domain
    xaxis = 1
    yaxis = 2
    vertical_text = 2
    horizontal_text = 1
    text_size = 0.8
    yat = list(range(0, 110, 10))
    ylbs = ["%d%%" % x for x in yat]
    legend_x = 1
    legend_y = 100

    # Values over this threshold get a text label next to their point.
    warn_val = 40
    # R plots a bare vector against x = 1..n, so label positions must be
    # 1-based; the 0-based enumerate index put every label one sample early.
    flagged = [(idx + 1, val) for idx, val in enumerate(d) if val > warn_val]
    xtext = [x for x, val in flagged]
    ytext = [val for x, val in flagged]
    labtext = ["%d%%" % val for x, val in flagged]

    # summary
    dmax = max(d)
    sumry = "max %.2f%%" % dmax
    sumrycol = "red" if dmax > 50 else "green"

    r.png(pic, width=900, height=450*0.6)
    try:
        r.plot(r.c(d), type=ltype, col=col, ylim=ylim, pch=pch, lty=lty, axes=False, ann=False)
        # http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/text.html
        if flagged:
            # Skip the call entirely when there is nothing to label.
            r.text(xtext, ytext, labels=labtext, pos=3, cex=0.8, col="red")
        # summary text
        r.mtext(sumry, side=3, cex=1, col=sumrycol)
        # axes
        r.axis(xaxis, las=vertical_text, at=xat, lab=r.c(xlbs))
        r.axis(yaxis, las=horizontal_text, at=yat, lab=r.c(ylbs))
        r.box()
        # titles
        r.title(main="CPU Sampling")
        r.title(xlab="Time")
        r.title(ylab="CPU Usage")
        # legend
        r.legend(legend_x, legend_y, r.c(("trunk")), col=col, cex=text_size, pch=pch, lty=lty)
    finally:
        # Release the graphics device even if a drawing call failed.
        r.dev_off()
def plot(self, outputfname, fix_index_ls, parameter_list, var_index, variant_ls, parameter_index2label, y_axis_ls, y_label):
    """Plot *y_axis_ls* against *variant_ls* into a PNG file.

    The file name is *outputfname* extended with *y_label* and the label of
    the varying parameter. The title names the two fixed parameters (the
    first two entries of *fix_index_ls*) together with their values from
    *parameter_list*. Progress is reported on stderr.
    """
    varying_label = parameter_index2label[var_index]
    outputfname = '%s_%s2%s.png'%(outputfname, y_label, varying_label)
    sys.stderr.write('Plotting %s'%outputfname)
    r.png(outputfname)
    first_fixed = fix_index_ls[0]
    second_fixed = fix_index_ls[1]
    title_fields = (
        y_label,
        varying_label,
        parameter_index2label[first_fixed],
        parameter_list[first_fixed],
        parameter_index2label[second_fixed],
        parameter_list[second_fixed],
    )
    r.plot(variant_ls, y_axis_ls,
           main='%s vs %s (%s=%s, %s=%s)'%title_fields,
           xlab=varying_label, ylab=y_label)
    r.dev_off()
    sys.stderr.write('Done.\n')
def draw_tfbs_similarity_ls_histogram(self, tfbs_similarity_ls, output_fname):
    """Draw a histogram of *tfbs_similarity_ls* into the PNG *output_fname*.

    Nothing is drawn for sequences with 10 or fewer entries; the sequence is
    echoed on stderr instead. Progress messages also go to stderr.
    """
    sys.stderr.write("Drawing histogram for tfbs_similarity_ls...")
    if len(tfbs_similarity_ls) <= 10:
        # Wrap the argument in a 1-tuple: formatting directly with a tuple
        # value raised TypeError ("not all arguments converted").
        sys.stderr.write("too short: %s, aborted\n" % (tfbs_similarity_ls,))
        return
    r.png('%s'%output_fname)
    try:
        r.hist(tfbs_similarity_ls, main='histogram', xlab='tfbs_similarity', ylab='freq')
    finally:
        # Release the graphics device even if the histogram call failed.
        r.dev_off()
    sys.stderr.write("Done.\n")
def hist_plot(self, dict, filename, xlabel, ylabel):
    """Plot, for every key of *dict*, the length of its value as a vertical bar.

    The figure is written to the PNG file *filename*; the title is
    ``'<ylabel> v.s. <xlabel>'``.
    """
    #convert self.go_no2cluster and self.go_no2gene into histograms
    r.png('%s'%filename)
    sizes = [(key, len(value)) for (key, value) in dict.iteritems()]
    x_list = [key for (key, size) in sizes]
    y_list = [size for (key, size) in sizes]
    heading = '%s v.s. %s'%(ylabel, xlabel)
    r.plot(x_list, y_list, type='h', xlab=xlabel, ylab=ylabel, main=heading)
    r.dev_off()
def plot(self, vector_list, gene_id_list):
    """Draw every vector of *vector_list* into the PostScript file ``self.plot_file``.

    Each vector is converted with ``self.xy_list_return`` and drawn through
    ``self._plot``; a legend labelled with *gene_id_list* is added at the
    end, with one colour per counted curve (``self.no_of_curves``, reset to
    0 here).
    """
    self.no_of_curves = 0
    last_x = len(vector_list[0])
    x_range = (1, last_x)
    y_range = self.get_min_max(vector_list)
    r.postscript("%s"%self.plot_file)
    for vector in vector_list:
        xy_pair = self.xy_list_return(vector)
        self._plot(xy_pair[0], xy_pair[1], x_range, y_range)
    # Legend anchored at the upper right corner of the plotting ranges.
    legend_x = x_range[1]
    legend_y = y_range[1]
    curve_colours = range(1, self.no_of_curves+1)
    r.legend(legend_x, legend_y, gene_id_list, col=curve_colours, lty=1, pch='*', xjust=1)
    r.dev_off()
def funcionprincipal(dato, variable, opciones):
    """Draw a bar plot of *variable* into a temporary PNG file.

    The data come from ``dato.query(variable)``. *opciones* is accepted for
    interface compatibility and is not used.

    Returns ``{"Diagrama de Barras": {"ruta": <path of the PNG>}}``.
    """
    from rpy import r #pylint: disable=import-error
    import os
    import tempfile
    diccionario = {"Diagrama de Barras": {}}
    lista = dato.query(variable)
    # mkstemp reserves a unique file name; a random numeric suffix could
    # collide with (or be guessed before) another run.
    # TODO: take the temporary directory from the configuration.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
    os.close(descriptor)
    diccionario["Diagrama de Barras"]["ruta"] = nombrefichero
    r.png(nombrefichero)
    try:
        r.barplot(lista, main=variable, axisnames=True, axes=True)
    finally:
        # Release the graphics device even if the plot failed.
        r.dev_off()
    return diccionario
def create_p_value_boxplot_eps(best_p_values, filename):
    """Write a boxplot of *best_p_values* to the PostScript file *filename*.

    One box is drawn per key of *best_p_values*, in sorted key order, and is
    labelled with the key; the value stored under a key is the data of its
    box. The device is closed even when plotting fails.
    """
    from rpy import r
    r.postscript(filename, horizontal=False, height=4.5, width=6, pointsize=10)
    try:
        ordered_keys = sorted(best_p_values.keys())
        groups = [best_p_values.get(key) for key in ordered_keys]
        labels = [str(key) for key in ordered_keys]
        r.boxplot(groups, names=labels, xlab="sample size", ylab="p-score")
    finally:
        r.dev_off()
def funcionprincipal(dato, variable, opciones):
    """Draw a histogram of *variable* into a temporary PNG file.

    The data come from ``dato.query(variable)``; the number of classes is
    read from ``opciones[u"NúmeroIntervalos"]``.

    Returns ``{"Histograma": {"ruta": <path of the PNG>}}``.
    """
    from rpy import r
    import os
    import tempfile
    diccionario = {"Histograma": {}}
    lista = dato.query(variable)
    # mkstemp reserves a unique file name; a random numeric suffix could
    # collide with (or be guessed before) another run.
    # TODO: take the temporary directory from the configuration.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
    os.close(descriptor)
    diccionario["Histograma"]["ruta"] = nombrefichero
    r.png(nombrefichero)
    try:
        r.hist(lista, main=variable, xlab=variable,
               nclass=int(opciones[u"NúmeroIntervalos"]))
    finally:
        # Release the graphics device even if the plot failed.
        r.dev_off()
    return diccionario
def create_p_value_boxplot_eps(best_p_values, filename):
    """Render *best_p_values* as a boxplot in the PostScript file *filename*.

    Keys are processed in sorted order; each key labels one box whose data
    is the value stored under that key. The graphics device is always
    closed, also when plotting raises.
    """
    from rpy import r
    r.postscript(filename, horizontal=False, height=4.5, width=6, pointsize=10)
    try:
        keys = list(best_p_values.keys())
        keys.sort()
        boxes = []
        names = []
        for key in keys:
            boxes.append(best_p_values.get(key))
            names.append(str(key))
        r.boxplot(boxes, names=names, xlab="sample size", ylab="p-score")
    finally:
        r.dev_off()
def hist_plot_ratio(self, dict1, dict2, filename, xlabel, ylabel):
    """Plot, per key, the share of *dict1* entries among both dictionaries.

    For every key found in *dict1* or *dict2* the ratio
    ``len(dict1[key]) / (len(dict1[key]) + len(dict2[key]))`` is drawn as a
    vertical bar (a missing key counts as an empty list). Keys whose lists
    are empty in both dictionaries have no defined ratio and are left out.

    The figure is written to the PNG file *filename*.
    """
    #convert self.go_no2cluster and self.go_no2gene into histograms
    x_list = []
    y_list = []
    for key in set(dict1) | set(dict2):
        count1 = len(dict1.get(key, []))
        total = count1 + len(dict2.get(key, []))
        if total == 0:
            # Previously raised ZeroDivisionError.
            continue
        x_list.append(key)
        y_list.append(float(count1)/total)
    r.png('%s'%filename)
    try:
        r.plot(x_list, y_list, type='h', xlab=xlabel, ylab=ylabel,
               main='%s v.s. %s'%(ylabel, xlabel))
    finally:
        # Release the graphics device even if the plot failed.
        r.dev_off()
def plots(regression_o, getData_o):
    """Plot the dataset with its regression line, and a boxplot, using R.

    Writes 'car_regress.pdf' (scatter plot of *getData_o* plus the line
    given by the '(Intercept)' and 'y' entries of *regression_o*) and
    'car_hist.pdf' (boxplot of *getData_o*).

    Returns the two file names as a tuple.
    """
    written = ('car_regress.pdf', 'car_hist.pdf')

    # Scatter plot with the fitted line drawn in red.
    r.pdf(written[0])
    r.plot(getData_o, ylab='dist', xlab='speed')
    r.abline(regression_o['(Intercept)'], regression_o['y'], col='red')
    r.dev_off()

    # Boxplot of both variables.
    r.pdf(written[1])
    r.boxplot(getData_o, names=['dist', 'speed'])
    r.dev_off()

    return written
def test():
    """Draw a small sample CPU-usage chart into 'test.png' using rpy."""
    samples = [1.25, 3.45, 6.75, 20.2, 9.9]
    square = 22      # point symbol
    solid = 1        # line type
    colour = "blue"
    data = r.c(samples)
    # draw image using rpy
    r.png("test.png", width=300, height=300)
    r.plot(data, type="o", col=colour, ylim=(0, 100), pch=square, lty=solid,
           axes=False, ann=False)
    r.axis(1, at=(1, 2, 3, 4, 5), lab=r.c("a", "b", "c", "d", "e"))
    r.axis(2, las=1, at=(0, 50, 100))
    r.box()
    r.title(main="CPU usage sampling result")
    r.title(xlab="Time")
    r.title(ylab="CPU")
    r.legend(1, 100, r.c("trunk"), cex=0.8, col=r.c(colour), pch=square, lty=solid)
    r.dev_off()
def plot(self):
    """Plot one curve per selected row of ``stat_plot_data`` into ``self.ofname`` (PNG).

    The rows are those matching the three fixed parameters
    (``self.option_num_dict[0..2]``, label = column name, value = required
    value) and ``self.tag``, ordered by the varying parameter
    (``self.option_num_dict[3]``). Each row is handed to ``self._plot``;
    a legend is added afterwards.

    NOTE(review): ``self.x_range``, ``self.y_range``, ``self.varying_list``
    and ``self.no_of_curves`` are only read here; presumably ``_plot`` or
    the constructor maintains them -- confirm.
    """
    #this function deals with 3 fixed parameters and 1 varying parameter
    # NOTE(review): the statement is built by string interpolation; labels,
    # values and tag must come from trusted input.
    self.curs.execute("select distinct %s, %s, %s, %s, tag from\
        stat_plot_data where %s=%s and %s=%s and %s=%s and tag='%s' order by %s \
        "%(self.option_num_dict[0].label, self.option_num_dict[1].label, self.option_num_dict[2].label,\
        self.option_num_dict[3].label, self.option_num_dict[0].label, self.option_num_dict[0].value, \
        self.option_num_dict[1].label, self.option_num_dict[1].value, self.option_num_dict[2].label, \
        self.option_num_dict[2].value, self.tag, self.option_num_dict[3].label))
    rows = self.curs.fetchall()
    r.png('%s'%self.ofname)
    for row in rows:
        #position 0,1,2 are fixed values, 3 is varying value, 4 is the tag value.
        self._plot(row)
    #add the legend
    r.legend(self.x_range[1], self.y_range[1], self.varying_list, col=range(1, self.no_of_curves+1), lty=1, pch='*', xjust=1)
    r.dev_off()
def output(self):
    """Write the correlation table to ``self.of`` and plot one of its columns.

    The table has a header line with ``self.p_value_list`` and one line per
    row of ``self.result_array``, prefixed with a counter starting at
    ``self.df_lower``. Column ``self.column`` of the table is then plotted
    against the range ``self.df_lower .. self.df_upper`` into
    'p_value_cor.pdf'.
    """
    self.matrix = array(self.result_array)
    header_fields = [str(p_value) for p_value in self.p_value_list]
    self.of.write('p_value\t%s\n'%'\t'.join(header_fields))
    first_df = self.df_lower
    for offset, correlations in enumerate(self.result_array):
        #string can be 'join'ed. easy to output
        fields = [str(correlation) for correlation in correlations]
        self.of.write('%d\t%s\n'%(first_df + offset, '\t'.join(fields)))
    r.pdf('p_value_cor.pdf')
    #select a column to plot
    selected = self.matrix[:,self.column]
    heading = 'p_value: %s'%self.p_value_list[self.column]
    degrees = range(self.df_lower, self.df_upper+1)
    r.plot(degrees, selected, type='o', pch='*', xlab='df', ylab='correlation', main=heading)
    r.dev_off()
def plot(self):
    """Plot the degree distribution of ``self.graph`` on log-log axes.

    The degree of a node is its in-degree plus its out-degree. Counts are
    accumulated in ``self.degree_dict``; for every degree k the point
    (log(k), log(count / number of nodes)) is drawn into
    'degree_distribution.pdf'.
    """
    graph = self.graph
    vertex_list = graph.node_list()
    number_of_nodes = len(vertex_list)
    tally = self.degree_dict
    for vertex in vertex_list:
        degree = graph.inc_degree(vertex) + graph.out_degree(vertex)
        tally[degree] = tally.get(degree, 0) + 1
    r.pdf('degree_distribution.pdf')
    log_points = [
        (r.log(degree), r.log(float(tally[degree])/number_of_nodes))
        for degree in tally
    ]
    x_list = [log_k for (log_k, log_p) in log_points]
    y_list = [log_p for (log_k, log_p) in log_points]
    r.plot(x_list, y_list, type='p', xlab='log(k)', ylab='log(p(k))')
    r.dev_off()
def __print_entropies(self, entropies):
    """
    This is for debugging purposes.

    Plots *entropies* against the iteration index into the PostScript file
    DEBUG_ENTROPIES_FILE. If rpy cannot be imported, a message is printed
    and nothing is plotted.
    """
    try:
        from rpy import r
    except Exception:
        # Best effort for a debugging aid: any failure to load rpy is
        # reported, but SystemExit/KeyboardInterrupt are no longer swallowed.
        print("Could not import rpy module")
        return
    r.postscript(DEBUG_ENTROPIES_FILE)
    try:
        r.plot(entropies, type='b', xlab="Iterations", ylab="Entropy")
    finally:
        # Release the graphics device even if the plot failed.
        r.dev_off()
    return
def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
    """
    08-09-05
        add type
    08-29-05
        add no_of_valids to cut genes with too few valid values

    Reads the delimited file *src_pathname* row by row, computes the
    standard deviation of the valid values of each row and, when more than
    100 rows qualified, draws a histogram of those standard deviations into
    '<outputdir>/<basename of src_pathname>.png'.

    The first column of a row is taken as the gene id; 'NA' entries are
    masked, empty entries are dropped. With ``type == 1`` values are floored
    at 10 and replaced by their natural logarithm. Rows with fewer than
    *no_of_valids* unmasked values are ignored.

    *b_instance* and *threshold* are not used in this function.
    With ``self.debug`` set, intermediate values are printed and the
    function waits for keyboard input after every qualifying row.
    """
    reader = csv.reader(file(src_pathname), delimiter=delimiter)
    filename = os.path.basename(src_pathname)
    output_filename = os.path.join(outputdir, filename)
    std_list = []
    for row in reader:
        gene_id = row[0]
        new_row = []
        mask_ls = []
        for i in range(1, len(row)):
            if row[i] == 'NA':
                # Placeholder value, hidden by the mask entry below.
                new_row.append(1e20)
                mask_ls.append(1)
            elif row[i] == '':
                #ignore empty entry
                continue
            else:
                value = float(row[i])
                if type==1:
                    if value<=10:
                        value = 10
                    value = math.log(value)
                new_row.append(value)
                mask_ls.append(0)
        ma_array = array(new_row, mask=mask_ls)
        if self.debug:
            print "The data vector is ",ma_array
            print "Its mask is ", ma_array.mask()
        if len(ma_array.compressed())>=no_of_valids:
            #at least two samples, otherwise, correlation can't be calculated
            #08-29-05 no_of_valids controls not too many NA's, which is for graph_modeling
            std = MLab.std(ma_array.compressed())    #disregard the NAs
            if self.debug:
                print "std is ",std
                raw_input("Continue?(Y/n)")
            std_list.append(std)
    del reader
    # Only draw the histogram when enough rows contributed.
    if len(std_list)>100:
        r.png('%s.png'%output_filename)
        r.hist(std_list, main='histogram',xlab='std',ylab='freq')
        r.dev_off()
def funcionprincipal(dato, variables, opciones):
    """Draw a scatter plot matrix of two variables into a temporary PNG file.

    The data of ``variables[0]`` and ``variables[1]`` are fetched with
    ``dato.query`` and passed to R's ``pairs``. *opciones* is accepted for
    interface compatibility and is not used.

    Returns ``{u"Diagrama de dispersión": {"ruta": <path of the PNG>}}``.
    """
    from rpy import r #pylint: disable=import-error
    import os
    import tempfile
    diccionario = {u"Diagrama de dispersión": {}}
    lista1 = dato.query(variables[0])
    lista2 = dato.query(variables[1])
    # mkstemp reserves a unique file name; a random numeric suffix could
    # collide with (or be guessed before) another run.
    # TODO: take the temporary directory from the configuration.
    descriptor, nombrefichero = tempfile.mkstemp(prefix="driza", suffix=".png")
    os.close(descriptor)
    diccionario[u"Diagrama de dispersión"]["ruta"] = nombrefichero
    r.png(nombrefichero)
    try:
        # Alternative kept for reference: scatterplot() from the R package
        # "car" (reg_line=False, labels=False, smooth=False, span=0.5).
        r.pairs([lista1, lista2])
    finally:
        # Release the graphics device even if the plot failed.
        r.dev_off()
    return diccionario
def main(args):
    """Chain BLAST searches over consecutive source files and plot set sizes.

    Every source file is searched against the next one. From the second
    search on, the query file is first reduced (through the external
    ``reduce_fasta_file.py`` script) to the ids kept from the previous
    search. The parsed results are merged with ``integrate_all_homologs``
    and a histogram of the sizes of the homologous sets is written to
    'hist_size_homol_sets.pdf'. No histogram is written when no set exists.

    Raises OSError if ``reduce_fasta_file.py`` cannot be started.
    """
    import subprocess

    sourcefiles = get_src_files(args.get('in'))
    hashofhos = None
    parsedfiles = []
    for i in range(len(sourcefiles) - 1):
        queryfile = sourcefiles[i]
        subjectfile = sourcefiles[i + 1]
        if hashofhos:
            idfile = 'keepids.tmp'
            fw = open(idfile, 'w')
            try:
                for seqid in hashofhos.keys():
                    fw.write(seqid + '\n')
            finally:
                fw.close()
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through unchanged.
            subprocess.call(['reduce_fasta_file.py', '-f', queryfile,
                             '-i', idfile, '-o', outfile])
            queryfile = outfile
        blastout = blast(queryfile, subjectfile)
        parsedfile, hashofhos = parse_blastout(blastout, args)
        parsedfiles.append(parsedfile)
        infomsg("hits: %s" % len(hashofhos))

    Homologs = integrate_all_homologs(parsedfiles, args)

    # stats: size of each set, i.e. its orthologs plus the seed itself
    no = [len(orthlist) + 1 for orthlist in Homologs.values()]
    if not no:
        # max() of an empty list would raise; there is nothing to plot.
        infomsg("no homologous sets found, histogram skipped")
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf(outfile)
    try:
        r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    finally:
        # Release the graphics device even if the histogram failed.
        r.dev_off()
def plot_values_as_boxplot_r(values_dict, filename=None, logy=False):
    """Draw one boxplot figure (using R) per variable of *values_dict*.

    *values_dict* maps a variable name to its values (1- or 2-D array);
    every row becomes one box. If *filename* is given, the plots go into
    that file as pdf. If *logy* is true, the y-axis is plotted on the log
    scale.
    """
    from rpy import r

    logstring = 'y' if logy else ''
    write_pdf = filename is not None

    if write_pdf:
        r.pdf(file=filename)

    for item in values_dict.iteritems():
        # item is a (variable name, values) pair
        plot_one_boxplot_r(item[1], item[0], logstring)

    if write_pdf:
        r.dev_off()
def plot(self, hardcopy = None):
    """Draw the diagnostic plots of the R package 'qvalue' for this result.

    The p-values, pi0, q-values and lambda stored on the instance are
    exported to R, assembled into an object of class "qvalue" and passed to
    ``qplot``. If *hardcopy* is given, the plot is written to that PNG file
    (1024x768, cairo).
    """
    to_file = bool(hardcopy)
    if to_file:
        R.png(hardcopy, width=1024, height=768, type="cairo")

    R.require('qvalue')

    # build a qobj: export each component under the name the R code expects
    exports = (
        ("pval", "mPValues"),
        ("pi0", "mPi0"),
        ("qval", "mQValues"),
        ("lambda", "mLambda"),
    )
    for r_name, attribute in exports:
        R.assign(r_name, getattr(self, attribute))

    R('qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)')
    R(' class(qobj) <- "qvalue" ')
    R('qplot(qobj)')

    if to_file:
        R.dev_off()
def main( args ):
    """Chain BLAST searches over consecutive source files and plot set sizes.

    Every source file is searched against the next one. From the second
    search on, the query file is first reduced (through the external
    ``reduce_fasta_file.py`` script) to the ids kept from the previous
    search. The parsed results are merged with ``integrate_all_homologs``
    and a histogram of the sizes of the homologous sets is written to
    'hist_size_homol_sets.pdf'. No histogram is written when no set exists.

    Raises OSError if ``reduce_fasta_file.py`` cannot be started.
    """
    import subprocess

    sourcefiles = get_src_files( args.get('in') )
    hashofhos = None
    parsedfiles = []
    for i in range( len(sourcefiles)-1 ):
        queryfile = sourcefiles[i]
        subjectfile = sourcefiles[i+1]
        if hashofhos:
            idfile = 'keepids.tmp'
            fw = open( idfile, 'w' )
            try:
                for seqid in hashofhos.keys():
                    fw.write( seqid + '\n' )
            finally:
                fw.close()
            outfile = 'red_' + get_basename(queryfile) + '.aa'
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed through unchanged.
            subprocess.call( ['reduce_fasta_file.py', '-f', queryfile,
                              '-i', idfile, '-o', outfile] )
            queryfile = outfile
        blastout = blast( queryfile, subjectfile )
        parsedfile, hashofhos = parse_blastout( blastout, args )
        parsedfiles.append( parsedfile )
        infomsg( "hits: %s" %len(hashofhos) )

    Homologs = integrate_all_homologs( parsedfiles, args )

    # stats: size of each set, i.e. its orthologs plus the seed itself
    no = [ len(orthlist) + 1 for orthlist in Homologs.values() ]
    if not no:
        # max() of an empty list would raise; there is nothing to plot.
        infomsg( "no homologous sets found, histogram skipped" )
        return

    from rpy import r
    outfile = 'hist_size_homol_sets.pdf'
    title = 'Size of Homologous Sets'
    x = 'number of homologs'
    y = 'frequency'
    r.pdf( outfile )
    try:
        r.hist(no, xlab=x, ylab=y, main=title, col='grey', breaks=max(no))
    finally:
        # Release the graphics device even if the histogram failed.
        r.dev_off()
def draw_hist_gene_freq(self, files, frequency_presence_vector_gene_id_ls, exponent, output_dir):
    """
    12-23-05
    12-26-05
        if it's not empty, then draw it
    12-26-05
        add an enrich_index_no_of_genes_filename_ls
    01-05-06
        have >10 items, then draw it

    Each row of *frequency_presence_vector_gene_id_ls* starts with a
    frequency; entries 1 .. len(row)-2 are per-dataset presence flags (the
    last entry is not a flag). For every dataset (one per entry of *files*)
    the frequencies of the rows flagged 1 are collected; a histogram is
    written to '<output_dir>/<file>.png' when more than 10 were collected.

    In addition 'enrich_index.csv' is written to *output_dir*: one
    tab-separated, sorted line per dataset with the sum of
    ``frequency ** exponent``, the number of collected frequencies and the
    file name.
    """
    sys.stderr.write("Drawing gene frequency histogram for each dataset...\n")
    #initialize a structure to store frequency list in each dataset
    dataset_index_gene_freq_ls = [[] for _ in files]
    for row in frequency_presence_vector_gene_id_ls:
        frequency = row[0]
        for i in range(1, len(row)-1):
            if row[i] == 1:
                dataset_index_gene_freq_ls[i-1].append(frequency)    #WATCH i-1
    #12-26-05
    enrich_index_no_of_genes_filename_ls = []
    for i, fname in enumerate(files):
        gene_freq_ls = dataset_index_gene_freq_ls[i]
        # The backspaces rewind the progress counter printed on stderr.
        sys.stderr.write("%s\t%s"%('\x08'*20, i))
        output_fname = os.path.join(output_dir, fname)
        #12-26-05
        enrich_index = sum([math.pow(freq, exponent) for freq in gene_freq_ls])
        enrich_index_no_of_genes_filename_ls.append([enrich_index, len(gene_freq_ls), fname])
        if len(gene_freq_ls)>10:    #01-05-06 have >10 items, then draw it
            r.png("%s.png"%output_fname)
            try:
                r.hist(gene_freq_ls, main='histogram', xlab='gene frequency',
                       ylab='no of genes', labels=r.TRUE)
            finally:
                # Release the graphics device even if the histogram failed.
                r.dev_off()
    #12-26-05
    enrich_index_no_of_genes_filename_ls.sort()
    enrich_index_output_fname = os.path.join(output_dir, 'enrich_index.csv')
    enrich_index_file = open(enrich_index_output_fname, 'w')
    try:
        writer = csv.writer(enrich_index_file, delimiter='\t')
        writer.writerows(enrich_index_no_of_genes_filename_ls)
    finally:
        # Close explicitly so the data is flushed regardless of when the
        # writer object gets collected.
        enrich_index_file.close()
    sys.stderr.write('Done.\n')
def plot(self, hardcopy=None):
    """Show the 'qvalue' package's diagnostic plots for the stored results.

    Exports ``mPValues``, ``mPi0``, ``mQValues`` and ``mLambda`` to R,
    wraps them in an object of class "qvalue" and calls ``qplot`` on it.
    When *hardcopy* is set, the output goes to that PNG file (1024x768,
    cairo) instead of the current device.
    """
    write_png = True if hardcopy else False
    if write_png:
        R.png(hardcopy, width=1024, height=768, type="cairo")

    R.require('qvalue')

    # build a qobj
    R.assign("pval", self.mPValues)
    R.assign("pi0", self.mPi0)
    R.assign("qval", self.mQValues)
    R.assign("lambda", self.mLambda)

    r_statements = (
        'qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)',
        ' class(qobj) <- "qvalue" ',
        'qplot(qobj)',
    )
    for statement in r_statements:
        R(statement)

    if write_png:
        R.dev_off()
def __init__(self, bedfile, dir, win, ma, out, ymin=0, ymax=0):
    """Load *bedfile*, run the analysis and plot the conservation profile.

    bedfile -- passed to ``Bed.__init__``
    dir     -- stored as ``self.dir``
    win     -- half width of the plotted window (positions -win .. win)
    ma      -- window for the moving average; also added to *win* to form
               ``self.win``
    out     -- prefix of the output file '<out>.ConservationPlot.pdf'
    ymin, ymax -- y-axis limits; a false value (default 0) derives the limit
               from ``self.mscore`` with a margin of 0.05

    Plotting is best effort: any error while importing rpy, running the
    analysis or plotting is reported on stderr/stdout and not propagated.
    """
    self.L = 17 # number of letters per line
    Bed.__init__(self, bedfile)
    self.dir = dir
    self.win = win + ma
    self.ma = ma # window for moving average
    try:
        from rpy import r
        r.pdf(out + '.ConservationPlot.pdf')
        try:
            self.Run()
            if not ymin:
                ymin = self.mscore[self.mscore > 0].min() - 0.05
            if not ymax:
                ymax = self.mscore.max() + 0.05
            r.plot(list(range(-1*win, win+1)), self.mscore, type='l',
                   xlab='Distance from the Center of Enriched Regions',
                   ylab='Conservation Score', lwd=3, ylim=(ymin, ymax))
        finally:
            # Close the PDF device even when Run() or the plot failed.
            r.dev_off()
    except Exception:
        # "except Exception" lets SystemExit and KeyboardInterrupt through,
        # unlike the bare except used before.
        sys.stderr.write('error import r using rpy, will not generate phastCons plot\n')
        print('%s %s' % (sys.exc_info()[0], sys.exc_info()[1]))
def single_plot(self):
    """Plot precision against the number of consistent predictions.

    Rows of ``stat_plot_data`` are selected by the four fixed parameters
    (``self.option_num_dict[0..3]``) and ``self.tag``, ordered by the
    varying parameter (``self.option_num_dict[4]``). For every row the point
    (tp, tp / (tp + fp)) is drawn into the PNG file ``self.ofname``.

    ``self.based_on_clusters`` selects the ``*_m`` columns; ``self.l1``
    decides whether tp1 counts as true or as false positive. Rows with
    tp + fp == 0 have no defined ratio and are skipped.
    """
    #this function deals with 4 fixed parameters and 1 varying parameter
    options = self.option_num_dict
    query_args = []
    for i in range(4):
        query_args.extend([options[i].label, options[i].value])
    query_args.extend([self.tag, options[4].label])
    # NOTE(review): the statement is built by string interpolation; labels,
    # values and tag must come from trusted input.
    self.curs.execute(
        "select tp, tp_m, tp1, tp1_m, tn, fp, fp_m, fn from "
        "stat_plot_data where %s=%s and %s=%s and %s=%s and %s=%s "
        "and tag='%s' order by %s" % tuple(query_args))
    plot_data = self.curs.fetchall()

    x_list = []
    y_list = []
    for entry in plot_data:
        if self.based_on_clusters:
            #using the tp_m, tp1_m and fp_m
            tp, tp1, fp = entry[1], entry[3], entry[6]
        else:
            #using the tp, tp1, fp
            tp, tp1, fp = entry[0], entry[2], entry[5]
        if self.l1:
            #tp1 is counted as true positive
            tp += tp1
        else:
            #tp1 is counted as false positive
            fp += tp1
        if tp + fp == 0:
            # Previously raised ZeroDivisionError.
            continue
        x_list.append(tp)
        y_list.append(float(tp)/(tp+fp))

    # The device is opened only once the data is ready, so a failing query
    # cannot leave a dangling device behind.
    r.png('%s'%self.ofname)
    try:
        r.plot(x_list, y_list, type='o', pch='*', xlab='consistent predictions',
               xlim=self.x_range, ylim=self.y_range,
               ylab='percentage', main='%s'%(options[4].label), col=1)
    finally:
        r.dev_off()
def transform_one_file(self, src_pathname, delimiter, outputdir, b_instance, threshold, type, no_of_valids):
    """
    08-09-05
        add type
    08-29-05
        add no_of_valids to cut genes with too few valid values
    01-05-06
        deal with blank files

    Reads the 4th column of every row but the first of the delimited file
    *src_pathname* as a float and, when more than 100 values were read,
    draws their histogram into '<outputdir>/<basename of src_pathname>.png'.
    Empty rows (blank lines) are skipped.

    *b_instance*, *threshold*, *type* and *no_of_valids* are not used in
    this function.
    """
    cor_list = []
    src_file = open(src_pathname)
    try:
        reader = csv.reader(src_file, delimiter=delimiter)
        for counter, row in enumerate(reader):
            # The first row is skipped (01-05-06: a blank file simply yields
            # no rows); blank lines come back as empty rows.
            if counter > 0 and row:
                cor_list.append(float(row[3]))
    finally:
        # The input used to stay open until garbage collection.
        src_file.close()
    output_filename = os.path.join(outputdir, os.path.basename(src_pathname))
    if len(cor_list)>100:
        r.png('%s.png'%output_filename)
        try:
            r.hist(cor_list, main='histogram', xlab='cor', ylab='freq')
        finally:
            # Release the graphics device even if the histogram failed.
            r.dev_off()
def plot(self):
    """Write one PDF per p-value level plotting its percentages per dataset.

    ``self.per_05``, ``self.per_025`` and ``self.per_01`` are each plotted
    against ``self.dataset_no`` into 'per_p_value05.pdf',
    'per_p_value025.pdf' and 'per_p_value01.pdf' respectively.
    """
    # (output file, attribute holding the percentages, plot title)
    figures = (
        ("per_p_value05.pdf", "per_05",
         'p_value: 0.05. #edges compared with correlation cut_off 0.6'),
        ("per_p_value025.pdf", "per_025",
         'p_value: 0.025. #edges compared with correlation cut_off 0.6'),
        ("per_p_value01.pdf", "per_01",
         'p_value: 0.01. #edges compared with correlation cut_off 0.6'),
    )
    for pdf_name, attribute, heading in figures:
        r.pdf(pdf_name)
        r.plot(self.dataset_no, getattr(self, attribute), type='o', pch='*',
               xlab='dataset no.', ylab='percentage', main=heading)
        r.dev_off()
def plotBundle(self, bundleD, full_filename, colorsD=None, bundlePointsD=None, legendL=None, title=None, y_max=None):
    """Plot every series of *bundleD* over time into the PNG *full_filename*.

    bundleD       -- dict: bundle id -> sequence of values (thick line)
    colorsD       -- dict: bundle id -> colour; default: rainbow colours
    bundlePointsD -- optional dict: bundle id -> sequence of values, drawn
                     as points joined by a thin line
    legendL       -- legend labels; default: the sorted bundle ids
    title         -- plot title; default 'data'
    y_max         -- upper y limit; default 0.4

    The time axis advances 0.5 per sample and is as long as the shortest
    series of *bundleD*; longer series are truncated to that length.
    Plotting is best effort: on failure a message is printed and the
    function returns normally. An empty *bundleD* raises ValueError.
    """
    if y_max is None:
        y_max = 0.4
    bundleIdL = sorted(bundleD.keys())
    if legendL is None:
        legendL = list(bundleIdL)
    if title is None:
        title = 'data'
    if colorsD is None:
        colorsL = r.rainbow(len(bundleIdL))
        if isinstance(colorsL, str):
            # A single colour may come back as a bare string; zipping it
            # would pair the bundle id with its first character.
            colorsL = [colorsL]
        colorsD = dict(zip(bundleIdL, colorsL))
    colorsL = [colorsD[x] for x in bundleIdL]

    time_min = min([len(bundleD[x]) for x in bundleIdL])
    timeVec = [0.5 * x for x in range(time_min)]

    device_open = False
    try:
        r.png(full_filename, width=800, height=600)
        device_open = True
        # Widen the right margin so the legend fits outside the plot region.
        oldPar = r.par(xpd=True, mar=[x + y for (x, y) in zip(r.par()['mar'], [0, 0, 0, 6])])

        print('plot %s' % full_filename)
        r.plot(timeVec, timeVec, type='n',
               main=title, ylim=(0, y_max),
               xlab="time in hours after transfection",
               ylab="Relative Cell Counts",
               pch=20, lwd=1, lty=1,
               cex=1.0, cex_lab=1.2, cex_main=1.5)

        for bundleId in bundleIdL:
            if bundlePointsD is not None:
                # Truncate to the common length: x and y must match.
                bundlePoints = bundlePointsD[bundleId][:time_min]
                r.points(timeVec, bundlePoints,
                         col=colorsD[bundleId], pch=20, lwd=1)
                r.lines(timeVec, bundlePoints,
                        col=colorsD[bundleId], lwd=1, lty=1)
            r.lines(timeVec, bundleD[bundleId][:time_min],
                    col=colorsD[bundleId], lwd=3, lty=1)

        r.legend(max(timeVec) * 1.1, y_max, legend=legendL, fill=colorsL,
                 cex=1.0, bg='whitesmoke')
        r.par(oldPar)
        r.grid(col="darkgrey")
    except Exception:
        print(full_filename + ' has not been printed.')
    finally:
        # Only close a device this call actually opened; closing after a
        # failed png() would shut down an unrelated device or raise.
        if device_open:
            r.dev_off()

    return
def generateCountsGraph(self, counts, sitename, widthpx=648, resol=72, ):
    '''
    Generate a graph file of *all* the counts records in *counts* via R.

    counts   -- records providing ``datetime`` and ``c1`` attributes
    sitename -- used in the plot title
    widthpx  -- image width in pixels
    resol    -- resolution passed to the R device

    The image height follows from the 'graphratio' option, the y limits
    from 'counts.graph.ylim.min'/'counts.graph.ylim.max' (config section
    'data'). Times are plotted as seconds since the epoch.

    Returns an open binary file object on the generated PNG, or on the
    configured 'nodatapng' image when *counts* is empty.
    '''
    from rpy import r as robj
    import os

    # Calculate graph image information
    widthpx = int(widthpx)
    imgwidth = int(float(widthpx) / float(resol))
    ratio = float(self.config.get('data', 'graphratio'))
    # (a stray trailing comma used to turn this into a 1-tuple)
    imgheight = int((float(widthpx) * ratio) / float(resol))

    cts = [cr.c1 for cr in counts]
    ctm = [time.mktime(cr.datetime.timetuple()) for cr in counts]

    if not cts:
        logobject.debug("DataHandler.generateCountsGraph(): No data. Generating proper error image...")
        return open(self.config.get('data', 'nodatapng'), 'rb')

    # The temporary file is only created when there is something to draw;
    # its descriptor is closed at once because R writes to it by name.
    (fd, tmpgraphfile) = mkstemp()
    os.close(fd)
    logobject.debug("DataHandler.generateCountsGraph(): Temp graph filename = %s" % tmpgraphfile)

    robj.bitmap(tmpgraphfile,
                type="png256",
                width=imgwidth,
                height=imgheight,
                res=resol,
                )
    try:
        ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
        ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
        robj.plot(ctm, cts,
                  col="black",
                  main="Counts: %s" % sitename,
                  xlab="Time: (secs since 1970)",
                  ylab="Counts/min",
                  type="l",
                  ylim=(ymin, ymax)
                  )
    finally:
        # Release the graphics device even if the plot failed.
        robj.dev_off()

    logobject.debug("DataHandler.generateCountsGraph(): OK: What is our tempfile? = %s" % tmpgraphfile)
    # Binary mode: the file holds PNG data.
    return open(tmpgraphfile, 'rb')
def plot(self, filename, list_to_plot, main_lab, xlab):
    """Write a histogram of *list_to_plot* to the PDF file *filename*.

    The breaks are the integers 0 .. max(list_to_plot); *main_lab* is the
    title and *xlab* the x-axis label.
    """
    largest = max(list_to_plot)
    r.pdf(filename)
    integer_breaks = range(largest + 1)
    r.hist(list_to_plot, breaks=integer_breaks, las=1, main=main_lab, xlab=xlab)
    r.dev_off()
def plot(outfile, data, out_format='png'):
    """Plot the CPU time per problem for every planner series in *data*.

    outfile    -- path of the image to write
    data       -- non-empty list of dicts; each has the keys 'problem',
                  'translate' and 'preprocessing' plus one key per series.
                  A series value is a time or the string 'no-plan'.
    out_format -- 'png' or 'pdf'

    For every series the plotted time is its own time plus the translate
    and preprocessing times of the problem; 'no-plan' entries are placed at
    -1000, i.e. below the visible range. The translate time is drawn as a
    line.

    Raises Exception for an unrecognised *out_format*.
    """
    if out_format not in ('png', 'pdf'):
        raise Exception('Unrecognised format: ' + str(out_format))

    # At least one width unit: very small inputs used to round to a width
    # of zero, which is not a valid device size.
    w = max(1, int(round(len(data) / 4.0)))
    print("total: " + str(len(data)))

    translate = [float(dat['translate']) for dat in data]
    preprocessing = [float(dat['preprocessing']) for dat in data]
    series = [k for k in data[0]
              if k not in ("problem", 'translate', 'preprocessing')]
    points = dict((k, []) for k in series)
    xlabels = []
    for index, dat in enumerate(data):
        for k in series:
            if dat[k] != 'no-plan':
                points[k].append(float(dat[k]) + translate[index] + preprocessing[index])
            else:
                points[k].append(-1000)
        xlabels.append(dat['problem'])

    max_value = max([max(points[k]) for k in series])
    yrange = (0, max_value)
    x = list(range(1, len(data) + 1))
    # Off-range dummy points: this first plot only sets up the coordinates.
    y = [-1000] * len(x)

    # The device is opened only after the data has been validated and is
    # closed even if a drawing call fails.
    if out_format == 'png':
        r.png(outfile, width=w * 100, height=1000, res=72)
    else:
        r.pdf(outfile, width=w, height=10)
    try:
        r.par(mar=(7, 5, 4, 2))
        r.plot(x, y, main='', xlab="", ylab='', xaxt='n', yaxt='n',
               pch=0, ylim=yrange, mgp=(5, 1, 0))
        r.mtext("Problem", side=1, line=5)
        r.mtext("CPU Time (s)", side=2, line=3)

        r.lines(x, translate, lty=1)
        # preprocessing is not drawn on its own since it is insignificant

        # planner output: one point symbol per series
        pch_start = 1
        pch_index = pch_start
        legend_labels = []
        for k in series:
            r.points(x, points[k], pch=pch_index)
            pch_index += 1
            legend_labels.append("FD+" + k.upper())

        # put x-axis labels
        for position, label in enumerate(xlabels):
            r.axis(side=1, at=position + 1, labels=label, las=2)

        # put y-axis labels
        base, step = get_y_step(max_value)
        print("base: " + str(base) + " -- step: " + str(step))
        tick = base
        for _ in range(step):
            r.axis(side=2, at=tick, labels=str(tick), las=2)
            tick += base

        # legend
        r.legend(1, max_value, legend_labels,
                 pch=list(range(pch_start, pch_index)))
    finally:
        r.dev_off()
def r_dev_off():
    """Shut down an R graphics device by calling ``r.dev_off()``."""
    r.dev_off()
def kill_R():
    """Manual last resort to kill the R quartz() window.

    Imports rpy locally and calls ``r.dev_off()``.
    """
    from rpy import r
    r.dev_off()
def generateCountsGraph2(
        self,
        counts,
        sitename,
        widthpx=648,
        resol=72, ):
    '''
    Generate a graph file of *all* the counts records in *counts* via R.

    This one uses more in-R processing to handle dates/times (since Rpy
    doesn't do automatic conversions): the timestamps are handed to R as
    strings and converted to POSIXct there.

    counts   -- records providing ``datetime`` and ``c1`` attributes
    sitename -- used in the plot title
    widthpx  -- image width in pixels
    resol    -- resolution passed to the R device

    Returns an open binary file object on the generated PNG, or on the
    configured 'nodatapng' image when *counts* is empty.
    '''
    log = logging.getLogger()
    log.info('Generating graph for %d counts from site %s' %
             (len(counts), sitename))
    from rpy import r as robj
    import os

    # Calculate graph image information
    ratio = float(self.config.get('data', 'graphratio'))
    widthpx = int(widthpx)
    imgwidth = int(float(widthpx) / float(resol))
    imgheight = int(((float(widthpx) * ratio) / float(resol)))
    resol = int(resol)

    # Unpack CountsRecords into counts and timestamps.
    # cr.datetime = "2008-02-11 12:07:08.112117"
    # cr.c1 = 5440
    cts = [cr.c1 for cr in counts]
    ctm = [str(cr.datetime) for cr in counts]
    log.debug("Got list of %d counts." % len(cts))

    if not cts:
        log.debug("No data. Generating proper error image...")
        return open(self.config.get('data', 'nodatapng'), 'rb')

    # Get unused file/name to put image data into. The descriptor is closed
    # at once because R writes to the file by name.
    (fd, tmpgraphfile) = mkstemp()
    os.close(fd)
    log.debug("Temp graph filename = %s" % tmpgraphfile)

    # Import the data into R and convert timestamps to POSIXct objects.
    robj.assign('rcts', cts)
    robj.assign('rctm', ctm)
    robj('''rctmpct <- as.POSIXct(strptime(rctm, format="%Y-%m-%d %H:%M:%S"))''')

    cmdstring = 'bitmap( "%s", type="png256", width=%s, height=%s, res=%s)' % (
        tmpgraphfile, imgwidth, imgheight, resol)
    log.debug("R cmdstring is %s" % cmdstring)
    robj(cmdstring)
    log.debug("Completed R command string %s" % cmdstring)
    try:
        ymin = int(self.config.get('data', 'counts.graph.ylim.min'))
        ymax = int(self.config.get('data', 'counts.graph.ylim.max'))
        xlabel = " %s -- %s " % (ctm[0], ctm[len(ctm) - 1])
        # Title and axis label are handed over as R variables rather than
        # spliced into the R source, so quotes or backslashes in the site
        # name cannot break (or alter) the command.
        robj.assign('rmain', "Counts: %s" % sitename)
        robj.assign('rxlab', "Dates: %s" % xlabel)
        cmdstring = 'plot( rctmpct, rcts, col="black", main=rmain, xlab=rxlab, ylab="Counts/min", type="l", ylim=c(%d,%d) )' % (
            ymin, ymax)
        log.debug("R cmdstring is %s" % cmdstring)
        robj(cmdstring)
        log.debug("Completed R command string %s" % cmdstring)
    finally:
        # Release the graphics device even if the plot failed.
        robj.dev_off()

    # Pull written image and return to caller (binary mode: PNG data)
    log.debug("OK: What is our tempfile? = %s" % tmpgraphfile)
    return open(tmpgraphfile, 'rb')
def main():
    """Fit rates on neutral data, plot the fit and the corrected input rates.

    The file given with --input-filename-neutral is read with ``readRates``
    and a linear model of column 3 on column 2 is fitted in R. The rates
    read from stdin are drawn over that fit, and two PNG files are written,
    named by applying --output-filename-pattern to "fit" and "correction"
    (plus ".png").

    Raises ValueError if no neutral file was supplied and IOError if stdin
    is empty.
    """
    parser = E.OptionParser( version = "%prog version: $Id: rates2rates.py 2781 2009-09-10 11:33:14Z andreas $", usage = globals()["__doc__"])

    parser.add_option( "--output-filename-pattern", dest="output_filename_pattern", type="string",
                       help="pattern for additional output files [%default]." )

    parser.add_option( "--input-filename-neutral", dest="input_filename_neutral", type="string",
                       help="a tab-separated file with rates and G+C content in neutrally evolving regions [%default]." )

    parser.set_defaults(
        input_filename_neutral = None,
        output_filename_pattern = "%s",
        normalize = True,
        hardcopy = None,
        )

    (options, args) = E.Start( parser, add_csv_options = True )

    if not options.input_filename_neutral:
        raise ValueError( "please supply a file with neutral rates." )

    lines = options.stdin.readlines()
    if len(lines) == 0:
        raise IOError ( "no input" )

    from rpy import r as R
    import rpy

    # First figure: neutral rates with the fitted line and its intervals.
    R.png( options.output_filename_pattern % "fit" + ".png", width=1024, height=768, type="cairo")
    matrix, headers = readRates( open( options.input_filename_neutral, "r" ) )
    R.assign("matrix", matrix)
    R.assign("headers", headers)
    nref = R( """length( matrix[,1] )""" )
    dat = R("""dat <- data.frame(x = matrix[,2], y = matrix[,3])""")
    mod = R("""mod <- lm( y ~ x, dat)""")

    R("""plot( matrix[,2], matrix[,3], cex=%s, col="blue", pch="o", xlab="%s", ylab="%s" %s)""" % (0.3, headers[1], headers[2], "") )
    # Evenly spaced x values over the observed range, used for the
    # prediction and confidence bands.
    R("""new <- data.frame(x = seq( min(matrix[,2]), max(matrix[,2]), (max(matrix[,2]) - min(matrix[,2])) / 100))""")
    # NOTE(review): the result of this call is not stored or used.
    R("""predict(mod, new, se.fit = TRUE)""")
    R("""pred.w.plim <- predict(mod, new, interval="prediction")""")
    R("""pred.w.clim <- predict(mod, new, interval="confidence")""")
    R("""matpoints(new$x,cbind(pred.w.clim, pred.w.plim[,-1]), lty=c(1,2,2,3,3), type="l")""")
    # Annotate the figure with the fitted equation, correlation and n.
    R.mtext(
        "y = %f * x + %f, r=%6.4f, n=%i" % (mod["coefficients"]["x"], mod["coefficients"]["(Intercept)"], R("""cor( dat )[2]"""), nref ),
        3,
        cex = 1.0)

    R("""mean_rate <- mean( matrix[,3] )""")

    # Overlay the rates read from stdin (red) on the same figure.
    data_matrix, data_headers = readRates( lines )
    R.assign("data_matrix", data_matrix)
    R.assign("data_headers", data_headers)
    ndata = R( """length( data_matrix[,1] )""" )

    R("""points( data_matrix[,2], data_matrix[,3], cex=%s, col="red", pch="o" %s)""" % (0.3, "") )
    R("""topred <- data.frame( x = data_matrix[,2] )""")
    R("""corrected_rates <- predict( mod, topred, se.fit = TRUE )""")
    # Rates relative to the neutral mean, and relative to the rate the
    # model predicts for the same x value.
    uncorrected = R("""uncorrected <- data_matrix[,3] / mean_rate """)
    corrected = R("""corrected <- as.vector(data_matrix[,3] / corrected_rates$fit)""")
    R.dev_off()

    # Second figure: uncorrected versus corrected rates.
    R.png( options.output_filename_pattern % "correction" + ".png", width=1024, height=768, type="cairo")
    R("""plot( uncorrected, corrected, cex=%s, col="blue", pch="o", xlab="uncorrected rate", ylab="corrected rate" %s)""" % (0.3, "") )
    R.dev_off()

    E.Stop()