def make_output(tss_cov, out_prefix, upstream, downstream):
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix, 'w')
    for i in range(-upstream, downstream+1):
        print >> raw_out, '%d\t%e' % (i, tss_cov[upstream+i])
    raw_out.close()

    # make plot data structures
    tss_i = ro.IntVector(range(-upstream, downstream+1))
    cov = ro.FloatVector(tss_cov)
    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_full.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()

    # construct zoomed plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index', limits=ro.IntVector([-1000, 1000])) + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_zoom.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
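# NOTE (added sketch): the snippets in this collection reference rpy2 names such as `ro`,
# `robjects`, `ggplot2`, and `grdevices` without showing their imports. The preamble below
# is inferred from how those names are used; it is an assumption, not taken from any one
# of the original source files.
import rpy2.robjects as ro
import rpy2.robjects as robjects                # some snippets use this longer alias
from rpy2.robjects import DataFrame, Formula, FloatVector, IntVector, StrVector
from rpy2.robjects.packages import importr
from rpy2.robjects.lib import ggplot2

grdevices = importr('grDevices')                # R pdf()/png() graphics devices used for file output
# Per-snippet imports (os, shutil, sys, numpy, pandas, pysam, optparse, logging, ...) are omitted
# here and assumed to be present in the original scripts.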
def plot_squiggle(args, filename, start_times, mean_signals):
    """
    Use rpy2 to create a squiggle plot of the read
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]
    total_time = start_times[-1] - start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([t - t_0 for t in start_times])
    r_mean_signals = robjects.FloatVector(mean_signals)

    # infer the appropriate number of events given the number of facets
    num_events = len(r_mean_signals)
    events_per_facet = (num_events / args.num_facets) + 1

    # dummy variable to control faceting
    facet_category = robjects.FloatVector([(i / events_per_facet) + 1
                                           for i in range(len(start_times))])

    # make a data frame of the start times and mean signals
    d = {'start': r_start_times, 'mean': r_mean_signals, 'cat': facet_category}
    df = robjects.DataFrame(d)

    gp = ggplot2.ggplot(df)
    if not args.theme_bw:
        pp = gp + ggplot2.aes_string(x='start', y='mean') \
            + ggplot2.geom_step(size=0.25) \
            + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
            + ggplot2.scale_x_continuous('Time (seconds)') \
            + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
            + ggplot2.ggtitle('Squiggle plot for read: ' + filename
                              + "\nTotal time (sec): " + str(total_time)) \
            + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)})
    else:
        pp = gp + ggplot2.aes_string(x='start', y='mean') \
            + ggplot2.geom_step(size=0.25) \
            + ggplot2.facet_wrap(robjects.Formula('~cat'), ncol=1, scales="free_x") \
            + ggplot2.scale_x_continuous('Time (seconds)') \
            + ggplot2.scale_y_continuous('Mean signal (picoamps)') \
            + ggplot2.ggtitle('Squiggle plot for read: ' + filename
                              + "\nTotal time (sec): " + str(total_time)) \
            + ggplot2.theme(**{'plot.title': ggplot2.element_text(size=11)}) \
            + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = os.path.basename(filename) + "." + args.saveas
        if os.path.isfile(plot_file):
            raise Exception('Cannot create plot for %s: plot file %s already exists'
                            % (filename, plot_file))

        if args.saveas == "pdf":
            grdevices.pdf(plot_file, width=8.5, height=11)
        elif args.saveas == "png":
            grdevices.png(plot_file, width=8.5, height=11, units="in", res=300)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
def make_output_and(cov, control_cov, out_prefix, window):
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix, 'w')
    for i in range(-window/2, window/2+1):
        print >> raw_out, '%d\t%e\t%e' % (i, cov[window/2+i], control_cov[window/2+i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(2*range(-window/2, window/2+1))
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(['Main']*len(cov) + ['Control']*len(control_cov))
    df = ro.DataFrame({'splice_i': splice_i, 'cov': cov_r, 'label': labels})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage') + \
        ggplot2.scale_colour_discrete('')

    # plot to file
    grdevices.pdf(file='%s.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
def main():
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    #parser.add_option()
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide BAM file')
    else:
        bam_file = args[0]

    align_lengths = {}
    for aligned_read in pysam.Samfile(bam_file, 'rb'):
        align_lengths[aligned_read.qlen] = align_lengths.get(aligned_read.qlen, 0) + 1

    min_len = min(align_lengths.keys())
    max_len = max(align_lengths.keys())

    # construct data frame
    len_r = ro.IntVector(range(min_len, max_len+1))
    counts_r = ro.IntVector([align_lengths.get(l, 0) for l in range(min_len, max_len+1)])
    df = ro.DataFrame({'length': len_r, 'counts': counts_r})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='length', y='counts') + \
        ggplot2.geom_bar(stat='identity') + \
        ggplot2.scale_x_continuous('Alignment length') + \
        ggplot2.scale_y_continuous('')

    # plot to file
    grdevices.pdf(file='align_lengths.pdf')
    gp.plot()
    grdevices.dev_off()
def make_output(cov, out_prefix, window):
    # dump raw counts to file
    raw_out = open("%s_raw.txt" % out_prefix, "w")
    for i in range(-window / 2, window / 2 + 1):
        print >> raw_out, "%d\t%e" % (i, cov[window / 2 + i])
    raw_out.close()

    # make plot data structures
    splice_i = ro.IntVector(range(-window / 2, window / 2 + 1))
    cov = ro.FloatVector(cov)
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
    )

    # plot to file
    grdevices.pdf(file="%s.pdf" % out_prefix)
    gp.plot()
    grdevices.dev_off()
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file
    for te in te_tss_cov:
        if te[0] in ['n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'] \
                and te[1] in ['n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                              'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                              'LTR/ERVK', 'DNA/TcMar-Tigger']:
            raw_out = open('%s_raw/%s_%s.txt' % (out_prefix, te[0].replace('/', '_'),
                                                 te[1].replace('/', '_')), 'w')
            for i in range(-upstream, downstream + 1):
                print >> raw_out, '%d\t%e\t%e' % (i, te_tss_cov[te][upstream + i],
                                                  control_te_tss_cov[te][upstream + i])
            raw_out.close()

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures
    tss_i = ro.IntVector(2 * range(-upstream, downstream + 1))
    labels = ro.StrVector(['Main'] * (upstream + downstream + 1) +
                          ['Control'] * (upstream + downstream + 1))

    for te in te_tss_cov:
        if te[0] in ['n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'] \
                and te[1] in ['n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                              'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                              'LTR/ERVK', 'DNA/TcMar-Tigger']:
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(file='%s_plot/%s_%s.pdf' % (out_prefix, te[0].replace('/', '_'),
                                                      te[1].replace('/', '_')))
            gp.plot()
            grdevices.dev_off()
def gray_plot(data, min=0, max=1, name=""):
    reshape = importr('reshape')
    gg = ggplot2.ggplot(reshape.melt(data, id_var=['x', 'y']))
    pg = gg + ggplot2.aes_string(x='L1', y='L2') + \
        ggplot2.geom_tile(ggplot2.aes_string(fill='value')) + \
        ggplot2.scale_fill_gradient(low="black", high="white", limits=FloatVector((min, max))) + \
        ggplot2.coord_equal() + ggplot2.scale_x_continuous(name)
    return pg
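# A hedged usage sketch for gray_plot() above: the object name `m` and the output filename
# are illustrative assumptions, not taken from the original source. The device-open /
# .plot() / dev_off() pattern mirrors the other snippets in this collection.
grdevices = importr('grDevices')
grdevices.png('gray_plot.png', width=512, height=512)
gray_plot(m, min=0, max=1, name='x position').plot()
grdevices.dev_off()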
def main():
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int',
                      help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss',
                      help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int',
                      help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float',
                      help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide raw file')
    else:
        raw_file = args[0]

    # collect data
    coords = []
    main_cov = []
    control_cov = []
    for line in open(raw_file):
        a = line.split()
        coords.append(int(a[0]))
        main_cov.append(float(a[1]))
        control_cov.append(float(a[2]))

    # data structures
    tss_i = ro.IntVector(range(-options.upstream, options.downstream+1))
    labels = ro.StrVector(['Main']*(options.upstream+options.downstream+1) +
                          ['Control']*(options.upstream+options.downstream+1))
    cov = ro.FloatVector(main_cov + control_cov)
    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    '''
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_colour_discrete('')
    '''

    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax == None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0, options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    gp.plot()
    grdevices.dev_off()
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean, prefix=''):
    pandas2ri.activate()
    subgroups_to_lrr_count = {}
    columns_to_data = {'subgroup': [], 'pos': [], 'acc20': []}
    for subgroup, (acc20means, acc20_count) in subgroups_to_lrrs_acc20mean.items():
        subgroups_to_lrr_count[subgroup] = acc20_count
        for index, acc20mean in enumerate(acc20means):
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data['pos'].append(index + 1)
            columns_to_data['acc20'].append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    with open(os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt"), 'w') as f:
        for subgroup, lrr_count in subgroups_to_lrr_count.items():
            f.write(str.format("{}: {}\n", subgroup, lrr_count))

    # Generate the line chart file
    r_columns_to_data = {
        'subgroup': ro.StrVector(columns_to_data['subgroup']),
        'pos': ro.IntVector(columns_to_data['pos']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)

    line_chart_file_path = os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_acc20_line.png")
    logging.debug(str.format("The Data Frame for file {}: \n{}", line_chart_file_path, df))

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    pp = gp + \
        ggplot2.theme_bw() + \
        ggplot2.theme_classic() + \
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
        ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
        ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup') + \
        ggplot2.geom_point(size=4, shape=20) + \
        ggplot2.geom_line(size=3) + \
        ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
        ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
        ggplot2.scale_x_continuous(breaks=ro.IntVector(range(1, 25)),
                                   labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))
    pp.plot()
    logging.info(str.format("Output step3 file {}", line_chart_file_path))
    grdevices.dev_off()
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type"""

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    for ex_idx, ex in enumerate(expt_names):
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]
        plot_idx = 0

        for i in range(len(tmp_idx)):
            name1 = re.sub('_', '.', cel_names[tmp_idx[i]])
            for j in range(i + 1, len(tmp_idx)):
                name2 = re.sub('_', '.', cel_names[tmp_idx[j]])
                plot_idx += 1
                cor = np.corrcoef(expr[:, tmp_idx[i]], expr[:, tmp_idx[j]])[0, 1]
                names_1.append(name1)
                names_2.append(name2)
                cors.append(cor)
                titles.append(ex + '-' + str(plot_idx))

                df = ro.DataFrame({'x': ro.FloatVector(expr[:, tmp_idx[i]]),
                                   'y': ro.FloatVector(expr[:, tmp_idx[j]])})
                gp = ggplot2.ggplot(df) + ggplot2.aes_string(x='x', y='y') + \
                    ggplot2.geom_point(size=1) + \
                    ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                    ggplot2.theme_bw() + \
                    ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                    ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=fsize),
                                     'axis.title.x': ggplot2.element_text(size=8),
                                     'axis.text.y': ggplot2.element_text(size=fsize),
                                     'axis.title.y': ggplot2.element_text(size=8, angle=90),
                                     'plot.title': ggplot2.element_text(size=fsize)})

                if outdir is None:
                    gp.plot()
                else:
                    if not os.path.isdir(outdir):
                        os.makedirs(outdir)
                    outfile = os.path.join(outdir, ex + '-' + str(plot_idx) + '.png')
                    ro.r.ggsave(filename=outfile, plot=gp, width=85, height=85, unit='mm')

    df = pd.DataFrame({'name1': names_1, 'name2': names_2, 'cor': cors}, index=titles)
    if not outdir is None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname, line_chart_file_path):
    logging.debug(str.format("Begin to generate {}, data {}", line_chart_file_path, ts_to_acc20s))
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)

    columns_to_data = {tname: [], 'site': [], 'acc20': []}
    for ss, acc20means in ts_to_acc20mean.items():
        for index, acc20mean in enumerate(acc20means):
            columns_to_data[tname].append(ss)
            columns_to_data['site'].append(index - 5)
            columns_to_data['acc20'].append(acc20mean)

    # Generate the line chart file
    r_columns_to_data = {
        tname: ro.StrVector(columns_to_data[tname]),
        'site': ro.IntVector(columns_to_data['site']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)
    logging.debug(str.format("The Data Frame for file {}: \n{}", line_chart_file_path, df))

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    gp = ggplot2.ggplot(df)
    pp = gp + \
        ggplot2.theme_bw() + \
        ggplot2.theme_classic() + \
        ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
        ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
        ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname) + \
        ggplot2.geom_point(size=4, shape=20) + \
        ggplot2.geom_line(size=3) + \
        ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
        ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
        ggplot2.scale_x_continuous(breaks=ro.IntVector(list(range(-5, 6))),
                                   labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N',
                                                        '1', '2', '3', '4', '5']))
    pp.plot()
    logging.info(str.format("Output step3 file {}", line_chart_file_path))
    grdevices.dev_off()
def singleTablePlot_gg(parser, args):
    '''Plot per-k-mer counts from a single table (args.table1) as a bar chart.'''
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    kmerdict = kmercount_in_table(args.table1)
    data = defaultdict(list)
    numKmers = len(kmerdict)
    for k in sorted(kmerdict.keys()):
        data['kmers'].append(k)
        data['counts'].append(kmerdict[k])

    # Build an R data frame with an explicit index column so aes_string() can refer to
    # columns by name; aes_string() expects column names, not Python lists.
    df = robjects.DataFrame({'idx': robjects.IntVector(range(1, numKmers + 1)),
                             'kmers': robjects.StrVector(data['kmers']),
                             'counts': robjects.IntVector(data['counts'])})
    gp = ggplot2.ggplot(df)
##    pp = gp + ggplot2.geom_bar(stat="identity")
    # Breaks are offset by 0.5 (as in the original) and labelled with the k-mer strings.
    pp = gp + ggplot2.aes_string(x='idx', y='counts') \
        + ggplot2.geom_bar(stat="identity") \
        + ggplot2.scale_x_continuous(name="kmer",
                                     breaks=robjects.FloatVector([0.5 + i for i in range(1, numKmers + 1)]),
                                     labels=robjects.StrVector(data['kmers']))
    pp.plot()

    print('Type enter to exit.')
    raw_input()
d['group'] = StrVector([d['code'][x] + ':' + d['sequence'][x] for x in range(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2

p = ggplot2.ggplot(dataf) + \
    ggplot2.geom_line(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \
    ggplot2.geom_point(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \
    ggplot2.facet_wrap(Formula('~sequence')) + \
    ggplot2.scale_y_continuous('running time') + \
    ggplot2.scale_x_continuous('repeated n times', ) + \
    ggplot2.xlim(0, max(n_loops)) + \
    ggplot2.labs(title="Benchmark (running time)")

from rpy2.robjects.packages import importr
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")
fit = nlme.lmList(Formula('time ~ n_loop | group'), data=dataf,
                  na_action=stats.na_exclude)
# (Fragment: in the original script the map-plot section above this point was disabled with a
#  triple-quoted block whose opening quotes precede this excerpt; the ''' after pp.plot() closes it.)
pp = gp + \
    ggplot2.aes_string(x="Lon", y="Lat", col="Temp", label="Station") + \
    ggplot2.scale_colour_gradientn(colours=colours2) + \
    ggplot2.geom_text(col="black") + \
    ggplot2.geom_point()
ggplot2.ggtitle(graphtitle)
pp.plot()
'''
#robjects.r.ggsave((str(args.out).replace(".pdf",""))+"map.pdf")

onlyfiltxxx = robjects.r('onl<-subset(allfilters,filtersize %in% c("0p1","0p8","3p0"))')
#print "here is"
#print robjects.r('print(datass1)')
#print "there was"

'''
ggplot2.scale_x_continuous(name=xlabel,breaks=scales.pretty_breaks(20)) +\
ggplot2.scale_y_continuous(labels=scales.comma,name=ylabel,breaks=scales.pretty_breaks(10))+
ggplot2.theme(title=ggplot2.element_text(colour="blue",face="bold"))'''

if args.plotfraction == "False":
    barsdata = robjects.r('p<-ggplot(data=onl, aes(x=mystation, y=istranscripts,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
if args.plotfraction == "True":
    barsdata = robjects.r('p<-ggplot(data=onl, aes(x=mystation, y=ratio,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')

#barsdata=robjects.r('p<-ggplot(data=datass1, aes(x=mystation, y=ratio,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
#teest=robjects.r('p<-p+coord_fixed(ratio=400)')
#barsg = ggplot2.ggplot(onlysurf)
#barsg=barsg+ggplot2.aes_string(x="mystation", y="ratio")
#barsg=barsg+ggplot2.geom_bar(stat="identity")
#bbb=robjects.r('p<-ggplot(data=smalls11, aes(x=mystation, y=ratio,fill=filtersize)) + geom_bar(stat="identity",position=position_dodge())+theme(axis.text.x=element_text(angle=90))')
#barsg=barsg+ggplot2.geom_bar(stat="identity",position=ggplot2.position_dodge())
#barsg=barsg+ggplot2.theme(axis.text.x=ggplot2.element_text(angle=90))
# (Fragment: continuation of a p_map = ggplot2.ggplot(...) expression begun before this excerpt.)
    ggplot2.scale_fill_gradient(high='blue', low='red') + \
    ggplot2.scale_fill_continuous(name="Obama Vote Share") + \
    ggplot2.scale_colour_continuous(name="Obama Vote Share") + \
    ggplot2.opts(**{'legend.position': 'left',
                    'legend.key.size': robjects.r.unit(2, 'lines'),
                    'legend.title': ggplot2.theme_text(size=14, hjust=0),
                    'legend.text': ggplot2.theme_text(size=12),
                    'title': "Obama Vote Share and Distance to Railroads in IL",
                    'plot.title': ggplot2.theme_text(size=24),
                    'plot.margin': robjects.r.unit(robjects.r.rep(0, 4), 'lines'),
                    'panel.background': ggplot2.theme_blank(),
                    'panel.grid.minor': ggplot2.theme_blank(),
                    'panel.grid.major': ggplot2.theme_blank(),
                    'axis.ticks': ggplot2.theme_blank(),
                    'axis.title.x': ggplot2.theme_blank(),
                    'axis.title.y': ggplot2.theme_blank(),
                    'axis.text.x': ggplot2.theme_blank(),
                    'axis.text.y': ggplot2.theme_blank()}) + \
    ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'),
                      data=IL_railroads, color='grey', size=0.2) + \
    ggplot2.coord_equal()

p_map.plot()

## add the scatterplot
## define layout of subplot with viewports
vp_sub = grid.viewport(x=0.19, y=0.2, width=0.32, height=0.4)

p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x='OBAMA_SHAR', y='NEAR_DIST') + \
    ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \
    ggplot2.stat_smooth(color="black") + \
    ggplot2.opts(**{'legend.position': 'none'}) + \
    ggplot2.scale_x_continuous("Obama Vote Share") + \
    ggplot2.scale_y_continuous("Distance to nearest Railroad")

p_sub.plot(vp=vp_sub)

grdevices.dev_off()
def plot_volcano_with_r(
        data,
        xlabel='Estimated effect (change in H/L ratio)',
        title='',
        max_labels=20,
        color_background='#737373',
        color_significant='#252525',
        color_significant_muted='#252525',
        label_only_large_fc=False,
        special_labels=None,
        special_palette=None,
        base_size=12,
        label_size=3,
        x='logFC',
        y='neg_log10_p_adjust',
        special_labels_mode='all',
        xlim=None,
        skip_labels=None,
        nudges=None,
):
    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)

    plot = r_ggplot2.ggplot(r_data)
    plot += r_ggplot2.theme_minimal(base_size=base_size)
    plot += r_ggplot2.theme(**{
        'panel.grid.major': r_ggplot2.element_blank(),
        'panel.grid.minor': r_ggplot2.element_blank(),
        'panel.border': r_ggplot2.element_rect(fill=robjects.rinterface.NA, color="black")
    })
    plot += r_ggplot2.theme(text=r_ggplot2.element_text(family='Helvetica', face='plain'))
    plot += r_ggplot2.theme(**{
        'plot.title': r_ggplot2.element_text(hjust=0.5),
        # 'axis.title.y': r_ggplot2.element_text((t = 0, r = 20, b = 0, l = 0)),
    })

    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    scale_points = r_ggplot2.scale_colour_manual(
        aes_points,
        values=r_label_palette(r_like_data, special_palette,
                               color_background=color_background,
                               color_significant=color_significant,
                               color_significant_muted=color_significant_muted))
    plot += aes_points
    plot += scale_points

    if xlim is not None:
        plot += r_ggplot2.scale_x_continuous(labels=r_custom.formatterFunTwoDigits,
                                             limits=robjects.r.c(*xlim))
    else:
        plot += r_ggplot2.scale_x_continuous(labels=r_custom.formatterFunTwoDigits)
    plot += r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit)

    plot += r_ggplot2.geom_hline(yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)),
                                 color='#BDBDBD', alpha=.3)
    plot += r_ggplot2.geom_vline(xintercept=float(FC_THRESHOLD_RESPONSE),
                                 color='#BDBDBD', alpha=.3)
    plot += r_ggplot2.geom_vline(xintercept=-float(FC_THRESHOLD_RESPONSE),
                                 color='#BDBDBD', alpha=.3)
    plot += r_ggplot2.geom_point(**{'show.legend': False})

    aes_text = r_ggplot2.aes_string(label='label')
    plot += aes_text
    plot += r_ggrepel.geom_text_repel(
        aes_text,
        nudge_x=r_dollar(r_data, 'nudgex'),
        nudge_y=r_dollar(r_data, 'nudgey'),
        size=label_size,
        family='Helvetica',
        **{
            'show.legend': False,
            'point.padding': 0.25,
            'min.segment.length': 0,
            #'max.iter': 0,
            'segment.color': '#BDBDBD'
        },
    )

    plot += r_ggplot2.labs(x=xlabel, y='Adjusted p value (-log10)', title=title)
    plot.plot()
number_of_peaks = len(dataf[0])
cvI = []
newRow = []
for i in range(1, number_of_peaks+1):
    row = dataf.rx(i, True)
    rowA = np.array(row)
    newRow.append(rowA[2:])
    cvI.append(cv(rowA[2:]))
    #cv.append(rowA[2:].std()/rowA[2:].mean())

cv_r = robjects.conversion.py2ri(cvI)
df_cv = {'CV': cv_r}
dataf_cv = robjects.DataFrame(df_cv)
dtf_cv = robjects.r.melt(dataf_cv)

d = dataf.cbind(dtf_cv.rx(2))
d.names[tuple(d.colnames).index('value')] = 'CV'
#d = base.merge_data_frame(dataf,dtf_cv.rx(2))
utilis.write_csv(d, options.csv_output)

dc = dtf_cv.cbind(n_peak=robjects.IntVector(range(1, number_of_peaks+1)))
#n_peak = robjects.IntVector(1,number_of_peaks)

gp = ggplot2.ggplot(dc)
pp = gp + ggplot2.aes_string(x='n_peak', y='value') + ggplot2.geom_point() + \
    ggplot2.theme_bw() + ggplot2.ggtitle('Coefficient of Variation') + \
    ggplot2.scale_x_continuous("Number of Peaks") + ggplot2.scale_y_continuous("CV")
r.X11()
pp.plot()
##text_log+="average: "+str(rmean(test23)[0])+end
##text_log+="sum: "+str(rsum(test23)[0])+end
#
#roughbin= round(ma[0]/100)
#bins=round(roughbin/100)*100
#ma2=rmax(ed)
#dataf_subset = dataf.rx(dataf.rx2("contig").ro >= 18, true)

scales = importr('scales')
gp = ggplot2.ggplot(dataf)
#geom_histogram(aes(y = ..density..))
# ggplot2.geom_density()+\
# pp = gp + ggplot2.aes_string(x='%s(contrrr)') + ggplot2.geom_histogram()+ggplot2.scale_y_sqrt()

bins = 10
teest3 = robjects.r('theme(axis.text.x=element_text(angle=90))')
pp = gp + \
    ggplot2.aes_string(x='Length') + \
    ggplot2.geom_histogram() + \
    ggplot2.ggtitle("Found IS fragment lengths") + \
    ggplot2.scale_x_continuous(name="fragment lengths, bin="+str(bins), breaks=scales.pretty_breaks(20)) + \
    ggplot2.scale_y_continuous(labels=scales.comma, name="Count", breaks=scales.pretty_breaks(10)) + \
    teest3
pp.plot()
robjects.r.ggsave("/Users/security/science/dna_subj_hist.pdf")
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([float(t - t_0) / float(3600)
                                          for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)

    # make a data frame of the lists
    d = {'start': r_start_times,
         'lengths': r_read_lengths,
         'cumul': cumulative}
    df = robjects.DataFrame(d)

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_point() \
        + ggplot2.geom_line() \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5, units="in", res=300)
        else:
            print >>sys.stderr, "Unrecognized extension for %s!" % (plot_file)
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()
heat_demand = np.zeros(37)
Bdim = robjects.FloatVector([12, 6])
for i, BO in enumerate(range(0, 361, 10)):
    res = ECR(Building_Orientation=BO, Building_Dim=Bdim)
    heat_demand[i] = res[2][0]

# Transform to R data types
hd = robjects.FloatVector([h for h in heat_demand])
bo = robjects.FloatVector([b for b in range(0, 361, 10)])

# Create a python dictionary
p_datadic = {'Heat_Demand': hd, 'Building_Orientation': bo}

# Create R data.frame
r_dataf = robjects.DataFrame(p_datadic)

# plot with ggplot2
gp = ggplot2.ggplot(r_dataf)
pp = gp + ggplot2.aes_string(y='Heat_Demand', x='Building_Orientation') + \
    ggplot2.geom_line(colour="red", size=1) + \
    ggplot2.coord_polar(direction=-1, start=-pi/2) + \
    ggplot2.ggtitle("Heat demand for all possible building orientations") + \
    ggplot2.scale_x_continuous(breaks=robjects.FloatVector(range(0, 360, 15)))
pp.plot()
# (a graphics device is assumed to have been opened earlier in the original script)
grdevices.dev_off()
def plot_collectors_curve(args, start_times, read_lengths):
    """
    Use rpy2 to create a collectors curve of the run
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    # set t_0 as the first measured time for the read.
    t_0 = start_times[0]

    # adjust times to be relative to t_0
    r_start_times = robjects.FloatVector([float(t - t_0) / float(3600) + 0.00000001
                                          for t in start_times])
    r_read_lengths = robjects.IntVector(read_lengths)

    # compute the cumulative based on reads or total base pairs
    if args.plot_type == 'reads':
        y_label = "Total reads"
        cumulative = r.cumsum(robjects.IntVector([1] * len(start_times)))
    elif args.plot_type == 'basepairs':
        y_label = "Total base pairs"
        cumulative = r.cumsum(r_read_lengths)

    step = args.skip

    # make a data frame of the lists
    d = {
        'start': robjects.FloatVector(
            [r_start_times[n] for n in xrange(0, len(r_start_times), step)]),
        'lengths': robjects.IntVector(
            [r_read_lengths[n] for n in xrange(0, len(r_read_lengths), step)]),
        'cumul': robjects.IntVector(
            [cumulative[n] for n in xrange(0, len(cumulative), step)])
    }
    df = robjects.DataFrame(d)

    if args.savedf:
        robjects.r("write.table")(df, file=args.savedf, sep="\t")

    # title
    total_reads = len(read_lengths)
    total_bp = sum(read_lengths)
    plot_title = "Yield: " \
        + str(total_reads) + " reads and " \
        + str(total_bp) + " base pairs."

    # plot
    gp = ggplot2.ggplot(df)
    pp = gp + ggplot2.aes_string(x='start', y='cumul') \
        + ggplot2.geom_step(size=2) \
        + ggplot2.scale_x_continuous('Time (hours)') \
        + ggplot2.scale_y_continuous(y_label) \
        + ggplot2.ggtitle(plot_title)

    # extrapolation
    if args.extrapolate:
        start = robjects.ListVector({'a': 1, 'b': 1})
        pp = pp + ggplot2.stat_smooth(fullrange='TRUE', method='nls',
                                      formula='y~a*I((x*3600)^b)',
                                      se='FALSE', start=start) \
            + ggplot2.xlim(0, float(args.extrapolate))

    if args.theme_bw:
        pp = pp + ggplot2.theme_bw()

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=8.5, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=8.5, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            sys.exit()

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        raw_input()