def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     categories = []
     for value in row_filter:
         if re.search('^crawldb:status:db_', value):
             replacement = re.sub('^crawldb:status:db_', '', value)
             categories.append(replacement)
             data.replace(to_replace=value, value=replacement, inplace=True)
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories.reverse())
     data['size'] = data['size'].astype(float)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts\n(before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     data = data[['crawl', 'percentage', 'type']]
     categories = []
     for value in row_filter:
         if re.search('^fetcher:(?:aggr:)?', value):
             replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
             categories.append(replacement)
             data.replace(to_replace=value, value=replacement, inplace=True)
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories.reverse())
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
示例#3
0
def plot_thresh_distr(motif_names, thresh, out_dir, width = 350):
    """Creates boxplots of the thresholds used with each feature."""

    df = pd.DataFrame({'motif':motif_names, 'thresh':thresh})
    df = df[df['thresh'] > 1]

    df.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep = '\t', index = False)
    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh') + \
            ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70])) + \
            ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip() + \
            ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                             'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                             'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    for ext in ['.pdf', '.png']:
        ro.r.ggsave(filename = os.path.join(out_dir, 'count_thresh_bar' + ext),
                    plot = gp, width = width, height = 300, unit = 'mm')
示例#4
0
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Creates boxplots of the thresholds used with each feature."""

    df = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    df = df[df['thresh'] > 1]

    df.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep='\t', index=False)
    fsize = 10
    r_df = com.convert_to_r_dataframe(df)
    gp = ggplot2.ggplot(r_df) + ggplot2.aes_string(x = 'factor(motif)', y = 'thresh') + \
            ggplot2.geom_boxplot() + ggplot2.scale_y_continuous('Threshold counts', limits = ro.IntVector([0, 70])) + \
            ggplot2.scale_x_discrete('') + ggplot2.theme_bw() + ggplot2.coord_flip() + \
            ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize),
                             'axis.text.y':ggplot2.element_text(size = fsize, hjust = 1),
                             'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    for ext in ['.pdf', '.png']:
        ro.r.ggsave(filename=os.path.join(out_dir, 'count_thresh_bar' + ext),
                    plot=gp,
                    width=width,
                    height=300,
                    unit='mm')
示例#5
0
 def plot_stacked_bar(self, data, row_filter, img_file, ratio=1.0):
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if re.search('^fetcher:(?:aggr:)?', value):
             replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
             data.replace(to_replace=value, value=replacement, inplace=True)
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if re.search('^fetcher:(?:aggr:)?', value):
             replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
             data.replace(to_replace=value, value=replacement, inplace=True)
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if re.search('^crawldb:status:db_', value):
             replacement = re.sub('^crawldb:status:db_', '', value)
             data.replace(to_replace=value, value=replacement, inplace=True)
     data['size'] = data['size'].astype(float)
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts (before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
示例#8
0
    def plot(self,
             fn,
             x='x',
             y='y',
             col=None,
             group=None,
             w=1100,
             h=800,
             size=2,
             smooth=True,
             point=True,
             jitter=False,
             boxplot=False,
             boxplot2=False,
             title=False,
             flip=False,
             se=False,
             density=False,
             line=False):
        df = self.df
        #import math, datetime

        grdevices = importr('grDevices')

        if not title:
            title = fn.split("/")[-1]

        grdevices.png(file=fn, width=w, height=h)
        gp = ggplot2.ggplot(df)
        pp = gp
        if col and group:
            pp += ggplot2.aes_string(x=x, y=y, col=col, group=group)
        elif col:
            pp += ggplot2.aes_string(x=x, y=y, col=col)
        elif group:
            pp += ggplot2.aes_string(x=x, y=y, group=group)
        else:
            pp += ggplot2.aes_string(x=x, y=y)

        if boxplot:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                           color='blue')
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if point:
            if jitter:
                if col:
                    pp += ggplot2.geom_point(ggplot2.aes_string(fill=col,
                                                                col=col),
                                             size=size,
                                             position='jitter')
                else:
                    pp += ggplot2.geom_point(size=size, position='jitter')
            else:
                if col:
                    pp += ggplot2.geom_point(ggplot2.aes_string(fill=col,
                                                                col=col),
                                             size=size)
                else:
                    pp += ggplot2.geom_point(size=size)

        if boxplot2:
            if col:
                pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                           color='blue',
                                           outlier_colour="NA")
            else:
                pp += ggplot2.geom_boxplot(color='blue')

        if smooth:
            if smooth == 'lm':
                if col:
                    pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                              size=1,
                                              method='lm',
                                              se=se)
                else:
                    pp += ggplot2.stat_smooth(col='blue',
                                              size=1,
                                              method='lm',
                                              se=se)
            else:
                if col:
                    pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                              size=1,
                                              se=se)
                else:
                    pp += ggplot2.stat_smooth(col='blue', size=1, se=se)

        if density:
            pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

        if line:
            pp += ggplot2.geom_line(position='jitter')

        pp += ggplot2.opts(
            **{
                'title': title,
                'axis.text.x': ggplot2.theme_text(size=24),
                'axis.text.y': ggplot2.theme_text(size=24, hjust=1)
            })
        #pp+=ggplot2.scale_colour_brewer(palette="Set1")
        pp += ggplot2.scale_colour_hue()
        if flip:
            pp += ggplot2.coord_flip()

        pp.plot()
        grdevices.dev_off()
        print ">> saved: " + fn
示例#9
0
	def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
		df=self.df
		#import math, datetime
		

		grdevices = importr('grDevices')

		if not title:
			title=fn.split("/")[-1]

		grdevices.png(file=fn, width=w, height=h)
		gp = ggplot2.ggplot(df)
		pp = gp	
		if col and group:
			pp+=ggplot2.aes_string(x=x, y=y,col=col,group=group)
		elif col:
			pp+=ggplot2.aes_string(x=x, y=y,col=col)
		elif group:
			pp+=ggplot2.aes_string(x=x, y=y,group=group)
		else:
			pp+=ggplot2.aes_string(x=x, y=y)	

		if boxplot:
			if col:
				pp+=ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),color='blue')
			else:
				pp+=ggplot2.geom_boxplot(color='blue')	

		if point:
			if jitter:
				if col:
					pp+=ggplot2.geom_point(ggplot2.aes_string(fill=col,col=col),size=size,position='jitter')
				else:
					pp+=ggplot2.geom_point(size=size,position='jitter')
			else:
				if col:
					pp+=ggplot2.geom_point(ggplot2.aes_string(fill=col,col=col),size=size)
				else:
					pp+=ggplot2.geom_point(size=size)


		if boxplot2:
			if col:
				pp+=ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),color='blue',outlier_colour="NA")
			else:
				pp+=ggplot2.geom_boxplot(color='blue')

		if smooth:
			if smooth=='lm':
				if col:
					pp+=ggplot2.stat_smooth(ggplot2.aes_string(col=col),size=1,method='lm',se=se)
				else:
					pp+=ggplot2.stat_smooth(col='blue',size=1,method='lm',se=se)
			else:
				if col:
					pp+=ggplot2.stat_smooth(ggplot2.aes_string(col=col),size=1,se=se)
				else:
					pp+=ggplot2.stat_smooth(col='blue',size=1,se=se)

		if density:
			pp+=ggplot2.geom_density(ggplot2.aes_string(x=x,y='..count..'))

		if line:
			pp+=ggplot2.geom_line(position='jitter')


		pp+=ggplot2.opts(**{'title' : title, 'axis.text.x': ggplot2.theme_text(size=24), 'axis.text.y': ggplot2.theme_text(size=24,hjust=1)} )
		#pp+=ggplot2.scale_colour_brewer(palette="Set1")
		pp+=ggplot2.scale_colour_hue()
		if flip:
			pp+=ggplot2.coord_flip()



		pp.plot()
		grdevices.dev_off()
		print ">> saved: "+fn
示例#10
0
文件: r_plot.py 项目: yarden/biorpy
def plot_qc_reads(qc_df):
    """
    Plot number of reads part of a pipeline QC file.
    """
    # Record NA values as 0
    qc_df = qc_df.fillna(0)#.set_index("sample")
    cols = ["sample",
            "num_reads",
            "num_mapped",
            "num_unique_mapped",
            "num_junctions"]
    qc_df = qc_df[cols]
    melted_qc = pandas.melt(qc_df, id_vars=["sample"])
    qc_r = conversion_pydataframe(melted_qc)
    labels = tuple(["num_reads",
                    "num_mapped",
                    "num_unique_mapped",
                    "num_junctions"])
    labels = robj.StrVector(labels)
    variable_i = qc_r.names.index('variable')
    qc_r[variable_i] = robj.FactorVector(qc_r[variable_i],
                                         levels = labels)
    ggplot2.theme_set(ggplot2.theme_bw(12))
    scales = importr("scales")
    r_opts = r.options(scipen=4)
    p = ggplot2.ggplot(qc_r) + \
        ggplot2.geom_point(aes_string(x="sample", y="value")) + \
        ggplot2.scale_y_continuous(trans=scales.log10_trans(),
                                   breaks=scales.trans_breaks("log10",
                                                              robj.r('function(x) 10^x')),
                                   labels=scales.trans_format("log10",
                                                              robj.r('math_format(10^.x)'))) + \
        r.xlab("CLIP-Seq samples") + \
        r.ylab("No. reads") + \
        ggplot2.coord_flip() + \
        ggplot2.facet_wrap(Formula("~ variable"), ncol=1) + \
        theme(**{"panel.grid.major.x": element_blank(),
                 "panel.grid.minor.x": element_blank(),
                 "panel.grid.major.y": theme_line(size=0.5,colour="grey66",linetype=3)})
    p.plot()

    return
    r.par(mfrow=np.array([1,2]))
    num_samples = len(qc_df.num_reads)
    r.par(bty="n", lwd=1.7, lty=2)
    r_opts = r.options(scipen=4)
    r.options(r_opts)
    r.dotchart(convert_to_r_matrix(qc_df[["num_reads",
                                          "num_mapped",
                                          "num_unique_mapped"]]),
               xlab="No. reads",
               lcolor="black",
               pch=19,
               gcolor="darkblue",
               cex=0.8)
    r.par(bty="n")
    r.dotchart(convert_to_r_matrix(qc_df[["num_ribosub_mapped",
                                          "num_ribo",
                                          "num_junctions"]]),
               xlab="No. reads",
               lcolor="black",
               pch=19,
               gcolor="darkblue",
               cex=0.8)