Exemplo n.º 1
0
    def _plot_with_rpy2(self, regions, filename):
        """Draw one PDF page of three stacked ggplot2 panels per region.

        For each of the first ``self.num_regs`` entries of ``regions`` the
        page shows, per row ("map") of the region's data and for the
        ``2 * self.num_bins + 1`` bins centred on the middle bin:

        * ``region.weighted`` on a log2 y axis,
        * ``-log10(region.pvalues)``,
        * ``-log10(region.corrected_pvalues)`` with a dashed line at
          ``-log10(self.threshold)``.

        Regions whose ``weighted`` data is all zero/empty are skipped with a
        warning.

        :param regions: sequence of objects exposing ``weighted``,
            ``pvalues``, ``corrected_pvalues``, ``bin`` and ``positions``.
        :param filename: output path without extension; ``.pdf`` is appended.
        :raises SystemExit: with status 1 when a region provides fewer than
            ``self.num_bins`` bins on either side of its middle bin.
        """
        from rpy2 import robjects
        import rpy2.robjects.lib.ggplot2 as ggplot2
        from rpy2.robjects.lib import grid
        from rpy2.robjects.packages import importr
        grdevices = importr('grDevices')
        base = importr('base')
        grdevices.pdf(file=filename + '.pdf')

        # Relative bin offsets used as x values: -num_bins .. +num_bins.
        t = list(range(-self.num_bins, self.num_bins + 1))
        try:
            for region in regions[:self.num_regs]:
                if not np.any(region.weighted):
                    logger.warning(
                        "Warning: No data for region located on bin " + str(region.bin) + ". Not plotting this one.")
                    continue
                # Floor division keeps ``middle`` an int on Python 3 as well,
                # so it stays usable as a slice bound below.
                middle = (len(region.weighted[0]) - 1) // 2
                if middle < self.num_bins:
                    logger.error("Warning: There are less bins calculated for regions than you want to plot.")
                    sys.exit(1)
                window = slice(middle - self.num_bins, middle + self.num_bins + 1)
                n_maps = len(region.weighted)
                d = {
                    # Row index repeated once per plotted bin; used for grouping/colour.
                    'map': robjects.StrVector([str(m) for m in range(n_maps) for _ in t]),
                    't': robjects.FloatVector(t * n_maps),
                    'e': robjects.FloatVector(
                        [v for row in region.weighted for v in row[window]]),
                    'p': robjects.FloatVector(
                        [-np.log10(v) for row in region.pvalues for v in row[window]]),
                    'c': robjects.FloatVector(
                        [-np.log10(v) for row in region.corrected_pvalues for v in row[window]]),
                }
                dataf = robjects.DataFrame(d)
                gp = ggplot2.ggplot(dataf)
                page_title = "\n".join(wrap("Bin " + str(region.bin) + " : " + str(region.positions)))

                # Panel 1: intensities.
                p1 = gp + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='e', group='map', colour='map'),
                    alpha=0.8) \
                    + ggplot2.scale_y_continuous(trans='log2') \
                    + ggplot2.ggtitle(page_title) \
                    + ggplot2.labs(y="log Intensity") \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'axis.title.x': ggplot2.element_blank(),
                                       'axis.text.y': ggplot2.element_text(angle=45),
                                       'axis.text.x': ggplot2.element_blank(),
                                       'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                # Panel 2: raw p-values.
                p2 = gp + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='p', group='map', colour='map'),
                    alpha=0.8) \
                    + ggplot2.labs(y="-log10(p-value)") \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'axis.title.x': ggplot2.element_blank(),
                                       'axis.text.x': ggplot2.element_blank(),
                                       'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")
                # Panel 3: corrected p-values plus the significance threshold.
                p3 = gp + ggplot2.geom_line(
                    mapping=ggplot2.aes_string(x='t', y='c', group='map', colour='map'),
                    alpha=0.8) \
                    + ggplot2.labs(y="-log10(q-value)",
                                   x='bins (' + str(self.bin_res) + ' bp each)') \
                    + ggplot2.geom_hline(
                        mapping=ggplot2.aes_string(yintercept=str(-np.log10(self.threshold))),
                        colour='black', alpha=0.8, linetype='dashed') \
                    + ggplot2.theme_classic() \
                    + ggplot2.theme(**{'legend.position': 'none'}) \
                    + ggplot2.scale_colour_brewer(palette="Set1")

                g1 = ggplot2.ggplot2.ggplotGrob(p1)
                g2 = ggplot2.ggplot2.ggplotGrob(p2)
                g3 = ggplot2.ggplot2.ggplotGrob(p3)
                # Stack the three grobs and draw them from the R side.
                robjects.globalenv["g"] = base.rbind(g1, g2, g3, size='first')
                robjects.r("grid::grid.draw(g)")
                grid.newpage()
                logger.debug('Plotted region ' + str(region.bin))
        finally:
            # Always close the PDF device, including on sys.exit() or errors.
            grdevices.dev_off()
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot fetch-status percentages per crawl as horizontal stacked bars.

     Rows are restricted to the ``type`` values in ``row_filter`` (when it
     is non-empty), the ``fetcher:`` / ``fetcher:aggr:`` prefix is stripped
     from those values, and ``type`` becomes an ordered categorical whose
     levels follow ``row_filter`` in reverse order.

     :param data: pandas DataFrame with ``crawl``, ``percentage`` and
         ``type`` columns.
     :param row_filter: ``type`` values to keep; may be empty or None.
     :param img_file: output path relative to ``PLOTDIR``.
     :param ratio: ignored; the aspect ratio is recomputed from the number
         of distinct crawls.
     :return: the ggplot2 plot object, after it has been saved.
     """
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     # Private copy: the in-place replacements below must not act on a
     # slice of the caller's frame.
     data = data[['crawl', 'percentage', 'type']].copy()
     categories = []
     for value in row_filter or []:
         replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
         if replacement != value:
             data.replace(to_replace=value, value=replacement, inplace=True)
         # Keep every filtered value as a level (explicit categories turn
         # unknown values into NaN) and keep the levels unique.
         if replacement not in categories:
             categories.append(replacement)
     # list.reverse() returns None, so build the reversed list explicitly;
     # with no filter fall back to categories inferred by pandas.
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories[::-1] or None)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     # int(7 * ratio) is 0 for a single crawl; never request a zero height.
     p.save(img_path, height=max(1, int(7 * ratio)), width=7)
     return p
Exemplo n.º 3
0
def gg_funcs(functions, bottom, top, N=1000, labels=["Baseline"],
             title="Consumption and Cash-on-Hand", ylab="y", xlab="x",
             loc=loc, ltitle='Variable',
             file_name=None):
    """Evaluate one or more functions on a grid and draw them as a ggplot.

    ``functions`` may be a single callable or a list of callables; each is
    evaluated on ``np.arange(bottom, top, (top - bottom) / N)`` and stored
    in a column named after the matching entry of ``labels``.  The frame is
    melted to long form and plotted with the layers provided by ``mp``.

    Returns the plot object; when ``file_name`` is given the plot is also
    saved through ``mp.ggsave``.
    """
    function_list = functions if type(functions) == list else [functions]

    grid = np.arange(bottom, top, (top - bottom) / N)
    wide = pd.DataFrame({'x': grid})
    for position, func in enumerate(function_list):
        wide[labels[position]] = func(grid)
    long_form = pd.melt(wide, id_vars=['x'])

    g = gg.ggplot(long_form)
    layers = (
        mp.base_plot,
        mp.line,
        mp.point,
        mp.theme_bw(base_size=9),
        mp.fte_theme,
        mp.colors,
        gg.labs(title=title, y=ylab, x=xlab),
        mp.legend_f(loc),
        mp.legend_t_c(ltitle),
        mp.legend_t_s(ltitle),
    )
    for layer in layers:
        g = g + layer

    if file_name is not None:
        mp.ggsave(file_name, g)
    return g
Exemplo n.º 4
0
def gg_funcs(functions, bottom, top, N=1000, labels=["Baseline"],
             title="Consumption and Cash-on-Hand", ylab="y", xlab="x",
             loc=loc, ltitle='Variable',
             file_name=None):
    """Plot one function, or a list of functions, over ``[bottom, top)``.

    Each function is applied to ``np.arange(bottom, top, (top - bottom) / N)``
    and its values are stored under the corresponding name from ``labels``.
    The resulting table is melted on ``x`` and rendered with the ``mp``
    plot layers.  If ``file_name`` is not None the plot is written with
    ``mp.ggsave``.  The plot object is returned.
    """
    if type(functions) == list:
        function_list = functions
    else:
        function_list = [functions]

    x = np.arange(bottom, top, (top - bottom) / N)
    table = pd.DataFrame({'x': x})
    for idx, function in enumerate(function_list):
        table[labels[idx]] = function(x)
    table = pd.melt(table, id_vars=['x'])

    g = gg.ggplot(table)
    for layer in (mp.base_plot, mp.line, mp.point,
                  mp.theme_bw(base_size=9), mp.fte_theme, mp.colors,
                  gg.labs(title=title, y=ylab, x=xlab),
                  mp.legend_f(loc),
                  mp.legend_t_c(ltitle),
                  mp.legend_t_s(ltitle)):
        g = g + layer

    if file_name is not None:
        mp.ggsave(file_name, g)
    return g
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb status counts per crawl as horizontal stacked bars.

     Rows are restricted to the ``type`` values in ``row_filter`` (when it
     is non-empty), the ``crawldb:status:db_`` prefix is stripped from those
     values, and ``type`` becomes an ordered categorical whose levels follow
     ``row_filter`` in reverse order.  ``size`` is converted to float before
     plotting.

     :param data: pandas DataFrame with ``crawl``, ``size`` and ``type``
         columns.
     :param row_filter: ``type`` values to keep; may be empty or None.
     :param img_file: output path relative to ``PLOTDIR``.
     :param ratio: ignored; the aspect ratio is recomputed from the number
         of distinct crawls.
     :return: the ggplot2 plot object, after it has been saved.
     """
     if row_filter:
         # Copy so the in-place edits below act on an independent frame
         # instead of a slice of the caller's data.
         data = data[data['type'].isin(row_filter)].copy()
     categories = []
     for value in row_filter or []:
         replacement = re.sub('^crawldb:status:db_', '', value)
         if replacement != value:
             data.replace(to_replace=value, value=replacement, inplace=True)
         # Keep every filtered value as a level (explicit categories turn
         # unknown values into NaN) and keep the levels unique.
         if replacement not in categories:
             categories.append(replacement)
     # list.reverse() returns None, so build the reversed list explicitly;
     # with no filter fall back to categories inferred by pandas.
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories[::-1] or None)
     data['size'] = data['size'].astype(float)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts\n(before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     # int(7 * ratio) is 0 for a single crawl; never request a zero height.
     p.save(img_path, height=max(1, int(7 * ratio)), width=7)
     return p
Exemplo n.º 6
0
 def line_plot(self, data, title, ylabel, img_file,
               x='date', y='size', c='type', clabel=''):
     """Draw a line-and-point plot of ``y`` over ``x``, coloured by ``c``.

     The backend is selected by the module-level ``PLOTLIB`` value:
     ``'ggplot'`` or ``'rpy2.ggplot2'``.  The plot is saved below
     ``PLOTDIR`` and returned.

     :param data: DataFrame holding the ``x``, ``y`` and ``c`` columns.  With
         the rpy2 backend the ``y`` column is converted to float in place.
     :param title: plot title.
     :param ylabel: y axis label.
     :param img_file: output path relative to ``PLOTDIR``.
     :param clabel: legend title (used by the rpy2 backend only).
     :raises ValueError: if ``PLOTLIB`` names an unsupported backend.
     """
     if PLOTLIB == 'ggplot':
         # date_label = "%Y\n%b"
         date_label = "%Y\n%W"  # year + week number
         p = ggplot(data,
                    aes(x=x, y=y, color=c)) \
             + ggtitle(title) \
             + ylab(ylabel) \
             + xlab(' ') \
             + scale_x_date(breaks=date_breaks('3 months'),
                            labels=date_label) \
             + geom_line() + geom_point()
     elif PLOTLIB == 'rpy2.ggplot2':
         # convert y axis to float because R uses 32-bit signed integers,
         # values > 2 bln. (2^31) will overflow
         data[y] = data[y].astype(float)
         p = ggplot2.ggplot(data) \
             + ggplot2.aes_string(x=x, y=y, color=c) \
             + ggplot2.geom_line() + ggplot2.geom_point() \
             + GGPLOT2_THEME \
             + ggplot2.labs(title=title, x='', y=ylabel, color=clabel)
     else:
         # Previously this fell through and failed on the unbound ``p``.
         raise ValueError('Unsupported plotting library: %r' % (PLOTLIB,))
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     # data.to_csv(img_path + '.csv')
     return p
Exemplo n.º 7
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance.

    Each pore returned by ``minion_flowcell_layout()`` is drawn as a point
    on a 16 x 32 grid, coloured by the log10 of its total base pairs; pores
    without data (or with a non-positive total) get the value 0.

    :param parser: unused here.
    :param args: namespace whose ``saveas`` attribute is either None (show
        the plot and wait for Enter) or an output path ending in ``.pdf``
        or ``.png``.
    :param tot_bp_per_pore: mapping of pore identifier to total base pairs.
    :raises SystemExit: with status 1 if ``args.saveas`` has an unrecognized
        extension.
    """
    import math

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    flowcell_layout = minion_flowcell_layout()

    pore_values = []
    for pore in flowcell_layout:
        # log10 is undefined for totals <= 0; treat those like missing pores.
        if pore in tot_bp_per_pore and tot_bp_per_pore[pore] > 0:
            pore_values.append(math.log10(tot_bp_per_pore[pore]))
        else:
            pore_values.append(0)

    # make a data frame of the lists
    # (list(range(...)) so the repetition also works on Python 3, where
    # range objects cannot be multiplied)
    d = {
        "rownum": robjects.IntVector(list(range(1, 17)) * 32),
        "colnum": robjects.IntVector(sorted(list(range(1, 33)) * 16)),
        # NOTE(review): log10 values are floats but are stored in an
        # IntVector; a FloatVector may be intended -- confirm.
        "log10_tot_bp": robjects.IntVector(pore_values),
        "labels": robjects.IntVector(flowcell_layout),
    }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (
        gp
        + gg.aes_string(y="factor(rownum, rev(rownum))", x="factor(colnum)")
        + gg.geom_point(gg.aes_string(color="log10_tot_bp"), size=7)
        + gg.geom_text(gg.aes_string(label="labels"), colour="white", size=2)
        + gg.scale_colour_gradient2(low="black", mid="black", high="red")
        + gg.coord_fixed(ratio=1.4)
        + gg.labs(x=gg.NULL, y=gg.NULL)
    )

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # Non-zero status: this is an error exit.
            sys.exit(1)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print("Type enter to exit.")
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
Exemplo n.º 8
0
def plot_start(x, y):
    """Scatter-plot columns ``x`` and ``y`` of the first ``*.csv`` file found.

    The first match of ``glob('*.csv')`` is read with R's ``read.csv`` and
    the plot is written to ``point.png``.
    """
    # Imported here because this import emits warnings; this way they only
    # appear when the function is actually called.
    import rpy2.robjects.lib.ggplot2 as ggplot2
    utils = importr('utils')
    csv_path = glob('*.csv')[0]
    data = utils.read_csv(csv_path)

    plot = ggplot2.ggplot(data)
    plot = plot + ggplot2.aes_string(x=x, y=y)
    plot = plot + ggplot2.geom_point()
    plot = plot + ggplot2.scale_colour_gradient(low="yellow", high="red")
    plot = plot + ggplot2.labs(title="mtcars", x='wt', y='mpg')
    plot.save('point.png')
Exemplo n.º 9
0
	def histogram(self, dataframe, filename, parm, group, units):
		"""Write a 512x512 PNG histogram of ``parm`` filled by ``group``.

		:param dataframe: data passed to ``ggplot2.ggplot``.
		:param filename: path of the PNG file to create.
		:param parm: column plotted on the x axis.
		:param group: column used for the fill colour.
		:param units: text appended to ``parm`` in the x axis label.
		"""
		with suppress_stdout():
			grdevices.png(file=filename, width=512, height=512)
			try:
				gg = (
					ggplot2.ggplot(dataframe)
					+ ggplot2.aes_string(x=parm, fill=group)
					+ ggplot2.geom_histogram(colour="black")
					+ ggplot2.labs(x=parm + " " + units)
				)
				gg.plot()
			finally:
				# Close the device even if building or drawing the plot fails.
				grdevices.dev_off()
Exemplo n.º 10
0
 def histogram(self, dataframe, filename, parm, group, units):
     """Write a 512x512 PNG histogram of ``parm`` filled by ``group``.

     :param dataframe: data passed to ``ggplot2.ggplot``.
     :param filename: path of the PNG file to create.
     :param parm: column plotted on the x axis.
     :param group: column used for the fill colour.
     :param units: text appended to ``parm`` in the x axis label.
     """
     with suppress_stdout():
         grdevices.png(file=filename, width=512, height=512)
         try:
             gg = (
                 ggplot2.ggplot(dataframe)
                 + ggplot2.aes_string(x=parm, fill=group)
                 + ggplot2.geom_histogram(colour="black")
                 + ggplot2.labs(x=parm + " " + units)
             )
             gg.plot()
         finally:
             # Close the device even if building or drawing the plot fails.
             grdevices.dev_off()
Exemplo n.º 11
0
 def plot_similarity_matrix(self, item_type, image_file, title):
     '''Plot similarities of crawls (overlap of unique items)
     as heat map matrix.

     Values come from ``self.similarity[item_type][crawl1][crawl2]``.
     Each tile is labelled with the rounded similarity and the colour scale
     is centred on the median similarity.  When there are more than
     ``self.MAX_MATRIX_SIZE`` crawls, only the ``MAX_MATRIX_SIZE`` crawls
     sorting last by name are kept.

     :param item_type: key into ``self.similarity``.
     :param image_file: output path relative to ``PLOTDIR``.
     :param title: plot title.
     :return: the ggplot2 plot object, after it has been saved.
     '''
     data = defaultdict(dict)
     n = 1
     for crawl1 in self.similarity[item_type]:
         for crawl2 in self.similarity[item_type][crawl1]:
             similarity = self.similarity[item_type][crawl1][crawl2]
             data['crawl1'][n] = MonthlyCrawl.short_name(crawl1)
             data['crawl2'][n] = MonthlyCrawl.short_name(crawl2)
             data['similarity'][n] = similarity
             data['sim_rounded'][n] = similarity  # to be rounded
             n += 1
     data = pandas.DataFrame(data)
     print(data)
     # select median of similarity values as midpoint of similarity scale
     midpoint = data['similarity'].median()
     # Wider value ranges get fewer decimals and larger label text.
     decimals = 3
     textsize = 2
     minshown = .0005
     if (data['similarity'].max()-data['similarity'].min()) > .2:
         decimals = 2
         textsize = 2.8
         minshown = .005
     label_format = '{0:.' + str(decimals) + 'f}'
     data['sim_rounded'] = data['sim_rounded'].apply(
         lambda x: label_format.format(x).lstrip('0')
         if x >= minshown else '0')
     print('Median of similarities for', item_type, '=', midpoint)
     matrix_size = len(self.similarity[item_type])
     if matrix_size > self.MAX_MATRIX_SIZE:
         # Keep exactly MAX_MATRIX_SIZE crawls.  The previous ``n > MAX``
         # test with a zero-based counter kept one crawl too many.
         ordered = sorted(self.similarity[item_type], reverse=True)
         for crawl in ordered[self.MAX_MATRIX_SIZE:]:
             short_name = MonthlyCrawl.short_name(crawl)
             data = data[data['crawl1'] != short_name]
             data = data[data['crawl2'] != short_name]
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl2', y='crawl1',
                              fill='similarity', label='sim_rounded') \
         + ggplot2.geom_tile(color="white") \
         + ggplot2.scale_fill_gradient2(low="red", high="blue", mid="white",
                                        midpoint=midpoint, space="Lab") \
         + GGPLOT2_THEME \
         + ggplot2.coord_fixed() \
         + ggplot2.theme(**{'axis.text.x':
                            ggplot2.element_text(angle=45,
                                                 vjust=1, hjust=1)}) \
         + ggplot2.labs(title=title, x='', y='') \
         + ggplot2.geom_text(color='black', size=textsize)
     img_path = os.path.join(PLOTDIR, image_file)
     p.save(img_path)
     return p
Exemplo n.º 12
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance.

    Each pore returned by ``minion_flowcell_layout()`` is drawn as a point
    on a 16 x 32 grid, coloured by the log10 of its total base pairs; pores
    without data (or with a non-positive total) get the value 0.

    :param parser: unused here.
    :param args: namespace whose ``saveas`` attribute is either None (show
        the plot and wait for Enter) or an output path ending in ``.pdf``
        or ``.png``.
    :param tot_bp_per_pore: mapping of pore identifier to total base pairs.
    :raises SystemExit: with status 1 if ``args.saveas`` has an unrecognized
        extension.
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    flowcell_layout = minion_flowcell_layout()

    pore_values = []
    for pore in flowcell_layout:
        # log10 is undefined for totals <= 0; treat those like missing pores.
        if pore in tot_bp_per_pore and tot_bp_per_pore[pore] > 0:
            pore_values.append(math.log10(tot_bp_per_pore[pore]))
        else:
            pore_values.append(0)

    # make a data frame of the lists
    # (list(range(...)) so the repetition also works on Python 3, where
    # range objects cannot be multiplied)
    d = {'rownum': robjects.IntVector(list(range(1, 17)) * 32),
         'colnum': robjects.IntVector(sorted(list(range(1, 33)) * 16)),
         # NOTE(review): log10 values are floats but are stored in an
         # IntVector; a FloatVector may be intended -- confirm.
         'log10_tot_bp': robjects.IntVector(pore_values),
         'labels': robjects.IntVector(flowcell_layout)
         }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = gp + gg.aes_string(y='factor(rownum, rev(rownum))',
                            x='factor(colnum)') \
        + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size=7) \
        + gg.geom_text(gg.aes_string(label='labels'), colour="white", size=2) \
        + gg.scale_colour_gradient2(low="black", mid="black", high="red") \
        + gg.coord_fixed(ratio=1.4) \
        + gg.labs(x=gg.NULL, y=gg.NULL)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # Non-zero status: this is an error exit.
            sys.exit(1)

        pp.plot()
        grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
Exemplo n.º 13
0
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Write a PDF box plot of ``abundance`` per ``genotype``.

    Jittered points and plain points are overlaid on the boxes.

    :param data: data frame with ``genotype`` and ``abundance`` columns.
    :param plot_file_path: path of the PDF file to create.
    :param title: plot title.
    :param xlabel: x axis label.
    :param ylabel: y axis label.
    """
    grdevices.pdf(file=plot_file_path)
    try:
        gp = ggplot2.ggplot(data)
        pp = gp \
            + ggplot2.aes_string(x='genotype', y='abundance') \
            + ggplot2.geom_boxplot() \
            + ggplot2.ggtitle(title) \
            + ggplot2.labs(x=xlabel, y=ylabel) \
            + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1)) \
            + ggplot2.geom_point()

        pp.plot()
    finally:
        # Close the PDF device even when plotting fails.
        grdevices.dev_off()
 def plot_dupl_url(self):
     """Plot pages per URL (URL-level duplicates), one facet per crawl.

     Uses the rows of ``self.histogr`` whose ``type`` is ``url``, with a
     log10 y axis.  Saves ``crawler/histogr_url_dupl.png`` below
     ``PLOTDIR`` and returns the plot.
     """
     histogr = self.histogr
     url_rows = histogr[histogr['type'].isin(['url'])]
     p = ggplot2.ggplot(url_rows)
     p = p + ggplot2.aes_string(x='count', y='frequency')
     p = p + ggplot2.geom_jitter()
     p = p + ggplot2.facet_wrap('crawl', ncol=5)
     p = p + ggplot2.labs(title='Pages per URL (URL-level duplicates)',
                          x='(duplicate) pages per URL',
                          y='log(frequency)')
     # a log-log scale (scale_x_log10) would also be possible
     p = p + ggplot2.scale_y_log10()
     target = os.path.join(PLOTDIR, 'crawler/histogr_url_dupl.png')
     p.save(target)
     return p
 def plot_host_domain_tld(self):
     """Plot frequency polygons of URLs per host / domain / TLD per crawl.

     Uses the rows of ``self.histogr`` whose ``type`` is ``host``,
     ``domain`` or ``tld`` and whose ``type_counted`` is ``url``; both axes
     are log10.  Saves ``crawler/histogr_host_domain_tld.png`` below
     ``PLOTDIR`` and returns the plot.
     """
     histogr = self.histogr
     by_type = histogr[histogr['type'].isin(['host', 'domain', 'tld'])]
     selected = by_type[by_type['type_counted'].isin(['url'])]
     p = ggplot2.ggplot(selected)
     p = p + ggplot2.aes_string(x='count', weight='frequency', color='type')
     p = p + ggplot2.geom_freqpoly(bins=20)
     p = p + ggplot2.facet_wrap('crawl', ncol=4)
     p = p + ggplot2.labs(title='', x='URLs per Host / Domain / TLD',
                          y='Frequency')
     p = p + ggplot2.scale_y_log10()
     p = p + ggplot2.scale_x_log10()
     target = os.path.join(PLOTDIR, 'crawler/histogr_host_domain_tld.png')
     p.save(target)
     return p
Exemplo n.º 16
0
 def plot_dupl_url(self):
     """Plot pages per URL (URL-level duplicates), one facet per crawl.

     Uses the rows of ``self.histogr`` whose ``type`` is ``url``, with a
     log10 y axis.  Saves ``crawler/histogr_url_dupl.png`` below
     ``PLOTDIR`` and returns the plot.
     """
     histogr = self.histogr
     url_rows = histogr[histogr['type'].isin(['url'])]
     p = ggplot2.ggplot(url_rows)
     p = p + ggplot2.aes_string(x='count', y='frequency')
     p = p + ggplot2.geom_jitter()
     p = p + ggplot2.facet_wrap('crawl', ncol=5)
     p = p + ggplot2.labs(title='Pages per URL (URL-level duplicates)',
                          x='(duplicate) pages per URL',
                          y='log(frequency)')
     # a log-log scale (scale_x_log10) would also be possible
     p = p + ggplot2.scale_y_log10()
     target = os.path.join(PLOTDIR, 'crawler/histogr_url_dupl.png')
     p.save(target)
     return p
Exemplo n.º 17
0
 def plot_host_domain_tld(self):
     """Plot frequency polygons of URLs per host / domain / TLD per crawl.

     Uses the rows of ``self.histogr`` whose ``type`` is ``host``,
     ``domain`` or ``tld`` and whose ``type_counted`` is ``url``; both axes
     are log10.  Saves ``crawler/histogr_host_domain_tld.png`` below
     ``PLOTDIR`` and returns the plot.
     """
     histogr = self.histogr
     by_type = histogr[histogr['type'].isin(['host', 'domain', 'tld'])]
     selected = by_type[by_type['type_counted'].isin(['url'])]
     p = ggplot2.ggplot(selected)
     p = p + ggplot2.aes_string(x='count', weight='frequency', color='type')
     p = p + ggplot2.geom_freqpoly(bins=20)
     p = p + ggplot2.facet_wrap('crawl', ncol=4)
     p = p + ggplot2.labs(title='', x='URLs per Host / Domain / TLD',
                          y='Frequency')
     p = p + ggplot2.scale_y_log10()
     p = p + ggplot2.scale_x_log10()
     target = os.path.join(PLOTDIR, 'crawler/histogr_host_domain_tld.png')
     p.save(target)
     return p
Exemplo n.º 18
0
	def scatter(self, dataframe, filename, parm1, parm2, units1, units2, group,logx,logy):
		"""Write a 512x512 PNG scatter plot of ``parm2`` against ``parm1``.

		:param dataframe: data passed to ``ggplot2.ggplot``.
		:param filename: path of the PNG file to create.
		:param parm1: column plotted on the x axis.
		:param parm2: column plotted on the y axis.
		:param units1: text appended to ``parm1`` in the x axis label.
		:param units2: text appended to ``parm2`` in the y axis label.
		:param group: column used for the point colour.
		:param logx: when equal to True, use a log10 x axis.
		:param logy: when equal to True, use a log10 y axis.
		"""
		grdevices.png(file=filename, width=512, height=512)
		try:
			gg = (
				ggplot2.ggplot(dataframe)
				+ ggplot2.aes_string(x=parm1, y=parm2, colour=group)
				+ ggplot2.geom_point(alpha=0.7)
				+ ggplot2.labs(x=parm1 + " " + units1, y=parm2 + " " + units2)
			)
			# ``== True`` is kept on purpose so that exactly the same
			# argument values enable the log scales as before.
			if logx == True:  # noqa: E712
				gg = gg + ggplot2.scale_x_log10()
			if logy == True:  # noqa: E712
				gg = gg + ggplot2.scale_y_log10()
			gg.plot()
		finally:
			# Close the device even if building or drawing the plot fails.
			grdevices.dev_off()
def plot_histogram(fastq_file, plot_filename_png):
    """Plot a histogram of the sequence lengths in ``fastq_file``.

    The second line of every four-line record is taken as the sequence.
    The histogram uses 1/20 of the length range as bin width (1 when all
    sequences have the same length) and is written to ``plot_filename_png``
    as an 8.5 x 8.5 inch PNG at 300 dpi.

    :param fastq_file: path of the FASTQ file to read.
    :param plot_filename_png: output PNG path; also used as the plot title.
    :raises ValueError: if the file contains no sequence lines.
    """
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    with open(fastq_file, 'rb') as f:
        # Skip the header line, then take every 4th line (the raw sequence).
        lengths = [len(line.strip())
                   for line in itertools.islice(f, 1, None, 4)]

    if not lengths:
        raise ValueError('No sequences found in %s' % (fastq_file,))

    sizes = robjects.IntVector(lengths)

    sizes_min = min(lengths)
    sizes_max = max(lengths)

    binwidth = (sizes_max - sizes_min) / 20
    if binwidth <= 0:
        # All reads share one length (or the range is tiny under integer
        # division); a zero bin width cannot be plotted.
        binwidth = 1

    d = {'sizes': sizes}
    df = robjects.DataFrame(d)

    # plot
    gp = ggplot2.ggplot(df)

    pp = gp + ggplot2.aes_string(x='sizes') \
            + ggplot2.geom_histogram(binwidth=binwidth) \
            + ggplot2.theme_grey() \
            + ggplot2.labs(title=plot_filename_png,
                           x="Size (in nucleotides)", y="Count")

    grdevices.png(plot_filename_png, width=8.5, height=8.5,
                  units="in", res=300)
    try:
        pp.plot()
    finally:
        # Close the device even if drawing fails.
        grdevices.dev_off()
Exemplo n.º 20
0
    def scatter(self, dataframe, filename, parm1, parm2, units1, units2, group,
                logx, logy):
        """Write a 512x512 PNG scatter plot of ``parm2`` against ``parm1``.

        :param dataframe: data passed to ``ggplot2.ggplot``.
        :param filename: path of the PNG file to create.
        :param parm1: column plotted on the x axis.
        :param parm2: column plotted on the y axis.
        :param units1: text appended to ``parm1`` in the x axis label.
        :param units2: text appended to ``parm2`` in the y axis label.
        :param group: column used for the point colour.
        :param logx: when equal to True, use a log10 x axis.
        :param logy: when equal to True, use a log10 y axis.
        """
        grdevices.png(file=filename, width=512, height=512)
        try:
            gg = (
                ggplot2.ggplot(dataframe)
                + ggplot2.aes_string(x=parm1, y=parm2, colour=group)
                + ggplot2.geom_point(alpha=0.7)
                + ggplot2.labs(x=parm1 + " " + units1, y=parm2 + " " + units2)
            )
            # ``== True`` is kept on purpose so that exactly the same
            # argument values enable the log scales as before.
            if logx == True:  # noqa: E712
                gg = gg + ggplot2.scale_x_log10()
            if logy == True:  # noqa: E712
                gg = gg + ggplot2.scale_y_log10()
            gg.plot()
        finally:
            # Close the device even if building or drawing the plot fails.
            grdevices.dev_off()
Exemplo n.º 21
0
 def plot_stacked_bar(self, data, row_filter, img_file, ratio=1.0):
     """Plot ``percentage`` per ``crawl`` as horizontal stacked bars by ``type``.

     When ``row_filter`` is non-empty only rows whose ``type`` is listed in
     it are kept.  Filter values starting with ``fetcher:`` (optionally
     followed by ``aggr:``) are replaced in ``data`` by the value without
     that prefix.  The plot is saved below ``PLOTDIR`` and returned.
     """
     prefix = re.compile('^fetcher:(?:aggr:)?')
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if prefix.search(value) is None:
             continue
         data.replace(to_replace=value, value=prefix.sub('', value),
                      inplace=True)
     p = ggplot2.ggplot(data)
     p = p + ggplot2.aes_string(x='crawl', y='percentage', fill='type')
     p = p + ggplot2.geom_bar(stat='identity', position='stack', width=.9)
     p = p + ggplot2.coord_flip()
     p = p + ggplot2.scale_fill_brewer(
         palette='RdYlGn', type='sequential',
         guide=ggplot2.guide_legend(reverse=True))
     p = p + GGPLOT2_THEME
     p = p + ggplot2.theme(**{'legend.position': 'bottom',
                              'aspect.ratio': ratio})
     p = p + ggplot2.labs(title='Percentage of Fetch Status',
                          x='', y='', fill='')
     target = os.path.join(PLOTDIR, img_file)
     p.save(target)
     return p
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot fetch-status ``percentage`` per ``crawl`` as horizontal stacked bars.

     When ``row_filter`` is non-empty only rows whose ``type`` is listed in
     it are kept.  Filter values starting with ``fetcher:`` (optionally
     followed by ``aggr:``) are replaced in ``data`` by the value without
     that prefix.  The plot is saved below ``PLOTDIR`` and returned.
     """
     prefix = re.compile('^fetcher:(?:aggr:)?')
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if prefix.search(value) is None:
             continue
         data.replace(to_replace=value, value=prefix.sub('', value),
                      inplace=True)
     p = ggplot2.ggplot(data)
     p = p + ggplot2.aes_string(x='crawl', y='percentage', fill='type')
     p = p + ggplot2.geom_bar(stat='identity', position='stack', width=.9)
     p = p + ggplot2.coord_flip()
     p = p + ggplot2.scale_fill_brewer(
         palette='RdYlGn', type='sequential',
         guide=ggplot2.guide_legend(reverse=True))
     p = p + GGPLOT2_THEME
     p = p + ggplot2.theme(**{'legend.position': 'bottom',
                              'aspect.ratio': ratio})
     p = p + ggplot2.labs(title='Percentage of Fetch Status',
                          x='', y='', fill='')
     target = os.path.join(PLOTDIR, img_file)
     p.save(target)
     return p
Exemplo n.º 23
0
def rpy2_plotter(anno, clusters, name):
    """Plot genes distribution in clusters using ggplot2 from R.

    The ten rows of ``anno`` with the largest ``n_ft`` determine the
    categories to show; ``clusters`` is restricted to those categories and
    a histogram of ``n_features`` (bin width 1) is drawn per category, five
    facets per row, into the PDF file ``name``.
    """
    pandas2ri.activate()
    grdevices = importr('grDevices')
    rprint = robjects.globalenv.get("print")

    top_categories = (anno.sort_values(by="n_ft", ascending=False)
                      .head(n=10)["category"]
                      .tolist())
    selected = clusters[clusters["category"].isin(top_categories)]
    r_clusters = pandas2ri.py2ri(selected)

    pp = ggplot2.ggplot(r_clusters)
    pp = pp + ggplot2.aes_string(x="n_features")
    pp = pp + ggplot2.geom_histogram(binwidth=1)
    pp = pp + ggplot2.facet_wrap(robjects.Formula("~category"), ncol=5)
    pp = pp + ggplot2.labs(x="Number of Features",
                           y="Number of Clusters",
                           title="Clusters distribution")

    grdevices.pdf(file=name, width=11.692, height=8.267)
    rprint(pp)
    grdevices.dev_off()
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     """Plot CrawlDb ``size`` per ``crawl`` as horizontal stacked bars by ``type``.

     When ``row_filter`` is non-empty only rows whose ``type`` is listed in
     it are kept.  Filter values starting with ``crawldb:status:db_`` are
     replaced in ``data`` by the value without that prefix, and ``size`` is
     converted to float.  The plot is saved below ``PLOTDIR`` and returned.
     """
     prefix = re.compile('^crawldb:status:db_')
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if prefix.search(value) is None:
             continue
         data.replace(to_replace=value, value=prefix.sub('', value),
                      inplace=True)
     data['size'] = data['size'].astype(float)
     print(data)
     p = ggplot2.ggplot(data)
     p = p + ggplot2.aes_string(x='crawl', y='size', fill='type')
     p = p + ggplot2.geom_bar(stat='identity', position='stack', width=.9)
     p = p + ggplot2.coord_flip()
     p = p + ggplot2.scale_fill_brewer(
         palette='Pastel1', type='sequential',
         guide=ggplot2.guide_legend(reverse=False))
     p = p + GGPLOT2_THEME
     p = p + ggplot2.theme(**{'legend.position': 'bottom',
                              'aspect.ratio': ratio})
     p = p + ggplot2.labs(
         title='CrawlDb Size and Status Counts (before crawling)',
         x='', y='', fill='')
     target = os.path.join(PLOTDIR, img_file)
     p.save(target)
     return p
Exemplo n.º 25
0
 def plot_domain_cumul(self, crawl):
     """Plot cumulative URLs over cumulative domains for one crawl.

     Uses the rows of ``self.histogr`` with ``type == 'domain'``,
     ``type_counted == 'url'`` and the given ``crawl``.  Rows are sorted by
     ``count`` in descending order, then cumulative sums of ``frequency``
     (domains) and ``count * frequency`` (URLs) are computed, along with
     percentage columns that are printed but not plotted.  Both axes are
     log10.  Saves ``crawler/histogr_domain_cumul.png`` below ``PLOTDIR``.

     :param crawl: value of the ``crawl`` column to select.
     :return: the ggplot2 plot object, after it has been saved.
     """
     # -- coverage (cumulative pages) per domain
     data = self.histogr
     data = data[data['type'].isin(['domain'])]
     data = data[data['crawl'] == crawl]
     # Copy so the column assignments below act on an independent frame
     # rather than on a slice of self.histogr.
     data = data[data['type_counted'].isin(['url'])].copy()
     data['urls'] = data['count']*data['frequency']
     print(data)
     data = data[['urls', 'count', 'frequency']]
     data = data.sort_values(['count'], ascending=False)
     data['cum_domains'] = data['frequency'].cumsum()
     data['cum_urls'] = data['urls'].cumsum()
     data_perc = data.apply(lambda x: round(100.0*x/float(x.sum()), 1))
     data['%domains'] = data_perc['frequency']
     data['%urls'] = data_perc['urls']
     # Totals are loop-invariant: compute them once instead of per row.
     total_domains = float(data['frequency'].sum())
     total_urls = float(data['urls'].sum())
     data['%cum_domains'] = data['cum_domains'].apply(
         lambda x: round(100.0*x/total_domains, 1))
     data['%cum_urls'] = data['cum_urls'].apply(
         lambda x: round(100.0*x/total_urls, 1))
     with pandas.option_context('display.max_rows', None,
                                'display.max_columns', None,
                                'display.width', 200):
         print(data)
     img_path = os.path.join(PLOTDIR,
                             'crawler/histogr_domain_cumul.png')
     # data.to_csv(img_path + '.csv')
     title = 'Cumulative URLs for Top Domains'
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
         + ggplot2.geom_line() + ggplot2.geom_point() \
         + GGPLOT2_THEME \
         + ggplot2.labs(title=title, x='domains cumulative',
                        y='URLs cumulative') \
         + ggplot2.scale_y_log10() \
         + ggplot2.scale_x_log10()
     p.save(img_path)
     return p
Exemplo n.º 26
0
 def plot_domain_cumul(self, crawl):
     """Plot cumulative URLs over cumulative domains for one crawl.

     Uses the rows of ``self.histogr`` with ``type == 'domain'``,
     ``type_counted == 'url'`` and the given ``crawl``.  Rows are sorted by
     ``count`` in descending order, then cumulative sums of ``frequency``
     (domains) and ``count * frequency`` (URLs) are computed, along with
     percentage columns that are printed but not plotted.  Both axes are
     log10.  Saves ``crawler/histogr_domain_cumul.png`` below ``PLOTDIR``.

     :param crawl: value of the ``crawl`` column to select.
     :return: the ggplot2 plot object, after it has been saved.
     """
     # -- coverage (cumulative pages) per domain
     data = self.histogr
     data = data[data['type'].isin(['domain'])]
     data = data[data['crawl'] == crawl]
     # Copy so the column assignments below act on an independent frame
     # rather than on a slice of self.histogr.
     data = data[data['type_counted'].isin(['url'])].copy()
     data['urls'] = data['count'] * data['frequency']
     print(data)
     data = data[['urls', 'count', 'frequency']]
     data = data.sort_values(['count'], ascending=False)
     data['cum_domains'] = data['frequency'].cumsum()
     data['cum_urls'] = data['urls'].cumsum()
     data_perc = data.apply(lambda x: round(100.0 * x / float(x.sum()), 1))
     data['%domains'] = data_perc['frequency']
     data['%urls'] = data_perc['urls']
     # Totals are loop-invariant: compute them once instead of per row.
     total_domains = float(data['frequency'].sum())
     total_urls = float(data['urls'].sum())
     data['%cum_domains'] = data['cum_domains'].apply(
         lambda x: round(100.0 * x / total_domains, 1))
     data['%cum_urls'] = data['cum_urls'].apply(
         lambda x: round(100.0 * x / total_urls, 1))
     with pandas.option_context('display.max_rows', None,
                                'display.max_columns', None,
                                'display.width', 200):
         print(data)
     img_path = os.path.join(PLOTDIR, 'crawler/histogr_domain_cumul.png')
     # data.to_csv(img_path + '.csv')
     title = 'Cumulative URLs for Top Domains'
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
         + ggplot2.geom_line() + ggplot2.geom_point() \
         + GGPLOT2_THEME \
         + ggplot2.labs(title=title, x='domains cumulative',
                        y='URLs cumulative') \
         + ggplot2.scale_y_log10() \
         + ggplot2.scale_x_log10()
     p.save(img_path)
     return p
def plot_volcano_with_r(
    data,
    xlabel='Estimated effect (change in H/L ratio)',
    title='',
    max_labels=20,
    color_background='#737373',
    color_significant='#252525',
    color_significant_muted='#252525',
    label_only_large_fc=False,
    special_labels=None,
    special_palette=None,
    base_size=12,
    label_size=3,
    x='logFC',
    y='neg_log10_p_adjust',
    special_labels_mode='all',
    xlim=None,
    skip_labels=None,
    nudges=None,
):
    """Draw a volcano plot of ``data`` with ggplot2 and ggrepel via rpy2.

    ``data`` and the label options are handed to
    ``transform_data_for_ggplot``; the R data frame it returns is plotted
    with ``x``/``y`` as coordinates, the ``group`` column as point colour
    and the ``label`` column as repelled text.  Grey guide lines mark
    ``-log10(FDR_THRESHOLD_RESPONSE)`` on the y axis and
    ``+/-FC_THRESHOLD_RESPONSE`` on the x axis.  The finished plot is
    drawn through its ``plot()`` method; nothing is returned.

    Args:
        data: input forwarded to ``transform_data_for_ggplot``.
        xlabel: x-axis label.
        title: plot title (centred).
        max_labels, label_only_large_fc, special_labels,
        special_labels_mode, skip_labels, nudges: forwarded unchanged to
            ``transform_data_for_ggplot``.
        color_background, color_significant, color_significant_muted,
        special_palette: forwarded to ``r_label_palette`` to build the
            manual colour scale.
        base_size: base font size of ``theme_minimal``.
        label_size: text size of the repelled labels.
        x, y: names of the columns mapped to the axes.
        xlim: optional iterable of x-axis limits; unpacked into R's ``c``.
    """
    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)

    # --- base plot and theme -------------------------------------------
    volcano = r_ggplot2.ggplot(r_data)
    volcano += r_ggplot2.theme_minimal(base_size=base_size)

    # no grid lines, black frame around the panel
    panel_theme = {
        'panel.grid.major': r_ggplot2.element_blank(),
        'panel.grid.minor': r_ggplot2.element_blank(),
        'panel.border': r_ggplot2.element_rect(
            fill=robjects.rinterface.NA, color="black"),
    }
    volcano += r_ggplot2.theme(**panel_theme)

    plain_helvetica = r_ggplot2.element_text(family='Helvetica', face='plain')
    volcano += r_ggplot2.theme(text=plain_helvetica)

    title_theme = {'plot.title': r_ggplot2.element_text(hjust=0.5)}
    volcano += r_ggplot2.theme(**title_theme)

    # --- point aesthetics and colour scale -----------------------------
    point_aes = r_ggplot2.aes_string(x=x, y=y, color='group')
    palette = r_label_palette(
        r_like_data,
        special_palette,
        color_background=color_background,
        color_significant=color_significant,
        color_significant_muted=color_significant_muted)
    colour_scale = r_ggplot2.scale_colour_manual(point_aes, values=palette)

    volcano += point_aes
    volcano += colour_scale

    # --- axes ----------------------------------------------------------
    # limits are only passed when the caller asked for them
    x_scale_args = {'labels': r_custom.formatterFunTwoDigits}
    if xlim is not None:
        x_scale_args['limits'] = robjects.r.c(*xlim)
    volcano += r_ggplot2.scale_x_continuous(**x_scale_args)

    volcano += r_ggplot2.scale_y_continuous(
        labels=r_custom.formatterFunOneDigit)

    # --- threshold guide lines -----------------------------------------
    guide_style = {'color': '#BDBDBD', 'alpha': .3}
    volcano += r_ggplot2.geom_hline(
        yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)), **guide_style)
    for x_position in (float(FC_THRESHOLD_RESPONSE),
                       -float(FC_THRESHOLD_RESPONSE)):
        volcano += r_ggplot2.geom_vline(xintercept=x_position, **guide_style)

    # --- points and labels ---------------------------------------------
    volcano += r_ggplot2.geom_point(**{'show.legend': False})

    label_aes = r_ggplot2.aes_string(label='label')
    volcano += label_aes

    repel_options = {
        'show.legend': False,
        'point.padding': 0.25,
        'min.segment.length': 0,
        'segment.color': '#BDBDBD',
    }
    # per-label nudges are read from the 'nudgex' / 'nudgey' columns
    volcano += r_ggrepel.geom_text_repel(
        label_aes,
        nudge_x=r_dollar(r_data, 'nudgex'),
        nudge_y=r_dollar(r_data, 'nudgey'),
        size=label_size,
        family='Helvetica',
        **repel_options)

    volcano += r_ggplot2.labs(
        x=xlabel, y='Adjusted p value (-log10)', title=title)

    volcano.plot()
Exemplo n.º 28
0
def rest():
    """Run exploratory tests and draw quartile plots for three populations.

    Notebook-style scratch code operating on the module-level frames
    ``q1_median_q3_rep_wide``, ``q1_median_q3_rep_long`` and
    ``beta_values``.  It computes Mann-Whitney U, Kruskal-Wallis, Dunn
    post-hoc, one-way ANOVA and Tukey HSD statistics (all results stay
    local and are discarded), then renders a point plot and a box plot
    and saves both to the same PNG path.  Returns ``None``.
    """
    df = q1_median_q3_rep_wide
    pops = ["pdc", "dc-cd11b", "dc-cd8a"]

    stats_l = []
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)

        # NOTE(review): the loop variables are overwritten with fixed
        # values and the test below runs on literal samples, so every
        # iteration appends the same row.  Looks like a debugging
        # leftover; left as is because the intended inputs (commented
        # out below) cannot be confirmed from here.
        popa = "hsc"
        popb = "pdc"
        stat = "median"

        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )

        stats_l.append([stat, popa, popb, mw_u, pvalue])
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)

    # one row per population, one column per replicate
    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )

    import scikit_posthocs

    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )

    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # A bare ``import statsmodels`` does not load the ``stats.multicomp``
    # submodule; import it explicitly so the attribute access below does
    # not depend on some other module having imported it first.
    import statsmodels.stats.multicomp

    df = kruskal_format_means.stack().reset_index()

    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)

    res.pvalues
    res.summary()

    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)

    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]

    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"

    # rank the rows inside every (Population, stat) group by descending
    # value; the rank is used as the ggplot grouping variable
    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))

    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))

    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))

    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )

    # box plot drawn from precomputed summary columns (stat="identity"),
    # one box per population/replicate combination
    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                        gg.aes_string(
                            x="Population",
                            fill="Population",
                            group="sample",
                            lower="Q1",
                            upper="Q3",
                            middle="median",
                            ymin="min1",
                            ymax="max99",
                            # position=gg.position_dodge(width=0.5),
                        ),
                        stat="identity",
                    )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    # NOTE(review): same png_path as the point plot above, so this call
    # writes to the path the first plot was saved to.
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )
    # image_png2(g, (ut.cm(12), ut.cm(12)))

    beta_values.loc[:, ("hsc", "1")]
Exemplo n.º 29
0
 def test_labs(self):
     """``labs()`` called without arguments yields a ``ggplot2.Labs``."""
     assert isinstance(ggplot2.labs(), ggplot2.Labs)
Exemplo n.º 30
0
hour_speed

# Pickups by hour (aggregated across all days).
# The colour limits are shared by all 24 frames: log1p(1) at the low end
# and the largest log1p passenger count at the high end.
min_passenger_count = np.log1p(1)
max_passenger_count = np.max(np.log1p(pickups_hr['passenger_count']))

for i in range(24):
    temp = pickups_hr[pickups_hr.hour == i].reset_index()
    temp['passenger_count'] = np.log1p(temp['passenger_count'])
    temp_r = pandas2ri.py2ri(temp)
    p = (
        ggplot2.ggplot(temp_r)
        + ggplot2.aes_string(x='rounded_lon', y='rounded_lat',
                             color='passenger_count')
        + ggplot2.geom_point(size=0.5)
        + ggplot2.scale_color_gradient(
            low='black',
            high='white',
            limits=np.array([min_passenger_count, max_passenger_count]))
        + ggplot2.xlim(-74.2, -73.7)
        + ggplot2.ylim(40.56, 40.93)
        + ggplot2.labs(x=' ', y=' ', title='NYC taxi pickups %02i:00' % i)
        + ggplot2.guides(color=False)
    )
    p.save('./plots/taxi_pickups%02i.png' % i, width=4, height=4.5)

# Create animated .gif of pickups by hour
import imageio

# frames in name order; the zero-padded hour in the file name makes that
# the chronological order
file_names = [
    'plots/' + fn
    for fn in sorted(os.listdir('./plots'))
    if fn.startswith('taxi_pickups')
]
images = [imageio.imread(path) for path in file_names]
imageio.mimsave('./plots/pickups_movie.gif', images, duration=0.4)

# total pickups by date, color
p1 = ggplot2.ggplot(pandas2ri.py2ri(date_avgs)) + \
Exemplo n.º 31
0


from rpy2.robjects.lib import ggplot2

# The line and the point layer use the same mapping: running time against
# the number of loops, coloured by code variant.
_time_by_loops = ggplot2.aes_string(x="n_loop", y="time", colour="code")
p = (
    ggplot2.ggplot(dataf)
    + ggplot2.geom_line(_time_by_loops)
    + ggplot2.geom_point(_time_by_loops)
    + ggplot2.facet_wrap(Formula('~sequence'))
    + ggplot2.scale_y_continuous('running time')
    + ggplot2.scale_x_continuous('repeated n times')
    + ggplot2.xlim(0, max(n_loops))
    + ggplot2.labs(title="Benchmark (running time)")
)


from rpy2.robjects.packages import importr

# render the figure into a PNG file through R's graphics device
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")
# one linear fit of time on n_loop per level of `group`
fit = nlme.lmList(
    Formula('time ~ n_loop | group'),
    data=dataf,
    na_action=stats.na_exclude,
)
Exemplo n.º 32
0
def show1():
	"""Draw a bar chart of ``time`` per ``project`` from ``day1.csv``.

	Calls ``open1()`` and sources ``head1.r`` first, then reads the CSV
	into an R data frame and plots it on the current graphics device.
	"""
	open1()
	r.source(
		'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',
		encoding="utf-8")
	csv_path = 'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv'
	data = DataFrame.from_csvfile(csv_path)

	# x tick labels rotated by 45 degrees
	tilted_x_labels = {'axis.text.x': ggplot2.element_text(angle=45)}
	pp = (
		ggplot2.ggplot(data)
		+ ggplot2.aes_string(x='project', y='time', fill='project')
		+ ggplot2.geom_bar(stat='identity')
		+ ggplot2.ggtitle("今日项目时间分布图")
		+ ggplot2.labs(x='项目', y='时间 (min)')
		+ ggplot2.theme(**tilted_x_labels)
	)
	pp.plot()
Exemplo n.º 33
0
def show4():
	"""Draw side-by-side bars of ``time`` per ``day`` from ``project2.csv``.

	Calls ``open4()`` and sources ``end.R`` first, then reads the CSV into
	an R data frame; bars are filled by ``factor(project)`` and dodged.
	"""
	open4()
	r.source(
		'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',
		encoding="utf-8")
	csv_path = 'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv'
	data = DataFrame.from_csvfile(csv_path)

	# x tick labels rotated by 45 degrees
	tilted_x_labels = {'axis.text.x': ggplot2.element_text(angle=45)}
	pp = (
		ggplot2.ggplot(data)
		+ ggplot2.aes_string(x='day', y='time', fill='factor(project)')
		+ ggplot2.geom_bar(stat='identity', position='dodge')
		+ ggplot2.ggtitle("两项目时间对比图")
		+ ggplot2.labs(x='日期', y='时间 (min)')
		+ ggplot2.theme(**tilted_x_labels)
	)
	pp.plot()
Exemplo n.º 34
0
                               linetype='variable')
# Reusable plot components shared by the figures in this module.
line = ggplot2.geom_line()
point = ggplot2.geom_point()


def _reference_vline(x):
    """Return a grey vertical line (R linetype 2) at ``x``."""
    return ggplot2.geom_vline(xintercept=x, linetype=2, colour="#999999")


vert_line_onset = _reference_vline(-1.5)
vert_line_exhaust = _reference_vline(5.5)
vert_line_exhaust_FL = _reference_vline(3.5)

colors = ggplot2.scale_colour_manual(values=robjects.r.palette_lines)
hollow = ggplot2.scale_shape_manual(
    values=robjects.r('c(16,17,15,18,6,7,9,3)'))
xlab = ggplot2.labs(x="Months Since First UI Check")
loc_default = robjects.r('c(1,0)')


def legend_f(loc=loc_default):
    """Return a theme using ``loc`` as legend position and justification."""
    legend_settings = {
        'legend.position': loc,
        'legend.justification': loc,
    }
    return ggplot2.theme(**legend_settings)


def ggsave(filename, plot):
    """Save ``plot`` as ``../out/<filename>.pdf`` (6 x 4) via R's ggsave."""
    return robjects.r.ggsave(
        filename="../out/" + filename + ".pdf", plot=plot, width=6, height=4)


colors_alt = ggplot2.scale_colour_manual(values=robjects.r.palette_lines[1])
shape_alt = ggplot2.scale_shape_manual(values=17)

ggplot2_env = robjects.baseenv['as.environment']('package:ggplot2')


class GBaseObject(robjects.RObject):
Exemplo n.º 35
0
    [d['code'][x] + ':' + d['sequence'][x] for x in range(len(d['n_loop']))])
dataf = DataFrame(d)

from rpy2.robjects.lib import ggplot2

# The line and the point layer use the same mapping: running time against
# the number of loops, coloured by code variant.
_time_by_loops = ggplot2.aes_string(x="n_loop", y="time", colour="code")
p = (
    ggplot2.ggplot(dataf)
    + ggplot2.geom_line(_time_by_loops)
    + ggplot2.geom_point(_time_by_loops)
    + ggplot2.facet_wrap(Formula('~sequence'))
    + ggplot2.scale_y_continuous('running time')
    + ggplot2.scale_x_continuous('repeated n times')
    + ggplot2.xlim(0, max(n_loops))
    + ggplot2.labs(title="Benchmark (running time)")
)

from rpy2.robjects.packages import importr

# render the figure into a PNG file through R's cairo PNG device
grdevices = importr('grDevices')
grdevices.png(
    '../../_static/benchmark_sum.png', width=812, height=612, type="cairo")
p.plot()
grdevices.dev_off()

#base = importr("base")
stats = importr('stats')
nlme = importr("nlme")
fit = nlme.lmList(Formula('time ~ n_loop | group'),
                  data=dataf,
Exemplo n.º 36
0
def _plt_percountr(dat, independentpdf=False, fname='xpercount.pdf'):
    """Plot how many miRNA share a TSS and how many TSS a miRNA has.

    Rows whose ``label`` equals ``'NA'`` are dropped first.  Six plots are
    drawn, in this order: histogram and density+histogram of miRNA counts
    per TSS, the same two for TSS counts per miRNA, and the latter two
    again with bars filled by ``label``.

    Args:
        dat: frame with at least the columns ``tss``, ``mirna`` and
            ``label``.
        independentpdf: when true, a PDF device writing to ``fname`` is
            opened for the plots and closed afterwards; otherwise the
            plots go to whatever graphics device is current.
        fname: path of the PDF used when ``independentpdf`` is true.
    """
    def _filt_dat(dat, item, getlabel=True):
        """Count the occurrences of every value in ``dat[item]``.

        Returns ``(counts, n, mx)``: a frame indexed by value with a
        ``count`` column (plus ``label`` when ``getlabel``), the number of
        distinct values, and the largest count.
        """
        df = pd.DataFrame(dat[item].value_counts())
        df.columns = ['count']
        if getlabel:
            # Label of the first row in which each value occurs, looked
            # up in one pass instead of rescanning ``dat`` per value.
            first_label = dat.drop_duplicates(
                subset=item).set_index(item)['label']
            df['label'] = first_label.reindex(df.index).to_numpy()
        n = len(df)
        mx = max(df['count'])
        return df, n, mx

    dat = dat[dat['label'] != 'NA']

    ## NUMBER OF MIRNA PER TSS
    df, n, mx = _filt_dat(dat, 'tss', False)
    df = {'count': robjects.IntVector(df['count'])}
    df = robjects.DataFrame(df)

    pt = ggplot2.ggplot(df) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5, position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.aes_string(x='count') + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    pt_den = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='count', y='..density..') + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity', origin=-.5) + \
        ggplot2.ggtitle('TSS [Total = %s]' % n) + \
        ggplot2.labs(x='Number of miRNA per TSS (max = %s)' % mx)

    ## NUMBER OF TSS PER MIRNA
    df, n, mx = _filt_dat(dat, 'mirna')
    df = {
        'count': robjects.IntVector(df['count']),
        'label': robjects.StrVector(df['label'])
    }
    df = robjects.DataFrame(df)

    _pm = ggplot2.ggplot(df) + \
        ggplot2.geom_histogram(binwidth=1, origin=-.5, alpha=.5, position="identity") + \
        ggplot2.xlim(-.5, mx+1) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    _pm_den = ggplot2.ggplot(df) + \
        ggplot2.geom_density(binwidth=1, alpha=.5, origin=-.5) + \
        ggplot2.geom_histogram(binwidth=1, alpha=.33, position='identity', origin=-.5) + \
        ggplot2.ggtitle('miRNA [Total = %s]' % n)

    ## not split by label
    pm = _pm + ggplot2.aes_string(x='count')
    pm_den = _pm_den + ggplot2.aes_string(x='count', y='..density..')

    ## split by label
    pms = _pm + ggplot2.aes_string(x='count', fill='label')
    pm_dens = _pm_den + ggplot2.aes_string(
        x='count', fill='label', y='..density..')

    ## add xlabelling (need to be added after aes_string)
    _xlab = ggplot2.labs(x='Number of TSS per miRNA (max = %s)' % mx)
    pm += _xlab
    pm_den += _xlab
    pms += _xlab
    pm_dens += _xlab

    plots = (pt, pt_den, pm, pm_den, pms, pm_dens)

    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(fname)
        try:
            for plot in plots:
                plot.plot()
        finally:
            # close the PDF device even when a plot call fails, so the
            # device is not left open
            grdevices.dev_off()
    else:
        for plot in plots:
            plot.plot()
Exemplo n.º 37
0
import pandas as pd
import rpy2.robjects.packages as packages
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
# Load the mtcars dataset that ships with R's `datasets` package
R = ro.r
datasets = packages.importr('datasets')
mtcars = packages.data(datasets).fetch('mtcars')['mtcars']

# Build the chart with ggplot, one component per statement:
# mpg against wt, points coloured by qsec on a yellow-to-red gradient,
# plus a smoothing layer.
gp = ggplot2.ggplot(mtcars)
pyplot = gp + ggplot2.aes_string(x='wt', y='mpg')
pyplot += ggplot2.geom_point(ggplot2.aes_string(colour='qsec'))
pyplot += ggplot2.scale_colour_gradient(low='yellow', high='red')
pyplot += ggplot2.geom_smooth(method='auto')
pyplot += ggplot2.labs(title='mtcars', x='wt', y='mpg')

pyplot.plot()

print("\nAnálise de Variância")
print("--------------------")
import rpy2.robjects as robjects

r = robjects.r

# two samples of ten measurements each: control and treatment
controle = robjects.FloatVector([
    4.17, 5.58, 5.18, 6.11, 4.50,
    4.61, 5.17, 4.53, 5.33, 5.14,
])
tratamento = robjects.FloatVector([
    4.81, 4.17, 4.41, 3.59, 5.87,
    3.83, 6.03, 4.89, 4.32, 4.69,
])

# factor built with R's gl(2, 10, 20) and the two group labels
grupo = r.gl(2, 10, 20, labels=["Controle", "Tratamento"])