Пример #1
0
def compare_sum_barplot(locus_table, interval_table, intervals, loci, names,
        rows):
    frame = get_r_data_by_top(locus_table, interval_table, intervals, names,
            rows)
    #pdb.set_trace()
    frame2 = robjects.r('''agg_data <- aggregate(pi ~ interval + db, data = data, sum)''')
    if len(intervals) > 1:
        sort_string = '''agg_data$interval <- factor(agg_data$interval,{})'''.format(order_intervals(frame2[0]))
        robjects.r(sort_string)
    gg_frame = ggplot2.ggplot(robjects.r('''agg_data'''))
    plot = gg_frame + \
        ggplot2.aes_string(
                x = 'interval', 
                y = 'pi',
                fill='factor(db)'
            ) + \
        ggplot2.geom_bar(**{
            'position':'dodge',
            'colour':'#767676',
            'alpha':0.6
            }
        ) + \
        ggplot2.scale_y_continuous('net phylogenetic informativeness') + \
        ggplot2.scale_x_discrete('interval (years ago)') + \
        ggplot2.scale_fill_brewer("database", palette="Blues")
    return plot
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     categories = []
     for value in row_filter:
         if re.search('^crawldb:status:db_', value):
             replacement = re.sub('^crawldb:status:db_', '', value)
             categories.append(replacement)
             data.replace(to_replace=value, value=replacement, inplace=True)
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories.reverse())
     data['size'] = data['size'].astype(float)
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts\n(before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     if row_filter:
         data = data[data['type'].isin(row_filter)]
     data = data[['crawl', 'percentage', 'type']]
     categories = []
     for value in row_filter:
         if re.search('^fetcher:(?:aggr:)?', value):
             replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
             categories.append(replacement)
             data.replace(to_replace=value, value=replacement, inplace=True)
     data['type'] = pandas.Categorical(data['type'],
                                       ordered=True,
                                       categories=categories.reverse())
     ratio = 0.1 + len(data['crawl'].unique()) * .03
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path, height=int(7 * ratio), width=7)
     return p
Пример #4
0
def plot(data, filename, title, ggplotter, xid="N", yid="RunTime", factorid="Step"):
    df = make_dataframe(data, xid, yid, factorid)
    grdevices.pdf(file=filename, width=10, height=6)
    gp = ggplot2.ggplot(df)
    pp = gp + \
        ggplot2.aes_string(x=xid, y=yid) + \
        ggplot2.aes_string(size=.5) + \
        ggplotter() + \
        ggplot2.aes_string(colour='factor(%s)' % factorid) + \
        ggplot2.aes_string(fill='factor(%s)' % factorid) + \
        ggplot2.opts(title=title) + \
        ggplot2.scale_fill_brewer(palette="Set2") + \
        ggplot2.scale_colour_brewer(palette="Set2")
    pp.plot()
    grdevices.dev_off()
Пример #5
0
def compare_mean_boxplot(locus_table, interval_table, intervals, loci, names, rows):
    frame = get_r_data_by_top(locus_table, interval_table, intervals, names,
            rows)
    if len(intervals) > 1:
        sort_string = '''data$interval <- factor(data$interval, {})'''.format(order_intervals(frame[1]))
        robjects.r(sort_string)
    gg_frame = ggplot2.ggplot(robjects.r('''data'''))
    plot = gg_frame + ggplot2.aes_string(x = 'interval', y = 'pi') + \
                ggplot2.geom_boxplot(ggplot2.aes_string(fill = 'factor(db)'), **{
                    'outlier.size':3,
                    'outlier.colour':'#767676',
                    'outlier.alpha':0.3,
                    'alpha':0.6
                    }
                ) + \
                ggplot2.scale_y_continuous('mean phylogenetic informativeness') + \
                ggplot2.scale_x_discrete('interval (years ago)') + \
                ggplot2.scale_fill_brewer("database", palette='Blues')
    return plot
Пример #6
0
 def plot_stacked_bar(self, data, row_filter, img_file, ratio=1.0):
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if re.search('^fetcher:(?:aggr:)?', value):
             replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
             data.replace(to_replace=value, value=replacement, inplace=True)
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
 def plot_fetch_status(self, data, row_filter, img_file, ratio=1.0):
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if re.search('^fetcher:(?:aggr:)?', value):
             replacement = re.sub('^fetcher:(?:aggr:)?', '', value)
             data.replace(to_replace=value, value=replacement, inplace=True)
     # print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='percentage', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='RdYlGn', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=True)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='Percentage of Fetch Status',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
 def plot_crawldb_status(self, data, row_filter, img_file, ratio=1.0):
     if len(row_filter) > 0:
         data = data[data['type'].isin(row_filter)]
     for value in row_filter:
         if re.search('^crawldb:status:db_', value):
             replacement = re.sub('^crawldb:status:db_', '', value)
             data.replace(to_replace=value, value=replacement, inplace=True)
     data['size'] = data['size'].astype(float)
     print(data)
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='crawl', y='size', fill='type') \
         + ggplot2.geom_bar(stat='identity', position='stack', width=.9) \
         + ggplot2.coord_flip() \
         + ggplot2.scale_fill_brewer(palette='Pastel1', type='sequential',
                                     guide=ggplot2.guide_legend(reverse=False)) \
         + GGPLOT2_THEME \
         + ggplot2.theme(**{'legend.position': 'bottom',
                            'aspect.ratio': ratio}) \
         + ggplot2.labs(title='CrawlDb Size and Status Counts (before crawling)',
                        x='', y='', fill='')
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
Пример #9
0
def rest():
    df = q1_median_q3_rep_wide
    pops = ["pdc", "dc-cd11b", "dc-cd8a"]

    stats_l = []
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)

        popa = "hsc"
        popb = "pdc"
        stat = "median"

        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )
        pvalue

        stats_l.append([stat, popa, popb, mw_u, pvalue])
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)

    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )

    import scikit_posthocs

    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )

    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    import statsmodels

    df = kruskal_format_means.stack().reset_index()

    kruskal_format_means

    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)

    res.pvalues
    res.summary()

    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)

    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]

    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"

    q1_median_q3_rep_wide

    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))

    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))
    a = 3

    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))

    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )

    q1_median_q3_rep_wide

    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                        gg.aes_string(
                            x="Population",
                            fill="Population",
                            group="sample",
                            lower="Q1",
                            upper="Q3",
                            middle="median",
                            ymin="min1",
                            ymax="max99",
                            # position=gg.position_dodge(width=0.5),
                        ),
                        stat="identity",
                    )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    a = 3
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )
    # image_png2(g, (ut.cm(12), ut.cm(12)))

    beta_values.loc[:, ("hsc", "1")]
Пример #10
0
  # print str(a)
   try:
      if dsumFC.has_key(drug):
         dsumFC[drug]['Fold_Change'].append(math.log10(float(val)))
         dsumY[drug]['Year'].append(yr)
      else:
         dsumFC[drug]= {'Fold_Change': [math.log10(float(val)),]}
         dsumY[drug]= {'Year': [yr,]}
   except:
      print "FAILURE: dsumFC="+str(dsumFC)+"\n\ndsumY="+str(dsumY)
      sys.exit()
drugs = dsumFC.keys()

for x in drugs:
   od = rlc.OrdDict([('Fold_Change',robjects.FloatVector(dsumFC[x]['Fold_Change'])),('Year',robjects.FactorVector(dsumY[x]['Year'])),('Drug',robjects.FactorVector(x))])
grdevices.pdf(file="drugs.pdf",width=7,height=7)
   
   dataf = robjects.DataFrame(od)
   gp3 = ggplot2.ggplot(dataf)
   pp3 = gp3 + ggplot2.scale_fill_brewer(palette='BrBG',name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') +  ggplot2.geom_boxplot() + ggplot2.opts(title =  x+" Yearly Trend")
  # pp3 = gp3 + ggplot2.scale_colour_hue(h=base.c(180,270),name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') +  ggplot2.geom_boxplot() + ggplot2.opts(title =  x+" Yearly Trend")
   #+ ggplot2.scale_y_log10()
   pp3.plot()
   grdevices.dev_off()
   
f.close()
print "\nfinished\n"