def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Draw a two-group box plot with jittered points and write it to a file.

    The values of both sample sets are stacked into one R data frame with a
    ``sample`` column ("set1" / "set2") and a ``value`` column, then rendered
    with ggplot2 as a box plot overlaid with jittered points, using the
    black-and-white theme.

    Args:
        grdevices: Object exposing the graphics-device calls ``pdf``, ``png``
            and ``dev_off`` (presumably rpy2's grDevices package; confirm
            against the caller).
        plotName: Path of the image file to write.
        samp_set1_vals: Numeric values of the first sample set.
        samp_set2_vals: Numeric values of the second sample set.
        image_file_type: ``"pdf"`` selects the PDF device; any other value
            selects a 512x512 PNG device.
    """
    # Materialise both inputs as lists so that "+" below always concatenates;
    # for array-like inputs (e.g. numpy arrays) "+" would add element-wise.
    set1_vals = list(samp_set1_vals)
    set2_vals = list(samp_set2_vals)

    samp_vector = ["set1"] * len(set1_vals) + ["set2"] * len(set2_vals)

    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(samp_vector),
        "value": robjects.FloatVector(set1_vals + set2_vals),
    })

    pp = (ggplot2.ggplot(dframe)
          + ggplot2.aes_string(x="sample", y='value')
          + ggplot2.geom_boxplot()
          + ggplot2.geom_jitter()
          + ggplot2.theme_bw())

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Close the device even when rendering fails, so it is never left
        # open behind a raised exception.
        grdevices.dev_off()
Exemplo n.º 2
0
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Build a ggplot of 'pi' (phylogenetic informativeness) per interval.

    The query text comes from ``get_interval_query`` and is run on the R side
    through ``dbGetQuery(con, ...)``; the R variable ``con`` must therefore
    already exist in the R session. The result is stored in the R variable
    ``data``, whose ``interval`` column is re-levelled before plotting.

    Args:
        locus_table: Forwarded to ``get_interval_query``.
        interval_table: Forwarded to ``get_interval_query``.
        intervals: Forwarded to ``get_interval_query``.
        loci: Forwarded to ``get_interval_query``.
        boxplot: When true, draw a box plot with jittered points coloured by
            locus; otherwise draw bars filled by locus, faceted per locus,
            with rotated x-axis labels and no legend.

    Returns:
        The assembled ggplot object (not yet rendered).
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))

    # `interval` is a factor on the R side, so its display order has to be
    # imposed explicitly (by the first integer value of each interval);
    # order_intervals supplies the text spliced into the factor() call.
    level_spec = order_intervals(fetched[1])
    relevel_cmd = '''data$interval <- factor(data$interval, {})'''.format(level_spec)
    robjects.r(relevel_cmd)

    canvas = ggplot2.ggplot(robjects.r('''data'''))

    if boxplot:
        layered = canvas + ggplot2.aes_string(x='interval', y='pi')
        layered = layered + ggplot2.geom_boxplot(
            **{'outlier.size': 0, 'alpha': 0.3})
        layered = layered + ggplot2.geom_jitter(
            ggplot2.aes_string(color='locus'),
            size=3,
            alpha=0.6,
            position=ggplot2.position_jitter(width=0.25))
    else:
        layered = canvas + ggplot2.aes_string(
            x='interval', y='pi', fill='locus')
        layered = layered + ggplot2.geom_bar()
        layered = layered + ggplot2.facet_wrap(robjects.Formula('~ locus'))
        layered = layered + ggplot2.opts(**{
            'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
            'legend.position': 'none',
        })

    # Both variants finish with the same axis titles.
    layered = layered + ggplot2.scale_y_continuous(
        'phylogenetic informativeness')
    return layered + ggplot2.scale_x_discrete('interval (years ago)')
Exemplo n.º 3
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Write a PNG box plot of barcode counts grouped by match status.

    The barcode, count and match vectors come from ``get_vectors``. Counts
    are divided by 1,000,000 on the y axis, the x axis is
    ``factor(matched)``, and the plot is titled with ``expt_id``. The image
    is written to ``<directory>/<expt_id>.png`` at 4x5 inches, 300 dpi.

    Args:
        barcodes_obs: Forwarded to ``get_vectors``.
        barcode_table: Forwarded to ``get_vectors``.
        directory: Directory that receives the PNG file.
        expt_id: Experiment identifier; used as plot title and file stem.
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes, 'count': counts, 'matched': matches})
    # NOTE(review): xlab is reached through the nested ``ggplot2.ggplot2``
    # object exactly as before - presumably the raw R package handle.
    p = (ggplot2.ggplot(df)
         + ggplot2.aes_string(x='factor(matched)', y='count / 1000000')
         + ggplot2.geom_boxplot(outlier_size=0)
         + ggplot2.geom_jitter()
         + ggplot2.ggtitle(label=expt_id)
         + ggplot2.ggplot2.xlab(label="")
         + ggplot2.scale_y_continuous(name="Count\n(million reads)"))

    filename = "{0}/{1}.png".format(directory, expt_id)
    # R's png() names this argument ``units``; the former ``unit`` spelling
    # only worked through R's partial argument matching.
    grdevices.png(filename=filename, width=4, height=5, units='in', res=300)
    try:
        p.plot()
    finally:
        # Close the device even if rendering raises.
        grdevices.dev_off()
Exemplo n.º 4
0
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Write a PDF box plot of ``abundance`` per ``genotype``.

    The plot layers a box plot, jittered points (jitter ``w=0.1``) and plain
    points, with the given title and axis labels.

    Args:
        data: Data passed straight to ``ggplot2.ggplot``; it must provide
            ``genotype`` and ``abundance`` columns.
        plot_file_path: Path of the PDF file to write.
        title: Plot title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
    """
    # Build the plot before opening the device, so a failure here neither
    # creates an empty PDF nor leaves a device open.
    pp = (ggplot2.ggplot(data)
          + ggplot2.aes_string(x='genotype', y='abundance')
          + ggplot2.geom_boxplot()
          + ggplot2.ggtitle(title)
          + ggplot2.labs(x=xlabel, y=ylabel)
          + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1))
          + ggplot2.geom_point())

    grdevices.pdf(file=plot_file_path)
    try:
        pp.plot()
    finally:
        # Close the device even if rendering raises.
        grdevices.dev_off()
Exemplo n.º 5
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Render the per-experiment barcode count summary to a PNG file.

    Vectors obtained from ``get_vectors`` are put into a data frame and
    drawn as a box plot with jittered points: x is ``factor(matched)``, y is
    ``count / 1000000``. The plot is titled with ``expt_id`` and saved as
    ``<directory>/<expt_id>.png`` (4x5 in, 300 dpi).

    Args:
        barcodes_obs: Forwarded to ``get_vectors``.
        barcode_table: Forwarded to ``get_vectors``.
        directory: Output directory for the image.
        expt_id: Experiment identifier (plot title and file stem).
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    columns = {
        'barcode': barcodes,
        'count': counts,
        'matched': matches,
    }
    frame = DataFrame(columns)

    # Assemble the plot one layer at a time.
    figure = ggplot2.ggplot(frame)
    figure = figure + ggplot2.aes_string(x='factor(matched)',
                                         y='count / 1000000')
    figure = figure + ggplot2.geom_boxplot(outlier_size=0)
    figure = figure + ggplot2.geom_jitter()
    figure = figure + ggplot2.ggtitle(label=expt_id)
    figure = figure + ggplot2.ggplot2.xlab(label="")
    figure = figure + ggplot2.scale_y_continuous(
        name="Count\n(million reads)")

    out_path = "{0}/{1}.png".format(directory, expt_id)
    grdevices.png(filename=out_path, width=4, height=5, unit='in', res=300)
    figure.plot()
    grdevices.dev_off()
 def plot_dupl_url(self):
     """Plot pages per URL (URL-level duplicates), one facet per crawl.

     Only the rows of ``self.histogr`` whose ``type`` is ``'url'`` are used.
     They are drawn as jittered points (x: ``count``, y: ``frequency``) with
     a log10 y scale, and the image is saved to
     ``crawler/histogr_url_dupl.png`` below ``PLOTDIR``.

     Returns:
         The plot object that was saved.
     """
     histogram = self.histogr
     url_rows = histogram[histogram['type'].isin(['url'])]
     title = 'Pages per URL (URL-level duplicates)'

     plot = ggplot2.ggplot(url_rows)
     plot = plot + ggplot2.aes_string(x='count', y='frequency')
     plot = plot + ggplot2.geom_jitter()
     plot = plot + ggplot2.facet_wrap('crawl', ncol=5)
     plot = plot + ggplot2.labs(title=title,
                                x='(duplicate) pages per URL',
                                y='log(frequency)')
     # Only y is log-scaled; adding scale_x_log10() would give log-log.
     plot = plot + ggplot2.scale_y_log10()

     target = os.path.join(PLOTDIR, 'crawler/histogr_url_dupl.png')
     plot.save(target)
     return plot
Exemplo n.º 7
0
 def plot_dupl_url(self):
     """Save and return the URL-level duplicate histogram plot.

     Takes the ``type == 'url'`` rows of ``self.histogr`` and plots
     ``frequency`` against ``count`` as jittered points, faceted by
     ``crawl`` in five columns, with a log10 y axis. The figure is written
     to ``os.path.join(PLOTDIR, 'crawler/histogr_url_dupl.png')``.

     Returns:
         The plot object.
     """
     # -- pages per URL (URL-level duplicates)
     wanted_types = ['url']
     table = self.histogr
     is_url_row = table['type'].isin(wanted_types)
     table = table[is_url_row]
     heading = 'Pages per URL (URL-level duplicates)'
     chart = (
         ggplot2.ggplot(table)
         + ggplot2.aes_string(x='count', y='frequency')
         + ggplot2.geom_jitter()
         + ggplot2.facet_wrap('crawl', ncol=5)
         + ggplot2.labs(title=heading, x='(duplicate) pages per URL',
                        y='log(frequency)')
         + ggplot2.scale_y_log10()
         # a log-log variant would also add ggplot2.scale_x_log10()
     )
     png_path = os.path.join(PLOTDIR, 'crawler/histogr_url_dupl.png')
     chart.save(png_path)
     return chart
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Plot two sample sets side by side as box plots and save the image.

    Each value is tagged "set1" or "set2" according to the set it came
    from; the tagged values are plotted with ggplot2 as a box plot plus
    jittered points, using the black-and-white theme.

    Args:
        grdevices: Object providing the ``pdf``, ``png`` and ``dev_off``
            graphics-device calls.
        plotName: Output file path.
        samp_set1_vals: Values of the first set.
        samp_set2_vals: Values of the second set; joined to the first
            with ``+``.
        image_file_type: ``"pdf"`` for a PDF device, anything else for a
            512x512 PNG device.
    """
    # One label per value, set1 labels first.
    labels = ["set1"] * len(samp_set1_vals)
    labels += ["set2"] * len(samp_set2_vals)

    columns = {
        "sample": robjects.StrVector(labels),
        "value": robjects.FloatVector(samp_set1_vals + samp_set2_vals),
    }
    table = robjects.DataFrame(columns)

    figure = ggplot2.ggplot(table)
    for layer_factory in (
            lambda: ggplot2.aes_string(x="sample", y='value'),
            ggplot2.geom_boxplot,
            ggplot2.geom_jitter,
            ggplot2.theme_bw):
        figure = figure + layer_factory()

    open_device = grdevices.pdf if image_file_type == "pdf" else None
    if open_device is not None:
        open_device(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    figure.plot()
    grdevices.dev_off()