Пример #1
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance.

    Draws one point per pore on a 16 x 32 grid, coloured by log10 of the
    total base pairs recorded for that pore and labelled with the pore
    identifier returned by ``minion_flowcell_layout()``.

    Parameters:
        parser: unused here; kept so the signature matches its callers.
        args: object with a ``saveas`` attribute. When it is ``None`` the
            plot is drawn on the current device and the function waits for
            Enter; otherwise it must be a path ending in ``.pdf`` or
            ``.png``.
        tot_bp_per_pore: container mapping pore identifier -> total bp.

    Exits the process with status 1 when ``args.saveas`` has an
    unrecognized extension.
    """
    import math
    import sys

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    flowcell_layout = minion_flowcell_layout()

    pore_values = []
    for pore in flowcell_layout:
        total_bp = tot_bp_per_pore[pore] if pore in tot_bp_per_pore else 0
        # log10 is undefined for 0; pores without any bases are drawn as 0.
        pore_values.append(math.log10(total_bp) if total_bp > 0 else 0.0)

    # make a data frame of the lists
    # list(range(...)) keeps the repetition working on Python 3 as well.
    d = {
        "rownum": robjects.IntVector(list(range(1, 17)) * 32),
        "colnum": robjects.IntVector(sorted(list(range(1, 33)) * 16)),
        # log10 values are fractional, so they need a float vector.
        "log10_tot_bp": robjects.FloatVector(pore_values),
        "labels": robjects.IntVector(flowcell_layout),
    }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (
        gp
        + gg.aes_string(y="factor(rownum, rev(rownum))", x="factor(colnum)")
        + gg.geom_point(gg.aes_string(color="log10_tot_bp"), size=7)
        + gg.geom_text(gg.aes_string(label="labels"), colour="white", size=2)
        + gg.scale_colour_gradient2(low="black", mid="black", high="red")
        + gg.coord_fixed(ratio=1.4)
        + gg.labs(x=gg.NULL, y=gg.NULL)
    )

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!", plot_file)
            # Non-zero status so callers and shells can detect the failure.
            sys.exit(1)

        try:
            pp.plot()
        finally:
            # Always close the device so the output file is flushed.
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print("Type enter to exit.")
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
Пример #2
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance.

    Each pore of the layout returned by ``minion_flowcell_layout()`` is drawn
    as a point on a 16 x 32 grid, coloured by log10 of its total base pairs
    and labelled with its identifier.

    Parameters:
        parser: not used by this function; kept for caller compatibility.
        args: object exposing ``saveas``; ``None`` draws on the current
            device and waits for Enter, otherwise a ``.pdf`` or ``.png``
            path to write the plot to.
        tot_bp_per_pore: container mapping pore identifier -> total bp.

    Terminates the process with exit status 1 if ``args.saveas`` does not
    end in ``.pdf`` or ``.png``.
    """
    import math
    import sys

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    flowcell_layout = minion_flowcell_layout()

    def log10_or_zero(total_bp):
        # log10(0) raises ValueError; treat empty pores as 0.
        return math.log10(total_bp) if total_bp > 0 else 0.0

    pore_values = [
        log10_or_zero(tot_bp_per_pore[pore]) if pore in tot_bp_per_pore
        else 0.0
        for pore in flowcell_layout
    ]

    # make a data frame of the lists
    # (list(range(...)) so the repetition also works on Python 3)
    d = {'rownum': robjects.IntVector(list(range(1, 17)) * 32),
         'colnum': robjects.IntVector(sorted(list(range(1, 33)) * 16)),
         # fractional log10 values do not fit an integer vector
         'log10_tot_bp': robjects.FloatVector(pore_values),
         'labels': robjects.IntVector(flowcell_layout)
         }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (gp
          + gg.aes_string(y='factor(rownum, rev(rownum))',
                          x='factor(colnum)')
          + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size=7)
          + gg.geom_text(gg.aes_string(label='labels'), colour="white",
                         size=2)
          + gg.scale_colour_gradient2(low="black", mid="black", high="red")
          + gg.coord_fixed(ratio=1.4)
          + gg.labs(x=gg.NULL, y=gg.NULL))

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!", plot_file)
            # report the failure through the exit status
            sys.exit(1)

        try:
            pp.plot()
        finally:
            # close the device even if plotting fails
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        read_line()
 def plot_similarity_matrix(self, item_type, image_file, title):
     '''Draw the pairwise crawl similarities (overlap of unique items)
     for item_type as a heat map, save it below PLOTDIR as image_file
     and return the plot object.'''
     similarities = self.similarity[item_type]

     # one row per (crawl1, crawl2) pair, row labels counted from 1
     columns = defaultdict(dict)
     row = 0
     for first in similarities:
         for second in similarities[first]:
             row += 1
             value = similarities[first][second]
             columns['crawl1'][row] = MonthlyCrawl.short_name(first)
             columns['crawl2'][row] = MonthlyCrawl.short_name(second)
             columns['similarity'][row] = value
             columns['sim_rounded'][row] = value  # formatted below
     frame = pandas.DataFrame(columns)
     print(frame)

     # the median similarity serves as midpoint of the colour scale
     values = frame['similarity']
     midpoint = values.median()

     # a wide value range gets fewer decimals and a larger label font
     if (values.max() - values.min()) > .2:
         decimals, textsize, minshown = 2, 2.8, .005
     else:
         decimals, textsize, minshown = 3, 2, .0005
     template = '{0:.' + str(decimals) + 'f}'

     def as_label(value):
         # values below the display threshold collapse to '0'
         if value >= minshown:
             return template.format(value).lstrip('0')
         return '0'

     frame['sim_rounded'] = frame['sim_rounded'].apply(as_label)
     print('Median of similarities for', item_type, '=', midpoint)

     if len(similarities) > self.MAX_MATRIX_SIZE:
         # walk the crawls in reverse sorted order and drop every crawl
         # whose position exceeds MAX_MATRIX_SIZE from both axes
         newest_first = sorted(similarities, reverse=True)
         for position, crawl in enumerate(newest_first):
             if position > self.MAX_MATRIX_SIZE:
                 short_name = MonthlyCrawl.short_name(crawl)
                 keep = ((frame['crawl1'] != short_name)
                         & (frame['crawl2'] != short_name))
                 frame = frame[keep]

     mapping = ggplot2.aes_string(x='crawl2', y='crawl1',
                                  fill='similarity', label='sim_rounded')
     fill_scale = ggplot2.scale_fill_gradient2(
         low="red", high="blue", mid="white",
         midpoint=midpoint, space="Lab")
     slanted_x_labels = ggplot2.theme(
         **{'axis.text.x': ggplot2.element_text(angle=45,
                                                vjust=1, hjust=1)})
     p = (ggplot2.ggplot(frame)
          + mapping
          + ggplot2.geom_tile(color="white")
          + fill_scale
          + GGPLOT2_THEME
          + ggplot2.coord_fixed()
          + slanted_x_labels
          + ggplot2.labs(title=title, x='', y='')
          + ggplot2.geom_text(color='black', size=textsize))
     p.save(os.path.join(PLOTDIR, image_file))
     return p
Пример #4
0
def generate_histogram(subgroups_to_sses_to_n_count, tname, file_name):
    """Write a grouped bar chart of counts per subgroup to a PNG file.

    Parameters:
        subgroups_to_sses_to_n_count: mapping subgroup -> mapping of
            category -> count. Every (subgroup, category) pair becomes one
            bar.
        tname: name of the category column; also used as the fill
            aesthetic of the plot.
        file_name: output file name, joined onto ``OUTPUT_PATH``.
    """
    columns_to_data = {'subgroup': [], tname: [], 'count': []}
    for subgroup, sses_to_n_count in subgroups_to_sses_to_n_count.items():
        for ss, n_count in sses_to_n_count.items():
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data[tname].append(ss)
            columns_to_data['count'].append(n_count)
    # Floor of 0 so an empty input still yields a valid axis.
    max_count = max([0] + columns_to_data['count'])

    r_columns_to_data = {
        'subgroup':
        ro.FactorVector(columns_to_data['subgroup'],
                        levels=ro.StrVector(
                            _sort_subgroup(set(columns_to_data['subgroup'])))),
        tname:
        ro.StrVector(columns_to_data[tname]),
        'count':
        ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    # Upper axis limit: the next multiple of 1000 above the largest count.
    # Floor division keeps this rounding under Python 3, where `/` would
    # produce a float and skip the rounding entirely.
    y_limit = (int(max_count) // 1000 + 1) * 1000
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug("The Data Frame for file %s: \n%s", histogram_file_path, df)

    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.aes_string(x='subgroup', y='count', fill=tname) + \
         ggplot2.geom_bar(position="dodge", width=0.8, stat="identity") + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=40)}) + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=40, angle=45)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=40)}) + \
         ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                    limits=ro.IntVector([0, y_limit])) + \
         ggplot2.geom_text(ggplot2.aes_string(label='count'), size=6, angle=35, hjust=-0.1,
                           position=ggplot2.position_dodge(width=0.8),
                           vjust=-0.2)

    # Open the device only once the plot object exists, and always close it
    # so a failure while drawing does not leave the device open.
    grdevices.png(file=histogram_file_path, width=1200, height=800)
    try:
        pp.plot()
    finally:
        grdevices.dev_off()
    logging.info("Output step3 file %s", histogram_file_path)
Пример #5
0
def generate_step3_9_n_count_histogram(place_type_pos_type_to_count,
                                       file_name):
    """Write a grouped bar chart of counts per position type to a PNG file.

    Parameters:
        place_type_pos_type_to_count: mapping of ``"<place>_<pos>"`` keys to
            counts. Each key must contain exactly one underscore; anything
            else raises ``ValueError`` when the key is unpacked.
        file_name: output file name, joined onto ``OUTPUT_PATH``.
    """
    columns_to_data = {'place': [], 'pos': [], 'count': []}
    for place_pos_type, n_count in place_type_pos_type_to_count.items():
        place_type, pos_type = place_pos_type.split('_')
        columns_to_data['place'].append(place_type)
        columns_to_data['pos'].append(pos_type)
        columns_to_data['count'].append(n_count)
    # Floor of 0 so an empty input still yields a valid axis.
    max_count = max([0] + columns_to_data['count'])

    r_columns_to_data = {
        'place': ro.StrVector(columns_to_data['place']),
        'pos': ro.StrVector(columns_to_data['pos']),
        'count': ro.IntVector(columns_to_data['count'])
    }
    df = ro.DataFrame(r_columns_to_data)

    # Upper axis limit: the next multiple of 1000 (large counts) or of 100
    # (small counts) above the largest count. Floor division keeps this
    # rounding under Python 3, where `/` would produce a float.
    step = 1000 if max_count > 1000 else 100
    y_limit = (int(max_count) // step + 1) * step
    histogram_file_path = os.path.join(OUTPUT_PATH, file_name)
    logging.debug("The Data Frame for file %s: \n%s", histogram_file_path, df)

    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.aes_string(x='pos', y='count', fill='place') + \
         ggplot2.geom_bar(position="dodge", stat="identity") + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.scale_y_continuous(expand=ro.IntVector([0, 0]),
                                    limits=ro.IntVector([0, y_limit])) + \
         ggplot2.geom_text(ggplot2.aes_string(label='count'),
                           position=ggplot2.position_dodge(width=0.8), size=10, angle=35, hjust=-0.2,
                           vjust=-0.5)

    # Open the device only once the plot object exists, and always close it
    # so a failure while drawing does not leave the device open.
    grdevices.png(file=histogram_file_path, width=1024, height=512)
    try:
        pp.plot()
    finally:
        grdevices.dev_off()
    logging.info("Output step3 file %s", histogram_file_path)
Пример #6
0
# Keep only the rows of the R data frame `onlyfilt` taken at the 0.30487806
# sample depth. The comparison has to reference the Sampledepth column:
# comparing the literal "Sampledepth" with the literal "0.30487806" is always
# FALSE and would select no rows at all.
onlysurf = robjects.r(
    'onlysurface<-onlyfilt[ which(onlyfilt$Sampledepth == "0.30487806"),]')

# Colour palette for the temperature scale; grdevices.topo_colors(10) and
# grdevices.rainbow(20) are drop-in alternatives.
colours2 = grdevices.cm_colors(10)

# Scatter of the stations by longitude/latitude, coloured by temperature and
# labelled with the station name.
gp = ggplot2.ggplot(onlysurf)
gp = gp + ggplot2.aes_string(x="Lon", y="Lat", col="Temp", label="Station")
gp = gp + ggplot2.scale_colour_gradientn(colours=colours2)
gp = gp + ggplot2.geom_text(col="black", offset=10)
gp = gp + ggplot2.geom_point(position="jitter")
gp = gp + ggplot2.ggtitle(graphtitle)

# Fetch a base map of Europe on the R side.
robjects.r('library(ggmap)')
robjects.r('library(mapproj)')
robjects.r('map <- get_map(location = "Europe", zoom = 4)')
robjects.r('ggmap(map)')

# NOTE(review): `gp` is assembled but never drawn in this excerpt; call
# gp.plot() to render it.
Пример #7
0
    # NOTE(review): this is the body of a function whose `def` line is not
    # part of this excerpt.
    # Load the iris table from a hard-coded local path and rename the "Name"
    # column to "Species".
    iris_py = pandas.read_csv("/home/yarden/iris.csv")
    iris_py = iris_py.rename(columns={"Name": "Species"})
    # NOTE(review): `corrs` is never appended to in the visible code.
    corrs = []
    from scipy.stats import spearmanr
    # Print the Spearman correlation of sepal length vs. sepal width for each
    # species separately.
    for species in set(iris_py.Species):
        entries = iris_py[iris_py["Species"] == species]
        c = spearmanr(entries["SepalLength"], entries["SepalWidth"])
        print "c: ", c

    # compute r.cor(x, y) and divide up by Species
    # Assume we get a vector of length Species saying what the
    # correlation is for each Species' Sepal Length/Width
    # Scatter plot of sepal length vs. sepal width, one facet per species.
    p = ggplot2.ggplot(iris) + \
        ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length", y="Sepal.Width")) + \
        ggplot2.facet_wrap(Formula("~Species")) 
    p.plot()
    r["dev.off"]()    

    # NOTE(review): the process exits here, so everything below this call is
    # unreachable as written.
    sys.exit(1)
    grdevices = importr('grDevices')
    ggplot2.theme_set(ggplot2.theme_bw(12))

    # NOTE(review): `aes_string` is used unqualified in the geom_text layer
    # below, unlike the `ggplot2.aes_string` calls elsewhere in this block;
    # confirm it is imported by name in the enclosing module.
    p = ggplot2.ggplot(iris) + \
        ggplot2.geom_point(ggplot2.aes_string(x="Sepal.Length", y="Sepal.Width")) + \
        ggplot2.facet_wrap(Formula('~ Species'), ncol=2, nrow = 2) + \
        ggplot2.geom_text(aes_string(x="Sepal.Length", y="Sepal.Width"), label="t") + \
        ggplot2.GBaseObject(r('ggplot2::coord_fixed')()) # aspect ratio
    p.plot()