class THEME:
    """Shared plot styling: palettes, a base plotnine theme and ready-made scales."""

    # Stand-alone colour / loader settings.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Palette used by the categorical fill and colour scales below.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    # Not referenced by any scale defined in this class.
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Black-and-white base theme with the panel border removed. Note that
    # ``bgcolor`` is not applied to the panel or plot background here.
    mt = theme_bw() + theme(panel_border=element_blank())

    # Scales to add to a plot: categorical fill, categorical line colour and
    # a two-colour continuous fill.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient(low="#ce4257", high="#aad576")

    # NOTE(review): presumably selectors for "fill" vs "colour" styling and a
    # (width, height) figure size -- confirm against the callers.
    FILL = 1
    COLOR = 2
    LONG_FIGURE = (10, 20)
def scale_fill_gradient_energinet(low: int = 0, high: int = 2, **kwargs) -> p9.scale_fill_gradient:
    """
    Build a two-point fill gradient from the ``endktheme`` Excel palette.

    Parameters:
        low (int): Index into the palette of the colour used at the low end.
        high (int): Index into the palette of the colour used at the high end.
        **kwargs: Forwarded unchanged to ``p9.scale_fill_gradient``.

    Returns:
        The ``p9.scale_fill_gradient`` scale built from the two colours.
    """
    palette = endktheme.colors.excel()
    low_colour, high_colour = palette[low], palette[high]
    return p9.scale_fill_gradient(low=low_colour, high=high_colour, **kwargs)
def plot_restaurants_per_neighborhood(filepath, restaurant_data_file,
                                      pittsburgh_shapefile,
                                      output_file="restaurant_map.png"):
    """Draw a choropleth of restaurant counts per neighborhood and save it.

    Restaurant coordinates are read from a CSV file, converted to point
    geometries and spatially joined against the neighborhood polygons. Each
    polygon is then filled according to the number of restaurants that
    intersect it.

    Args:
        filepath: Prefix prepended verbatim (plain string concatenation) to
            ``restaurant_data_file``; include the trailing separator when it
            names a directory.
        restaurant_data_file: Name of a CSV file with ``longitude`` and
            ``latitude`` columns.
        pittsburgh_shapefile: Path of the neighborhood shapefile. Its
            ``hood``, ``hood_no`` and ``geometry`` columns are used.
        output_file: Path the rendered map is written to. Defaults to
            ``"restaurant_map.png"``.

    Returns:
        The plotnine plot that was written to ``output_file``.
    """
    restaurants = pd.read_csv(filepath + restaurant_data_file)
    restaurant_points = gpd.GeoDataFrame(
        restaurants,
        geometry=gpd.points_from_xy(restaurants.longitude,
                                    restaurants.latitude),
    ).filter(items=["geometry"])

    neighborhoods = gpd.read_file(pittsburgh_shapefile).filter(
        items=["hood", "hood_no", "geometry"])

    # Spatial join: attach to every restaurant the neighborhood it falls in.
    # ``predicate`` replaced the older ``op`` keyword of ``sjoin``; fall back
    # to ``op`` only for geopandas releases that predate ``predicate``.
    try:
        restaurants_in_hoods = gpd.sjoin(restaurant_points, neighborhoods,
                                         how="inner", predicate="intersects")
    except TypeError:
        restaurants_in_hoods = gpd.sjoin(restaurant_points, neighborhoods,
                                         how="inner", op="intersects")

    # One row per neighborhood that contains at least one restaurant.
    counts = (restaurants_in_hoods.groupby("hood_no")["hood"]
              .count()
              .rename("num_restaurants")
              .reset_index())

    # Left merge keeps every polygon; neighborhoods without any restaurant
    # end up with a missing ``num_restaurants`` value.
    restaurants_per_shape = gpd.GeoDataFrame(
        pd.merge(neighborhoods, counts, how="left", on="hood_no"))

    restaurant_map = (
        p.ggplot(restaurants_per_shape)
        + p.geom_map(p.aes(fill="num_restaurants"))
        + p.scale_fill_gradient(low="#efefef", high="#073763",
                                name="# Restaurants")
        + p.theme(
            panel_background=p.element_rect(fill="white"),
            axis_text_x=p.element_blank(),
            axis_text_y=p.element_blank(),
            axis_ticks_major_x=p.element_blank(),
            axis_ticks_major_y=p.element_blank(),
        )
    )
    restaurant_map.save(output_file)
    return restaurant_map
class THEME:
    """Shared plot styling: palette, a dark-background plotnine theme and ready-made scales."""

    # Background colour applied to both the panel and the whole plot in ``mt``.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Palette used by the categorical fill and colour scales below.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#2a9d8f",
        "#797d62",
        "#3a6ea5",
    ]

    # Theme: ``bgcolor`` panel and plot backgrounds, black axis tick labels
    # and explicit facet-strip margins.
    mt = theme(
        panel_background=element_rect(fill=bgcolor),
        plot_background=element_rect(fill=bgcolor),
        axis_text_x=element_text(color="black"),
        axis_text_y=element_text(color="black"),
        strip_margin_y=0.05,
        strip_margin_x=0.5,
    )

    # Scales to add to a plot: categorical fill, categorical line colour and
    # a two-colour continuous fill.
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient(low="#aad576", high="#ce4257")

    # NOTE(review): presumably selectors for "fill" vs "colour" styling and a
    # (width, height) figure size -- confirm against the callers.
    FILL = 1
    COLOR = 2
    LONG_FIGURE = (10, 20)
def generate_map(data,
                 region,
                 value_field,
                 iso_field='iso',
                 scale_params=None,
                 plot_na_dots=False,
                 tolerance=None,
                 plot_size=8,
                 out_region_color='#f0f0f0',
                 na_color='#aaaaaa',
                 line_color='#666666',
                 projection=None):
    """
    Build a map plot of *data* for the given region.

    :param pandas.DataFrame data: Data to be plotted.
    :param str region: Region to center the map around. Countries outside the
        chosen region will be obscured. Must be a key of
        ``REGION_BOUNDS[projection]``.
    :param str value_field: Column of *data* with the values to be plotted.
        An ``object`` dtype column is treated as discrete, anything else as
        continuous.
    :param str iso_field: Column of *data* with the ISO3 codes for each
        country.
    :param dict scale_params: Dictionary of parameters to be passed to the
        ggplot corresponding color scale (continuous or discrete).
    :param bool plot_na_dots: Whether to plot the dots for small countries if
        said country doesn't have data available.
    :param int tolerance: Coordinate tolerance for polygon simplification, a
        higher number will result in simpler polygons and faster rendering
        (see DEFAULT_TOLERANCES). When omitted, the default for the chosen
        projection and region is used.
    :param int plot_size: Size of the plot, which determines the relative
        sizes of the elements within. The figure is
        ``(plot_size * ratio, plot_size)``.
    :param str out_region_color: Hex color of the countries that are out of
        the specified region.
    :param str na_color: Hex color of the countries with no data available.
    :param str line_color: Color of the country borders.
    :param str projection: Kind of map projection to be used in the map.
        Currently, Oceania (XOX) is only available in EPSG:4326 to enable
        wrapping. When omitted, ``'epsg4326'`` is used for region ``'XOX'``
        and ``'robinson'`` for every other region.
    :raises ValueError: If *projection* is not a key of ``PROJECTION_DICT``
        or *region* is not available for the chosen projection.
    :returns: a dictionary with the ggplot-like map under ``'plot'`` and the
        x-extent / y-extent ratio of the region bounds under ``'ratio'``.
    :rtype: dict
    """
    # Pick the default projection for the region when none was requested.
    if projection is None:
        if region == 'XOX':
            projection = 'epsg4326'
        else:
            projection = 'robinson'

    if projection not in PROJECTION_DICT.keys():
        raise ValueError('Projection "{}" not valid'.format(projection))

    if scale_params is None:
        scale_params = {}

    if region not in REGION_BOUNDS[projection]:
        raise ValueError(
            '"region" not available. \nValid regions are: {}'.format(', '.join(
                REGION_BOUNDS[projection].keys())))

    if tolerance is None:
        tolerance = DEFAULT_TOLERANCES[projection][region]

    # The country shapes ship next to this module.
    countries = GeoDataFrame.from_file(
        os.path.join(os.path.dirname(__file__), 'data/world-countries.shp'))

    # To plot Oceania we need the original EPSG:4326 to wrap around the 180
    # degrees longitude. In other cases transform to the desired projection.
    if region == 'XOX':
        countries.crs['lon_wrap'] = '180'  # Wrap around longitude 180 degrees
        XOX_countries = countries['continent'] == 'XOX'
        countries[XOX_countries] = countries[XOX_countries].to_crs(
            countries.crs)
        # Only the Oceania rows get 'lon'/'lat' centroid coordinates here.
        centroids = countries[XOX_countries].apply(
            lambda row: row['geometry'].centroid, axis=1)
        countries.loc[XOX_countries, 'lon'] = [c.x for c in centroids]
        countries.loc[XOX_countries, 'lat'] = [c.y for c in centroids]
    else:
        if projection != 'epsg4326':
            countries = countries.to_crs(PROJECTION_DICT[projection])
        # Centroids are taken after reprojection, so 'lon'/'lat' are in the
        # coordinates of the plotted projection.
        centroids = countries.apply(lambda row: row['geometry'].centroid,
                                    axis=1)
        countries['lon'] = [c.x for c in centroids]
        countries['lat'] = [c.y for c in centroids]

    countries['geometry'] = countries['geometry'].simplify(tolerance)

    # Region bounds are an (upper-left, lower-right) pair of (x, y) points.
    upper_left, lower_right = REGION_BOUNDS[projection][region]
    limits_x = [upper_left[0], lower_right[0]]
    limits_y = [lower_right[1], upper_left[1]]
    ratio = (limits_x[1] - limits_x[0]) / (limits_y[1] - limits_y[0])

    # Left merge keeps every country; countries missing from *data* get a
    # null value_field.
    plot_data = pd.merge(countries,
                         data,
                         how='left',
                         left_on='iso',
                         right_on=iso_field)

    # The dot threshold is always evaluated against the EPSG:4326 bounds of
    # the region, regardless of the projection used for plotting.
    map_bounds = REGION_BOUNDS['epsg4326'][region]
    map_area = ((map_bounds[1][0] - map_bounds[0][0]) *
                (map_bounds[0][1] - map_bounds[1][1]))
    # Countries whose 'pol_area' is small relative to the map get a dot.
    plot_data['plot_dot'] = (plot_data['pol_area'] < DOT_THRESHOLD * map_area)

    if not plot_na_dots:
        # Suppress dots for countries without a value.
        plot_data['plot_dot'] &= ~pd.isnull(plot_data[value_field])

    # Split the rows into: in region with a value, in region without a
    # value, and out of region. For region 'XWX' no row counts as out of
    # region.
    if region != 'XWX':
        in_region = ((~pd.isnull(plot_data[value_field])) &
                     (plot_data['continent'] == region))
        in_region_missing = ((pd.isnull(plot_data[value_field])) &
                             (plot_data['continent'] == region))
        out_region = plot_data['continent'] != region
    else:
        in_region = ~pd.isnull(plot_data[value_field])
        in_region_missing = pd.isnull(plot_data[value_field])
        out_region = np.repeat(False, len(plot_data))

    if plot_data[value_field].dtype == 'object':
        # Assume discrete values
        fill_scale = scale_fill_brewer(**scale_params, drop=False)
    else:
        # Assume continuous values
        fill_scale = scale_fill_gradient(**scale_params)

    plot_data_values = plot_data[in_region]
    plot_data_missing = plot_data[in_region_missing]
    plot_data_out_region = plot_data[out_region]

    # Subsets of each group that are drawn as dots at the country centroid.
    dots_region = plot_data_values[plot_data_values['plot_dot']]
    dots_region_missing = plot_data_missing[plot_data_missing['plot_dot']]
    dots_out_region = plot_data_out_region[plot_data_out_region['plot_dot']]

    # Layers: three polygon layers (values, missing, out of region) followed
    # by the matching three dot layers.
    plt = (
        ggplot() +
        geom_map(plot_data_values,
                 aes(fill=value_field),
                 color=line_color,
                 size=0.3) +
        # 'plot_dot' is mapped to color only for this layer; the manual color
        # scale added below gives it the "No data available" legend entry.
        geom_map(
            plot_data_missing, aes(color='plot_dot'), fill=na_color, size=0.3)
        + geom_map(plot_data_out_region,
                   fill=out_region_color,
                   color=line_color,
                   size=0.3) +
        geom_point(dots_region,
                   aes(x='lon', y='lat', fill=value_field),
                   size=3,
                   stroke=.1,
                   color=line_color) +
        geom_point(dots_region_missing,
                   aes(x='lon', y='lat'),
                   fill=na_color,
                   size=3,
                   stroke=.1,
                   color=line_color) +
        geom_point(dots_out_region,
                   aes(x='lon', y='lat'),
                   fill=out_region_color,
                   size=3,
                   stroke=.1,
                   color=line_color) +
        scale_x_continuous(breaks=[], limits=limits_x) +
        scale_y_continuous(breaks=[], limits=limits_y) +
        theme(
            figure_size=(plot_size * ratio, plot_size),
            panel_background=element_rect(fill='white', color='black'),
            # panel_border=element_rect(fill='white',
            #                           color='black',
            #                           size=.1),
            legend_background=element_rect(
                fill="white", color='black', size=.5),
            legend_box_just='left') + xlab('') + ylab(''))

    # The fill scale is only added when at least one row has a value.
    if len(plot_data_values.index) > 0:
        plt += fill_scale
    plt += scale_color_manual(name=' ',
                              values=[line_color],
                              breaks=[False],
                              labels=['No data available'])
    if plot_data[value_field].dtype == 'object':
        plt += guides(fill=guide_legend(override_aes={'shape': None}))

    return {
        'plot': plt,
        'ratio': ratio,
    }
axis_title=gg.element_text(size=14)))
# (The line above closes a plot expression whose beginning is outside this
# excerpt.) A bare ``p`` displays the plot when run as a notebook cell.
p


# In[13]:


# Write the plot currently bound to ``p`` into the ``figures`` directory.
figure_file = os.path.join('figures', 'replicates_filtration_results.pdf')
gg.ggsave(p, figure_file, height=5.5, width=6.5, dpi=500)


# In[14]:


# Dodged bar chart of ``COSMIC_count`` per lane, filled by
# ``filter_min_depth_count`` on a blue-to-red gradient and faceted by
# ``final_id``. The ``log_mut_count`` column is printed in white at y=10.
# NOTE(review): ``angle='90'`` is passed as a string -- confirm the installed
# plotnine/matplotlib accepts it; a numeric 90 may be required.
p = (gg.ggplot(
    filter_counts_df,
    gg.aes(x='lane', y='COSMIC_count', fill='filter_min_depth_count')) +
     gg.geom_bar(stat='identity', position='dodge') +
     gg.geom_text(
         gg.aes(y=10, label='log_mut_count'), size=5, colour='white') +
     gg.scale_fill_gradient(low='blue', high='red', name='All Variants') +
     gg.facet_wrap('~ final_id') +
     gg.xlab('Lane') +
     gg.ylab('Number of COSMIC Variants') +
     gg.theme_bw() +
     gg.theme(axis_text_x=gg.element_text(angle='90'),
              axis_text=gg.element_text(size=8),
              axis_title=gg.element_text(size=14)))
p


# In[15]:


# Same export settings as the filtration figure, different file name.
figure_file = os.path.join('figures', 'replicates_cosmic_mutcount_results.pdf')
gg.ggsave(p, figure_file, height=5.5, width=6.5, dpi=500)


# ## Process Merged Files - These are the Final VCFs to Interpret

# In[8]: