def plot_network(res):
    """Plot the network of multivariate TE between processes.

    Builds the graph of (multivariate) interactions between processes
    (e.g., multivariate TE) as a networkx directed graph (DiGraph) and
    shows two panels side by side: the adjacency matrix as a heatmap and
    the graph itself in a circular layout.

    Args:
        res : dict
            output of multivariate_te.analyse_network()

    Returns:
        instance of a directed graph class from the networkx package
        (DiGraph)
    """
    # Prefer the entry stored under 'fdr'; otherwise plot ``res`` as given.
    try:
        corrected = res['fdr']
    except KeyError:
        print('plotting non-corrected network!')
    else:
        res = corrected

    graph = generate_network_graph(res)
    print(graph.node)

    _, (matrix_ax, graph_ax) = plt.subplots(1, 2)
    adjacency = nx.to_numpy_matrix(graph)
    two_tone = sns.light_palette('cadetblue', n_colors=2, as_cmap=True)

    # Left panel: adjacency matrix, columns labelled along the top edge.
    sns.heatmap(
        adjacency,
        cmap=two_tone,
        cbar=False,
        ax=matrix_ax,
        square=True,
        linewidths=1,
        xticklabels=graph.nodes(),
        yticklabels=graph.nodes(),
    )
    matrix_ax.xaxis.tick_top()
    plt.setp(matrix_ax.yaxis.get_majorticklabels(), rotation=0)

    # Right panel: the graph itself.
    nx.draw_circular(
        graph,
        with_labels=True,
        node_size=300,
        alpha=1.0,
        ax=graph_ax,
        node_color='cadetblue',
        hold=True,
        font_weight='bold',
    )
    plt.show()
    return graph
def do_pairplots(counts, base_dir, sample):
    """Write three pairplot PDFs into ``base_dir``.

    One combined plot coloured by whether each gem maps uniquely, then one
    plot per group ("Unique", "Not Unique") with KDE contours on the lower
    triangle. File names are prefixed with ``sample``.
    """
    markers = ["o", "s"]
    r, total_gems, assigned_gems, assigned_gems_by_para = assign_gems(counts)
    df = pd.DataFrame.from_dict(r)
    unique_gems = find_unique_gems(assigned_gems_by_para)

    # Membership is the same for both labelings, so evaluate it once.
    is_unique = [gem in unique_gems for gem in df["GemId"]]

    num_unique = len(unique_gems)
    unique_label = "{:,} unique".format(num_unique)
    not_unique_label = "{:,} not unique".format(len(df) - num_unique)
    df["Unique mappings"] = [
        unique_label if flag else not_unique_label for flag in is_unique
    ]

    combined = sns.pairplot(df, hue="Unique mappings", markers=markers,
                            plot_kws=dict(s=10))
    combined.fig.text(0.87, 0.6, "{:,} Total Gems".format(len(total_gems)))
    combined_path = os.path.join(base_dir,
                                 "{}_combined_plot.pdf".format(sample))
    combined.savefig(combined_path, format="pdf")

    # now re-label to simply unique/not unique and make separate pairplots
    df["Unique mappings"] = [
        "Unique" if flag else "Not Unique" for flag in is_unique
    ]
    groups = zip(["Unique", "Not Unique"], markers, sns.color_palette())
    for subset, marker, color in groups:
        group_df = df[df["Unique mappings"] == subset]
        shades = sns.light_palette(color, as_cmap=True)
        group_plot = sns.pairplot(group_df, markers=marker,
                                  plot_kws=dict(color=color, s=10))
        group_plot.map_lower(sns.kdeplot, cmap=shades, n_levels=50)
        suffix = subset.replace(" ", "_").lower()
        group_path = os.path.join(
            base_dir, "{}_{}_combined_plot.pdf".format(sample, suffix))
        group_plot.savefig(group_path, format="pdf")
    plt.close('all')
def cmap_from_color(color, dark=False):
    '''
    Build a matplotlib colormap from a single color.

    By default the colormap runs from white to ``color``.

    Args:

        color: Can be one of several things:

            1. Hex code
            2. HTML color name
            3. RGB tuple

        dark (bool): If ``True``, the colormap runs from ``color`` to black
            instead. Default is ``False`` (white to ``color``).

    Returns:

        colormap: A matplotlib colormap
    '''
    build_palette = sns.dark_palette if dark else sns.light_palette
    return build_palette(color, as_cmap=True)
def mag_vs_length():
    """Draw shaded 2-D KDEs of ``length_scaled`` vs ``Mr`` on a 2x3 grid.

    Panels 1..5 each show the rows of the module-level ``bar`` table whose
    ``kind`` column equals ``kind[panel]``; the colormap is derived from
    ``flatui[1]``.
    """
    shading = sns.light_palette(color=flatui[1], as_cmap=True)
    for panel in range(1, 6):
        plt.subplot(2, 3, panel)
        label = kind[panel]
        subset = bar[bar.kind == label]
        density = sns.kdeplot(subset.length_scaled, subset.Mr, cmap=shading,
                              shade=True, shade_lowest=True)
        # The y-limits are given high-to-low, so the axis is drawn inverted.
        density.set(xlim=(0, 1.1), ylim=(-18, -23), title=label)
def global_background_gradient(s, m, M, cmap=None, low=0, high=0):
    """Return one ``background-color`` CSS declaration per value of ``s``.

    Values are normalised against the fixed range ``[m, M]`` rather than
    the range of ``s`` itself. ``low`` and ``high`` widen that range by a
    fraction of ``M - m`` at the respective end. ``cmap`` defaults to a
    light "seagreen" seaborn palette.
    """
    if cmap is None:
        cmap = seaborn.light_palette("seagreen", as_cmap=True)
    span = M - m
    scale = colors.Normalize(m - (span * low), M + (span * high))
    rgba_values = cmap(scale(s.values))
    return ['background-color: %s' % colors.rgb2hex(rgba)
            for rgba in rgba_values]
def get_means():
    """Render the per-``area_main`` means of the ``olives`` table as HTML.

    The first three columns of the grouped means are dropped and the
    remaining cells are shaded with a light green gradient.
    """
    olives = pd.read_sql_table('olives', db.engine)
    area_means = olives.groupby('area_main').mean()
    leading_columns = area_means.columns[[0, 1, 2]]
    area_means = area_means.drop(leading_columns, axis=1)

    green = sns.light_palette("green", as_cmap=True)
    cell_properties = {'cellpadding': '30', 'border-color': 'white'}
    styled = (
        area_means.style
        .background_gradient(cmap=green)
        .set_properties(**cell_properties)
    )
    return styled.render()
def hexbin(x, y, color="purple", **kwargs):
    """Seaborn-compatible hexbin plot.

    Uses a light palette derived from ``color`` when seaborn is available
    (``HAS_SEABORN``), and the matplotlib "Purples" colormap otherwise.
    Remaining keyword arguments are forwarded to ``plt.hexbin``.

    See also: http://seaborn.pydata.org/tutorial/axis_grids.html#mapping-custom-functions-onto-the-grid
    """
    cmap = sns.light_palette(color, as_cmap=True) if HAS_SEABORN else "Purples"
    plt.hexbin(x, y, cmap=cmap, **kwargs)
def build_table(dic):
    """Query USIODB for per-group aggregates and return a styled HTML table.

    Args:
        dic: mapping with the keys
            ``colLabel``       column to aggregate (SUM/AVG/MAX/MIN/COUNT),
            ``filterName``     column the WHERE clause filters on,
            ``filterQuery``    ``'contains'``, ``'does not contain'`` or a
                               raw SQL comparison operator,
            ``filterValue``    value to compare against,
            ``aggregationCol`` column to group by.

    Returns:
        HTML string of the first five result rows, rendered through a
        pandas Styler with background gradients and null highlighting.

    Raises:
        KeyError: if one of the keys above is missing from ``dic``.
    """
    column = to_valid_query(dic['colLabel'])
    filter_column = to_valid_query(dic['filterName'])
    filter_query = dic['filterQuery']
    filter_value = dic['filterValue']
    group_column = to_valid_query(dic['aggregationCol'])

    select = (
        "SELECT Period,SUM({0}),AVG({0}),MAX({0}),MIN({0}),COUNT({0}),{1} "
        "FROM USIODB WHERE {2} "
    ).format(column, group_column, filter_column)

    # The LIKE pattern is bound as a parameter, so quotes or SQL fragments
    # in the user-supplied value cannot alter the statement.
    params = []
    if filter_query == 'contains':
        sql = select + "LIKE ?"
        params.append('%{}%'.format(filter_value.strip()))
    elif filter_query == 'does not contain':
        sql = select + "NOT LIKE ?"
        params.append('%{}%'.format(filter_value.strip()))
    else:
        # NOTE(review): operator and value are still interpolated verbatim
        # here because callers may pass pre-quoted literals or expressions.
        # This remains injectable if either comes from untrusted input;
        # whitelist the operator and bind the value once the caller
        # contract is confirmed.
        sql = select + "{} {}".format(filter_query, filter_value)
    sql += " GROUP BY {}".format(group_column)
    print(sql)

    cm = sns.light_palette("yellow", as_cmap=True)

    # Close the connection even when the query fails.
    conn = sqlite3.connect('USIODB.db')
    try:
        frame = pd.read_sql_query(sql, conn, params=params or None)
    finally:
        conn.close()

    styled = (
        frame
        .loc[:4]
        .style
        .background_gradient(cmap='viridis', low=.5, high=0)
        .highlight_null('red')
        .background_gradient(cmap=cm)
    )
    return styled.render()
def shot_chart(x, y, title="", kind="scatter", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", outer_lines=False, court_lw=1,
               flip_court=False, kde_shade=True, hex_gridsize=None,
               ax=None, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------
    x, y : vectors
        Shot coordinates.
    title : str
        Title placed on the axes.
    kind : {"scatter", "kde", "hex"}
        Type of chart to draw.
    color : matplotlib color
        Color of the scatter points; also the base of the default colormap.
    cmap : colormap, optional
        Colormap for the kde / hex charts. Defaults to a light palette
        derived from ``color``.
    xlim, ylim : two-tuples
        Axis limits; both are reversed when ``flip_court`` is true.
    court_color, court_lw, outer_lines
        Forwarded to ``draw_court``.
    flip_court : bool
        Reverse both axis limits.
    kde_shade : bool
        Passed to ``sns.kdeplot`` as ``shade``.
    hex_gridsize : int, optional
        Grid size for the hex chart. When omitted it is the mean of the
        Freedman-Diaconis bin counts of ``x`` and ``y``.
    ax : Axes, optional
        Axes to draw on; defaults to the current axes.
    kwargs
        Forwarded to the underlying scatter / kdeplot / hexbin call.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the three supported values.
    """
    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    # Booleans, not the string "off": a non-empty string is truthy, so on
    # current matplotlib "off" leaves the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        ax.set_xlabel('')
        ax.set_ylabel('')

    elif kind == "hex":
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=hex_gridsize, cmap=cmap, **kwargs)

    else:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    return ax
def apply_global_background_gradient(df, override_min=None, override_max=None,
                                     cmap=None, subset=None):
    """Apply ``global_background_gradient`` with one shared value range.

    ``df`` must expose ``.data`` and ``.apply`` (the code reads
    ``df.data.min().min()`` / ``df.data.max().max()`` for the default
    bounds). ``override_min`` / ``override_max`` replace those bounds when
    given. ``cmap`` defaults to a light "seagreen" palette. Returns the
    result of ``df.apply``.
    """
    if cmap is None:
        cmap = seaborn.light_palette("seagreen", as_cmap=True)

    # The global extrema are only computed when no override is supplied.
    if override_min is not None:
        lower = override_min
    else:
        lower = df.data.min().min()
    if override_max is not None:
        upper = override_max
    else:
        upper = df.data.max().max()

    return df.apply(
        global_background_gradient,
        cmap=cmap,
        m=lower,
        M=upper,
        subset=subset,
    )
def violin_by_reso(wp):
    """Save one box plot per feature, binned by resolution.

    For each item of ``wp`` the 'EH' and '0.025' minor cross-sections are
    drawn as overlaid box plots against 0.4-wide ``Reso`` bins, then
    written to ``violin_reso_plots/<feature>_reso.png``.

    Args:
        wp: object providing ``minor_xs(key)`` and ``items``
            (presumably a pandas Panel - TODO confirm).
    """
    p1 = sns.color_palette('Paired')[0:4]
    p2 = [tuple(i) for i in sns.light_palette("red", 10)]
    p3 = [tuple(i) for i in sns.light_palette("blue", 10)]
    pal = p1 + p2[1:-1] + p3[1:-1]

    eh = wp.minor_xs('EH')
    amb = wp.minor_xs('0.025')

    # Bin edges 0.9, 1.3, ... Each bin is labelled with its midpoint
    # (left edge + 0.2). An earlier, coarser binning used to be assigned
    # here and was immediately overwritten; it has been removed.
    reso_bins = np.arange(.9, 4.5, .4)
    labels = [i + .2 for i in reso_bins][:-1]

    eh['reso_range'] = pd.cut(eh['Reso'], reso_bins, labels=labels)
    amb['reso_range'] = pd.cut(amb['Reso'], reso_bins, labels=labels)

    for feature in wp.items:
        sns.boxplot(eh[feature], eh.reso_range, color=pal[11])
        sns.boxplot(amb[feature], amb.reso_range, color=pal[19], alpha=0.5)
        plt.savefig('violin_reso_plots/%s_reso.png' % feature)
        plt.clf()
def jointgrid(x, y, kind="scatter", data=None, title="", color="b",
              xlim=(-250, 250), ylim=(422.5, -47.5), court_color="gray",
              joint_color="b", marginals_color="b", chart=None,
              joint_kde_shade=True, marginals_kde_shade=True, court_lw=1,
              joint_kws=None, marginal_kws=None, outer_lines=False,
              cmap=None, space=0, set_size_inches=(12, 11), **kwargs):
    """Return a seaborn JointGrid with a shot chart on the joint axes.

    Parameters
    ----------
    x, y : vectors or column names
        Shot coordinates (column names when ``data`` is given).
    kind : str
        ``"kde"`` draws a KDE on the joint axes; any other value draws a
        scatter plot.
    data : DataFrame, optional
        Passed to ``sns.JointGrid``.
    title : str
        Title used when ``chart`` is not given.
    color : matplotlib color
        Base color of the default colormap.
    xlim, ylim : two-tuples
        Axis limits of the joint axes.
    court_color, court_lw, outer_lines
        Forwarded to ``draw_court``.
    joint_color, marginals_color : matplotlib colors
        Colors of the scatter points and the marginal distributions.
    chart : object, optional
        When given, its ``playername`` and ``season`` attributes build the
        title, overriding ``title``.
    joint_kde_shade : bool
        ``shade`` argument of the joint KDE.
    marginals_kde_shade : bool
        Accepted for compatibility; not used by this function.
    joint_kws, marginal_kws : dict, optional
        Extra keyword arguments for the joint / marginal plots. ``kwargs``
        are merged into ``joint_kws`` and take precedence.
    cmap : colormap, optional
        Colormap for the joint KDE; defaults to a light palette of
        ``color``.
    space : numeric
        Space between joint and marginal axes.
    set_size_inches : tuple
        Figure size in inches.

    Previously ``joint_kws``, ``marginal_kws``, ``cmap`` and ``data`` were
    silently discarded, and the function raised AttributeError whenever
    ``chart`` was left at its default of ``None``.
    """
    # Copy so that the caller's dictionaries are never mutated.
    joint_kws = dict(joint_kws) if joint_kws else {}
    joint_kws.update(kwargs)
    marginal_kws = dict(marginal_kws) if marginal_kws else {}

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    grid = sns.JointGrid(x, y, data=data, xlim=xlim, ylim=ylim, space=space)

    if kind == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)
    else:
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                               **marginal_kws)

    grid.fig.set_size_inches(set_size_inches)

    ax = grid.fig.get_axes()[0]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    ax.set_xlabel("")
    ax.set_ylabel("")
    # Booleans, not "off": a non-empty string is truthy and would leave the
    # labels visible on current matplotlib.
    ax.tick_params(labelbottom=False, labelleft=False)

    if chart is not None:
        title = (chart.playername + " FGA \n" + chart.season
                 + " Regular Season")
    ax.set_title(title, y=1.2, fontsize=18)

    return grid
def to_dataframe(self, cmap=None):
    """
    Return a pandas dataframe version of the `ErrorMatrix`.

    Do not use with the `with_labels` property. Labels are already
    included without it, and using it will make this function fail.

    Parameters
    ----------
    cmap : matplotlib colormap or `True`
        If `None` (default), the returned dataframe is not styled with
        background colors. Otherwise cell colors are added to the error
        matrix when the data frame is viewed in Jupyter Notebook (aka
        IPython Notebook). If `True`, a default colormap is chosen: first
        a seaborn colormap is tried, and if seaborn is not installed a
        matplotlib colormap is used instead (that's a bit uglier). The
        user can also supply their own colormap.

    Returns
    -------
    pandas dataframe or dataframe styler
        A dataframe representation of the error matrix that looks nice in
        a Jupyter Notebook. If a cmap is applied, a
        `pandas.core.style.Styler` object is returned; the dataframe can
        be accessed via the `.data` property of the `Styler`.
    """
    import pandas as pd

    frame = pd.DataFrame(self, columns=self.categories,
                         index=self.categories)
    frame = frame.replace('None', np.nan)

    # Unstyled request: hand back the plain dataframe.
    if cmap is None:
        return frame

    if cmap is True:
        # Try to provide a default color map
        try:
            from seaborn import light_palette
            cmap = light_palette('steelblue', as_cmap=True)
        except ImportError:
            # seaborn is less common than matplotlib. I don't really
            # want to make either one a dependency for this module.
            import matplotlib.pyplot as plt
            cmap = plt.cm.GnBu

    # Only the class-by-class cells are shaded, not the summary entries.
    shaded = frame.columns.difference(['Totals', 'Accuracy'])
    return frame.style.background_gradient(cmap=cmap,
                                           subset=(shaded, shaded))
def shot_chart_jointplot(x, y, data=None, title="", kind="scatter",
                         color="b", cmap=None, xlim=(-250, 250),
                         ylim=(422.5, -47.5), space=0, court_color="gray",
                         outer_lines=False, court_lw=1, flip_court=False,
                         set_size_inches=(12, 11), **kwargs):
    """
    Returns a seaborn JointGrid using sns.jointplot

    Parameters
    ----------
    x, y : vectors or column names
        Shot coordinates (column names when ``data`` is given).
    data : DataFrame, optional
        Passed to ``sns.jointplot``. Previously this argument was ignored.
    title : str
        Title placed above the joint axes.
    kind : str
        ``kind`` argument of ``sns.jointplot``.
    color : matplotlib color
        Plot color; also the base of the default colormap.
    cmap : colormap, optional
        Defaults to a light palette derived from ``color``.
    xlim, ylim : two-tuples
        Axis limits; both are reversed when ``flip_court`` is true.
    space : numeric
        Space between joint and marginal axes. Previously this argument
        was ignored and 0 was always used; 0 is still the default.
    court_color, court_lw, outer_lines
        Forwarded to ``draw_court``.
    flip_court : bool
        Reverse both axis limits.
    set_size_inches : tuple
        Figure size in inches.
    kwargs
        Forwarded to ``sns.jointplot``.
    """
    # If a colormap is not provided, then it is based off of the color
    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    plot = sns.jointplot(x, y, data=data, stat_func=None, kind=kind,
                         space=space, color=color, cmap=cmap, **kwargs)

    plot.fig.set_size_inches(set_size_inches)

    # A joint plot has 3 Axes, the first one called ax_joint
    # is the one we want to draw our court onto and adjust some other settings
    ax = plot.ax_joint

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of axis labels and tick marks. Booleans are required here:
    # the string "off" is truthy and leaves the labels visible on current
    # matplotlib.
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.tick_params(labelbottom=False, labelleft=False)

    # Add a title
    ax.set_title(title, y=1.2, fontsize=18)

    return plot
def posts_by_category(df):
    """Show a bar chart of posting counts per ``category_code``.

    Counts are the non-null ``url`` entries per category; each bar is
    annotated with its share of the total, rounded to two decimals.
    """
    sns.set_context('talk', font_scale=1.5)

    counts_by_category = df.groupby('category_code').url.count()
    categories = counts_by_category.index
    counts = counts_by_category.values

    _, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.set_ylabel('Postings')
    teal_shades = sns.light_palette('#008080', reverse=True, n_colors=10)
    sns.barplot(categories, counts, palette=teal_shades, linewidth=0)
    ax.set_xlabel('')
    ax.set_title('Postings by Category')
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(15)
    sns.despine(bottom=True, right=True, trim=True)

    total = sum(counts)
    percentage = [np.round((float(count) * 100 / total), 2)
                  for count in counts]
    # Write each bar's share just above the bar.
    for position, patch in enumerate(ax.patches):
        ax.text(patch.get_x(), patch.get_height() + 10,
                '{}%'.format(percentage[position]), fontsize=20)
    plt.show()
def plot_projection(self, colors = None, palette = 'husl', plot_density = False):
    """Draw a pair grid of ``self.features``.

    Without a ``cluster_labels`` attribute the grid is uncoloured. With
    it, ``self.labels`` is joined to the features and points are coloured
    by the ``label`` column, using ``colors`` or a ``palette`` with one
    color per cluster label. ``plot_density`` adds KDE contours to the
    lower triangle.
    """
    if not hasattr(self, 'cluster_labels'):
        # Unclustered data: plain grid.
        sns.set(style="white")
        grid = sns.PairGrid(self.features, diag_sharey=False)
        grid.map_upper(pyplot.scatter)
        grid.map_diag(sns.kdeplot, lw=3)
        if plot_density:
            grid.map_lower(sns.kdeplot, cmap="Blues_d")
        return

    if colors is None:
        colors = sns.color_palette(palette, len(self.cluster_labels))

    labelled = pd.concat([self.features, self.labels], axis=1)
    # One light colormap per hue level.
    per_hue_cmaps = [sns.light_palette(shade, as_cmap=True)
                     for shade in colors]
    grid = sns.PairGrid(labelled, diag_sharey=False, hue='label',
                        vars=self.features.columns,
                        hue_kws={'cmap': per_hue_cmaps})
    grid.map_diag(sns.kdeplot, lw=3)
    grid.map_upper(pyplot.scatter)
    if plot_density:
        grid.map_lower(sns.kdeplot)
def ClusterMap(self, label=None, fig_title='', transform='log'):
    """Plot a hierarchically clustered heatmap of the samples.

    Uses seaborn's clustermap function. Samples are in the rows of
    ``self.abun_df``.

    Args:
        label: optional column of ``self.meta_df``; when given, each row
            gets a color strip according to its value in that column.
        fig_title: optional output file name. With a ``label`` the label
            is inserted before the extension ("fig.png" ->
            "fig_<label>.png"). The figure is saved and closed only when
            this is non-empty.
        transform: 'log' (log of ``raw2abun(raw_df + 1)``),
            'presence_absence' (1 where the raw value is truthy, else 0),
            'logit' (log(p / (1 - p)) of ``raw2abun(raw_df + 1)``); any
            other value plots a copy of ``self.abun_df``.

    Returns:
        The seaborn ClusterGrid.
    """
    import os

    # scale fontsize so labels are visible..
    sb.set(font_scale=0.6)
    cmap = sb.light_palette('navy', as_cmap=True)

    if transform == 'log':
        data = np.log(raw2abun(self.raw_df + 1))
    elif transform == 'presence_absence':
        data = self.raw_df.applymap(lambda x: 1 if x else 0)
    elif transform == 'logit':
        abundance = raw2abun(self.raw_df + 1)
        data = np.log(abundance / (1 - abundance))
    else:
        data = copy(self.abun_df)

    if label:
        rows = self.abun_df.index
        row_vals = [self.meta_df.loc[smpl, label] for smpl in rows]
        categories = set(row_vals)
        row_pal = sb.cubehelix_palette(len(categories))
        # Key the lookup by the raw values: keying by str(value) made
        # every non-string label map to NaN.
        row_dict = dict(zip(categories, row_pal))
        # Index by sample so the colors line up with the data rows.
        # NOTE(review): assumes data shares abun_df's index - confirm.
        row_colors = pd.Series(row_vals, index=rows).map(row_dict)
        fg = sb.clustermap(data, row_colors=row_colors, cmap=cmap)
    else:
        fg = sb.clustermap(data, cmap=cmap)

    if fig_title:
        if label:
            # splitext keeps the dot of the extension and copes with
            # names that have no extension or more than one dot.
            stem, extension = os.path.splitext(fig_title)
            new_title = stem + '_' + label + extension
        else:
            new_title = fig_title
        fg.savefig(new_title)
        plt.close()

    sb.set(font_scale=1.0)
    return fg
def topic_brand_hm(review_inf, brands, topics, data):
    ''' Draw a heatmap of topic review rates: every topic against every brand.
    Inputs:
    review_inf: The dictionary with the text and dataframe for each topic,
                the second output of get_topics_and_reviews().
    brands: A list of all the unique brands that appear in the data.
    topics: A list of all the topics output by get_topics_and_reviews().
    data: The dataframe from which all the topics and reviews were generated.
    Outputs: A seaborn heatmap whose cells hold, for each (topic, brand),
             the percentage of that brand's rows in data that also appear
             in the topic's dataframe (0 for a brand with no rows).'''

    def brand_rows(frame, brand):
        # Number of rows in frame belonging to brand.
        return frame[frame['Brand'] == brand]['Brand'].shape[0]

    rates = pd.DataFrame()
    for brand in brands:
        brand_total = brand_rows(data, brand)
        column = []
        for topic in topics:
            topic_frame = review_inf[topic]['df']
            if brand_total > 0:
                column.append(100 * brand_rows(topic_frame, brand)
                              / brand_total)
            else:
                column.append(0)
        rates[brand] = column
    rates.index = topics

    shades = sns.light_palette((147, 100, 39), input="husl", as_cmap=True)
    return sns.heatmap(rates, cmap=shades)
def sub_heatmap_plot(df, gs, title, loc, total, satuation, vmin, vmax, flag, cancertype, hm):
    """Draw one heatmap panel into column ``loc`` of the grid spec ``gs``.

    Args:
        df: values to plot; clipped from above at the ``satuation``-th
            percentile of all its values before plotting.
        gs: grid spec; the panel is placed at ``gs[0, loc]``.
        title: panel title.
        loc: column index of the panel. Panels with ``loc % 3 == 2`` are
            left blank; the first panel (``loc == 0``) carries the y-label
            and the x tick labels; the last panel (``loc == total - 1``)
            keeps its colorbar.
        total: number of panels.
        satuation: percentile (as passed to ``np.percentile``) used as the
            upper clipping bound.
        vmin: label shown at the lower colorbar tick of the last panel.
            The color scale itself always starts at 0.
        vmax: upper bound of the color scale.
        flag: not used by this function.
        cancertype: text drawn above panels with ``loc % 3 == 1``.
        hm: mark name, used in the y-label; 'CTCF' switches the palette
            from blue to red, and 'H3K27me3' adds a row-count annotation.
    """
    # plot each heatmap panel
    all_values = [i for col in df.columns for i in df[col].values]
    df = df.clip(upper=np.percentile(all_values, satuation))
    ax = plt.subplot(gs[0, loc])
    #pal = sns.light_palette(cancertype_heatmap_color(cancertype),as_cmap=True)
    pal = sns.light_palette('blue', as_cmap=True)
    if hm == 'CTCF':
        pal = sns.light_palette('red', as_cmap=True)
    cbarvmin = 0
    if loc % 3 == 2:
        # Every third column is an empty spacer panel.
        ax.set_axis_off()
    elif loc == 0:
        # First panel: y-label and the two outermost x ticks, no colorbar.
        g = sns.heatmap(df, ax=ax, yticklabels=False, xticklabels=True, cbar=True, cmap=pal, vmin=cbarvmin, vmax=vmax, cbar_kws={"shrink": 0.5})
        ax.set_ylabel('{} ChIP-seq'.format(hm), fontsize=13)
        xp = g.get_xticks() #;print(xp)
        ax.set_xticks([xp[0], xp[-1]])
        ax.set_xticklabels(['-1kb', '1kb'], rotation=30, fontsize=13)
        ax.set_title('{}'.format(title), fontsize=14)
        ax.tick_params(axis='x', direction='out', length=0, width=1, colors='black')
        cbar = g.collections[0].colorbar
        cbar.set_clim(vmax * .15, vmax)
        # The colorbar is created, adjusted, then removed again.
        cbar.remove()
    elif loc == total - 1:
        # Last panel: the only one that keeps its colorbar.
        g = sns.heatmap(df, ax=ax, yticklabels=False, xticklabels=False, cbar=True, cmap=pal, vmin=cbarvmin, vmax=vmax, cbar_kws={"shrink": 0.5})
        ax.set_title('{}'.format(title), fontsize=15)
        ax.set_ylabel('')
        ax.tick_params(axis='y', direction='out', length=0, width=1, colors='black')
        cbar = g.collections[0].colorbar
        cbar.set_clim(vmax * .15, vmax)
        # Ticks sit at 0 and vmax but are labelled with vmin and vmax.
        cbar.set_ticks([cbarvmin, vmax])
        cbar.set_ticklabels([vmin, vmax])
        cbar.ax.set_position([.9, 0.35, .8, .5])
        cbar.ax.tick_params(axis='y', direction='out', length=0, width=1, colors='black')
    else:
        # Interior panel: no tick labels, colorbar removed.
        g = sns.heatmap(df, ax=ax, yticklabels=False, xticklabels=False, cbar=True, cmap=pal, vmin=cbarvmin, vmax=vmax)
        ax.set_title('{}'.format(title), fontsize=15)
        ax.set_ylabel('')
        ax.tick_params(axis='y', direction='out', length=0, width=1, colors='black')
        cbar = g.collections[0].colorbar
        cbar.set_clim(vmax * .15, vmax)
        cbar.remove()
    if hm == 'H3K27me3' and loc % 3 == 1:
        # Annotate the panel with the number of rows in df.
        ax.text(210, df.shape[0] * 1.15, '{}'.format(df.shape[0]), fontsize=12, ha='left')
    if loc % 3 == 1:
        # ==== add additional title
        ax.text(-100, -0.22 * df.shape[0] - .3, cancertype, fontsize=15)
        ax.hlines(y=-0.2 * df.shape[0] - .3, xmin=-230, xmax=200, clip_on=False, lw=1.1)
plt.title(f'{img_num}: {class_label}') y.axes.get_xaxis().set_visible(False) y.axes.get_yaxis().set_visible(False) plt.show() # plt.savefig('Class Results') # In[29]: imgs = [img.split('.')[0] for img in next(os.walk(ALIGNED_TEST_DIR))[2]] # ### Tabulated Prediction Probabilities # In[30]: # Creates a HeatMap using the seaborn library cm = sns.light_palette("blue", as_cmap=True) df = pd.DataFrame.from_dict(results, orient='index', columns=imgs) df.style.format("{:.2%}").set_caption('Confidence Values').background_gradient( cmap=cm) # In[31]: """ Re-structures the results dictionary so that each class_label points to another dictionary {k, v} where k = the Image_Id number and v = the confidence value """ def gen_results(results): my_dict = {} for cls in LABELS: probs = iter(results[cls])
def shot_chart(x, y, kind="scatter", title="", color="b", cmap=None,
               xlim=(-250, 250), ylim=(422.5, -47.5),
               court_color="gray", court_lw=1, outer_lines=False,
               flip_court=False, kde_shade=True, gridsize=None, ax=None,
               despine=False, **kwargs):
    """
    Returns an Axes object with player shots plotted.

    Parameters
    ----------

    x, y : vectors
        The x and y coordinates of the shots taken, passed in as vectors
        (such as a pandas Series).
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot.  Default
        is ``False``, which orients the court where the hoop is towards the top
        of the plot.
    kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours.
    gridsize : int, optional
        Number of hexagons in the x-direction.  The default is calculated using
        the Freedman-Diaconis method.
    ax : Axes, optional
        The Axes object to plot the court onto.
    despine : boolean, optional
        If ``True``, removes the spines.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn plots.

    Returns
    -------
     ax : Axes
        The Axes object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not 'scatter', 'kde', or 'hex'.

    """

    if ax is None:
        ax = plt.gca()

    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    # Booleans, not the string "off": a non-empty string is truthy, so on
    # current matplotlib "off" leaves the tick labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    ax.set_title(title, fontsize=18)

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    if kind == "scatter":
        ax.scatter(x, y, c=color, **kwargs)

    elif kind == "kde":
        sns.kdeplot(x, y, shade=kde_shade, cmap=cmap, ax=ax, **kwargs)
        ax.set_xlabel('')
        ax.set_ylabel('')

    elif kind == "hex":
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            gridsize = int(np.mean([x_bin, y_bin]))

        ax.hexbin(x, y, gridsize=gridsize, cmap=cmap, **kwargs)

    else:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return ax
def shot_chart_jointplot(x, y, data=None, kind="scatter", title="", color="b",
                         cmap=None, xlim=(-250, 250), ylim=(422.5, -47.5),
                         court_color="gray", court_lw=1, outer_lines=False,
                         flip_court=False, size=(12, 11), space=0,
                         despine=False, joint_kws=None, marginal_kws=None,
                         **kwargs):
    """
    Returns a seaborn JointGrid using sns.jointplot

    Parameters
    ----------

    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as column names from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the
        shot location coordinates.
    kind : { "scatter", "kde", "hex" }, optional
        The kind of shot chart to create.
    title : str, optional
        The title for the plot.
    color : matplotlib color, optional
        Color used to plot the shots
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``color``. Used for KDE
        and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.  The defaults represent the out of bounds
        lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot.  Default
        is ``False``, which orients the court where the hoop is towards the top
        of the plot.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots. Previously this
        argument was ignored and 0 was always used; 0 is still the default.
    despine : boolean, optional
        If ``True``, hides the four spines of the joint axes.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
    kwargs : key, value pairs
        Additional keyword arguments forwarded to ``sns.jointplot``.

    Returns
    -------
     grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``kind`` is not 'scatter', 'kde', or 'hex'.

    """

    # If a colormap is not provided, then it is based off of the color
    if cmap is None:
        cmap = sns.light_palette(color, as_cmap=True)

    if kind not in ["scatter", "kde", "hex"]:
        raise ValueError("kind must be 'scatter', 'kde', or 'hex'.")

    grid = sns.jointplot(x=x, y=y, data=data, stat_func=None, kind=kind,
                         space=space, color=color, cmap=cmap,
                         joint_kws=joint_kws, marginal_kws=marginal_kws,
                         **kwargs)

    grid.fig.set_size_inches(size)

    # A joint plot has 3 Axes, the first one called ax_joint
    # is the one we want to draw our court onto and adjust some other settings
    ax = grid.ax_joint

    if not flip_court:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
    else:
        ax.set_xlim(xlim[::-1])
        ax.set_ylim(ylim[::-1])

    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of axis labels and tick marks. Booleans are required here:
    # the string 'off' is truthy and leaves the labels visible on current
    # matplotlib.
    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.tick_params(labelbottom=False, labelleft=False)

    # Add a title
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)
        # set the margin joint spines to be same as the rest of the plot
        grid.ax_marg_x.spines[spine].set_lw(court_lw)
        grid.ax_marg_x.spines[spine].set_color(court_color)
        grid.ax_marg_y.spines[spine].set_lw(court_lw)
        grid.ax_marg_y.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
if droplabelx and sharex and axs.ndim > 1: for ax in axs[:-1,:].flat: ax.xaxis.set_tick_params(which='both', labelbottom=False, labeltop=False) ax.xaxis.offsetText.set_visible(False) # Turn off y tick labels and offset text for all but the left most column if droplabely and sharey and axs.ndim > 1: for ax in axs[:,1:].flat: ax.yaxis.set_tick_params(which='both', labelleft=False, labelright=False) ax.yaxis.offsetText.set_visible(False) HEATMAP_COLORMAPS = { 'pvalue': sns.diverging_palette(h_neg = 10, h_pos = 240, l = 50, center = 'light'), #'pvalue': sns.light_palette("red", reverse = True), 'error': sns.light_palette("seagreen", reverse=True), 'error_gap': sns.diverging_palette(h_neg = 10, h_pos = 240, l = 50, center = 'light'), 'error_relgap': sns.diverging_palette(h_neg = 10, h_pos = 240, l = 50, center = 'light'), } def plot_group_heatmap(data, groups, p, ax, cmap, metric_type = 'error_gap', stat_field = 'train'): title_dict = { 'error': 'Error', 'error_gap': 'Error Gap', 'pvalue': 'Pr(Envyfree)', 'dcp_root_error': 'Pooled Error', 'dcp_root_error_gap': 'Rationality Gap', 'dcp_root_pvalue': 'Pr(Rational)', 'dcp_root_error_relgap': 'Rationality Gap (Relative)',
def one_plot(opt):
    """Show a split violin plot of per-stream vote contributions.

    Loads the lanecheck pickle at ``opt.lanecheck_path``, gathers the
    output arrays of every stream present in the records, flattens them
    into one row per (stream, answer index) and plots 'Vote Contribution'
    per stream, split by the label that ``confusion_matrix_tn_fn``
    assigns. Rows labelled 'Ignore' are dropped.

    The 'reg_out' stream is collected and stacked like the others but is
    not part of the plot.

    Args:
        opt: options object; only ``opt.lanecheck_path`` is read.

    Raises:
        KeyError: if the loaded dictionary has no 'acc' entry.
    """
    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    # Load dictionary
    lanecheck_dict = utils.load_pickle(opt.lanecheck_path)

    # Drop the 'acc' entry BEFORE sampling a record. Sampling first could
    # pick 'acc' itself, which is presumably not a per-question dict
    # (TODO confirm), and then fail on ``.get``.
    del lanecheck_dict['acc']

    # Check what out features are needed, based on one sampled record.
    streams = ('sub_out', 'vcpt_out', 'vid_out', 'reg_out', 'regtopk_out')
    check = random.choice(list(lanecheck_dict.values()))
    present = {key: check.get(key) is not None for key in streams}

    # Iterate through the lanecheck items
    collected = {key: [] for key in streams}
    for q_dict in lanecheck_dict.values():
        for key in streams:
            if present[key]:
                collected[key].append(q_dict[key])
    for key in streams:
        if present[key]:
            collected[key] = np.stack(collected[key])

    import pandas as pd

    # Plot settings
    pal_tn_fn = {"True Negative": sns.light_palette("red")[1],
                 "False Negative": sns.light_palette("orange")[1]}
    sns.set(font_scale=3.0)
    sns.set_style("whitegrid")
    plt.subplots()

    def vote_rows(stream_label, outputs):
        # One row per answer slot (the first five entries of each record).
        # Entries 5 and 6 of a record end up in the 'ground_truth' and
        # 'prediction' columns.
        rows = [
            (stream_label, value, aa[5], aa[6],
             confusion_matrix_tn_fn(a_idx, aa[5], aa[6]))
            for aa in outputs
            for a_idx, value in enumerate(aa[:5])
        ]
        return [row for row in rows if row[4] != 'Ignore']

    plotted_streams = (
        ('sub_out', 'Subtitles'),
        ('vcpt_out', 'Visual Concepts'),
        ('vid_out', 'ImageNet'),
        ('regtopk_out', 'Regional Features'),
    )

    # Two fixed rows lead the table; they pin the plotted value range.
    data = [('', 38, 1, 1, "True Negative"),
            ('1', -7, 1, 1, "True Negative")]
    for key, stream_label in plotted_streams:
        if present[key]:
            data += vote_rows(stream_label, collected[key])

    votes = [row[1] for row in data]
    print(max([0] + votes))
    print(min([0] + votes))

    data = pd.DataFrame(data, columns=['', 'Vote Contribution',
                                       'ground_truth', 'prediction',
                                       'Answer Type'])
    sns.violinplot(data=data, palette=pal_tn_fn, inner="quart",
                   linewidth=2.5, hue='Answer Type', x='',
                   y='Vote Contribution', split=True, legend=False,
                   legend_out=True)
    plt.title('SVIR Trained Model')
    plt.show()
def heatmap_plot_zscore_ivscc(df_zscore_features, df_all, output_dir, title=None):
    """Cluster a feature matrix and save the annotated heatmap as a PNG.

    The rows of ``df_zscore_features`` are clustered with Ward linkage and
    become the (clustered) columns of the heatmap.  The features are ordered
    by a separate Ward clustering of the transposed matrix and become the
    heatmap rows.  Columns are annotated with colour strips for ``cre_line``
    and ``layer`` taken from ``df_all``.

    Args:
        df_zscore_features: DataFrame whose rows are clustered (presumably
            z-scored features per cell -- confirm with the caller).
        df_all: DataFrame providing the ``'cre_line'`` and ``'layer'``
            columns used for the colour strips and the legend.
        output_dir: directory receiving ``zscore_feature_heatmap.png``.
        title: optional title passed to ``pl.title``.

    Returns:
        The linkage matrix computed on the rows of ``df_zscore_features``.
    """
    # Parenthesised prints behave the same on Python 2 and Python 3.
    print("heatmap plot:ivscc")
    sns.set_context("talk", font_scale=1.4)

    # Colour lookup for cre lines
    cre_lines = np.unique(df_all['cre_line'])
    cre_line_pal = sns.color_palette("BrBG", len(cre_lines))
    cre_line_lut = dict(zip(cre_lines, cre_line_pal))
    cre_line_colors = df_all['cre_line'].map(cre_line_lut)

    # Colour lookup for layers
    layers = np.unique(df_all['layer'])
    layer_pal = sns.light_palette("black", len(layers))
    layer_lut = dict(zip(layers, layer_pal))
    layer_colors = df_all['layer'].map(layer_lut)

    linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')
    print("linkage genearted")

    # Order the features (heatmap rows) by their own Ward clustering instead
    # of letting clustermap draw a row dendrogram.
    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)
    matchIndex = [data.index[x] for x in feature_order]
    data = data.reindex(matchIndex)

    print("plot heatmap")
    g = sns.clustermap(data, row_cluster=False, col_linkage=linkage,
                       method='ward', metric='euclidean', linewidths=0.0,
                       col_colors=[cre_line_colors, layer_colors],
                       cmap=sns.cubehelix_palette(light=1, as_cmap=True),
                       figsize=(60, 15))

    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.subplots_adjust(left=0.1, bottom=0.5, right=0.8, top=0.95)
    if title:
        pl.title(title)

    # Legend built from zero-size proxy bars on the (unused) row-dendrogram
    # axes: cre lines, three blank spacer entries, then layers.
    for label in cre_lines:
        g.ax_row_dendrogram.bar(0, 0, color=cre_line_lut[label], label=label, linewidth=0.0)
    for i in range(3):
        g.ax_row_dendrogram.bar(0, 0, color="white", label=" ", linewidth=0)
    for label in layers:
        g.ax_row_dendrogram.bar(0, 0, color=layer_lut[label], label=label, linewidth=0)
    # Each legend() call replaces the previous legend, so one call after all
    # proxy bars exist gives the same result as calling it in every loop.
    g.ax_row_dendrogram.legend(loc="best", ncol=1, borderpad=0.0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    return linkage
def projection_pca(ds_d, ds_l, dest_path, pca=None, colors=None, do_3d=True,
                   target_names=None):
    """Plot label-wise densities of the data projected on principal components.

    Parameters
    ----------
    ds_d : np.array
        data in feature space, e.g. (#data, #feature)
    ds_l : np.array
        sparse labels, i.e. (#data, 1) or (#data,).  Label values are used
        directly as indices into ``colors`` and ``target_names``.
    dest_path : str
        file name of the PC1/PC2 plot.  With ``do_3d`` the PC1/PC3 and
        PC2/PC3 plots are written next to it with the suffixes ``_2.png``
        and ``_3.png``.
    pca : PCA
        prefitted PCA object used to project ``ds_d``; a 3-component
        whitened PCA is fitted on ``ds_d`` when omitted.
    colors : list
        one colour per label; chosen automatically when omitted.
    do_3d : bool
        also plot the component pairs involving the third component.
    target_names : list of str
        legend names per label; defaults to the label values.

    Returns
    -------
    PCA
        the PCA object that was used for the projection.
    """
    log_proc.info("Starting pca visualisation.")
    # pca vis
    paper_rc = {'lines.linewidth': 1, 'lines.markersize': 1}
    sns.set_context(rc=paper_rc)
    if ds_l.ndim == 2:
        ds_l = ds_l[:, 0]
    nb_labels = np.unique(ds_l)
    if pca is None:
        pca = PCA(3, whiten=True, random_state=0)
        pca.fit(ds_d)
    res = pca.transform(ds_d)

    # Resolve the names first: the default colour choice depends on how many
    # names there are, and ``target_names`` may legitimately be None here.
    if target_names is None:
        target_names = ["%d" % i for i in nb_labels]
    if colors is None:
        if len(target_names) == 5:
            colors = ["r", "g", "b", "y", "k"]
        else:
            cmap = plt.cm.get_cmap("Accent", len(target_names))
            colors = [cmap(i) for i in range(len(target_names))]

    # density plot 1st and 2nd PC
    _plot_pc_pair(res, ds_l, nb_labels, 0, 1, colors, target_names, dest_path)
    if do_3d:
        base = os.path.splitext(dest_path)[0]
        # density plot 1st and 3rd PC
        _plot_pc_pair(res, ds_l, nb_labels, 0, 2, colors, target_names,
                      base + "_2.png")
        # density plot 2nd and 3rd PC
        _plot_pc_pair(res, ds_l, nb_labels, 1, 2, colors, target_names,
                      base + "_3.png")
    return pca


def _plot_pc_pair(res, ds_l, labels, pc_x, pc_y, colors, target_names,
                  save_path):
    """Save a per-label KDE contour + scatter plot of two projected components."""
    plt.figure()
    plt.ylabel('$Z_%d$' % (pc_y + 1), fontsize=15)
    plt.xlabel('$Z_%d$' % (pc_x + 1), fontsize=15)
    for i in labels:
        cur_pal = sns.light_palette(colors[i], as_cmap=True)
        points = res[ds_l == i]
        xs, ys = points[:, pc_x], points[:, pc_y]
        ax = sns.kdeplot(xs, ys, shade=False, cmap=cur_pal, alpha=0.6,
                         shade_lowest=False, gridsize=100)
        ax.patch.set_facecolor('white')
        ax.collections[0].set_alpha(0)
        plt.scatter(xs, ys, s=1.2, lw=0, alpha=0.5, color=colors[i],
                    label=target_names[i])
    # Explicit patch handles: the legend lists every name, not only the
    # labels that happened to be drawn.
    handles = [mpatches.Patch(color=colors[ii], label=target_names[ii])
               for ii in range(len(target_names))]
    plt.legend(handles=handles, loc="best")
    plt.savefig(save_path, dpi=300)
    plt.close()
def heatmap(df, color='green'):
    """Return a Styler for ``df`` shaded with a light-to-``color`` gradient.

    Missing values are replaced by 0 before the background gradient is
    applied.
    """
    styler = df.fillna(0).style
    gradient = sns.light_palette(color, as_cmap=True)
    return styler.background_gradient(cmap=gradient)
def hierarchical_clustering(self, X=None, markers=None, filename_ext='',
                            export=True, indices=None, load_clustering=False,
                            method='ward', distance='euclidean',
                            cluster_fusion=False):
    """Cluster the rows of ``X`` hierarchically and save a clustermap figure.

    Args:
        X: 2-D array (rows are clustered, columns correspond to
            ``self.markers``).
        markers: subset of ``self.markers`` to use; all markers when None.
        filename_ext: suffix inserted into the figure and pickle file names.
        export: pickle the result to ``self.cluster_folder`` (only when the
            clustering was computed here, not loaded).
        indices: stored unchanged in the result under ``'indices'``.
        load_clustering: read the linkage from a previously exported pickle
            instead of computing it.
        method, distance: forwarded to ``scipy.cluster.hierarchy.linkage``.
        cluster_fusion: merge clusters according to
            ``self.settings.cluster_fusion[self.settings.dataset]``.

    Returns:
        dict with keys ``'res'`` (cluster id -> ``np.where`` style tuple of
        row indices), ``'colors'``, ``'linkage'`` and ``'indices'``.

    Raises:
        ValueError: ``cluster_fusion`` is requested but no indices are
            available.
    """
    Xs = X.copy()
    if markers is None:
        markers = self.markers
    else:
        marker_indices = np.array([self.markers.index(x) for x in markers])
        Xs = Xs[:, marker_indices]
    df = pd.DataFrame(Xs, columns=markers)

    # color palette for clusters
    col_pal = sns.color_palette("husl", self.nb_clusters)

    # perform clustering
    if not load_clustering:
        full_res = None
        cluster_res = hierarchy.linkage(Xs, method=method, metric=distance)
    else:
        filename = os.path.join(
            self.cluster_folder,
            'cluster_assignment%s.pickle' % filename_ext)
        print('loading clustering from %s' % filename)
        # NOTE: pickle must only be used on files this tool wrote itself.
        with open(filename, 'rb') as fp:
            full_res = pickle.load(fp)
        cluster_res = full_res['linkage']

    # cut the tree
    ct = hierarchy.cut_tree(cluster_res, n_clusters=self.nb_clusters)
    cluster_assignment = ct.T[0]
    col_vec = np.array(col_pal)[cluster_assignment]

    # main result
    res = {i: np.where(cluster_assignment == i)
           for i in range(self.nb_clusters)}

    if cluster_fusion:
        # A clustering computed in this call has no stored result to read
        # the indices from (full_res is None); keep the ``indices`` argument
        # in that case instead of failing on ``None['indices']``.
        if full_res is not None:
            indices = full_res['indices']
        if indices is None:
            raise ValueError(
                'It is not possible to make cluster fusions and downsampling.'
            )
        fused_clusters = {}
        fusion_info = self.settings.cluster_fusion[self.settings.dataset]
        cluster_names = list(fusion_info.keys())
        # Original cluster id -> fused id; ids not listed in fusion_info map
        # to the extra "not assigned" slot len(cluster_names).
        fused_cluster_assignment_indices = np.zeros(
            self.nb_clusters, dtype=np.uint8) + len(cluster_names)
        for k, population_name in enumerate(cluster_names):
            fused_clusters[k] = (np.hstack(
                [res[i][0] for i in fusion_info[population_name]]), )
            fused_cluster_assignment_indices[
                fusion_info[population_name]] = k
        cluster_assignment = fused_cluster_assignment_indices[
            cluster_assignment]
        # white is appended for the "not assigned" slot
        col_pal = sns.color_palette("husl", len(fusion_info)) + [(1, 1, 1)]
        col_vec = np.array(col_pal)[cluster_assignment]
        res = fused_clusters
        res[len(cluster_names)] = np.where(
            cluster_assignment == len(cluster_names))
        filename_ext = '%s_cluster_fusion' % filename_ext

    cmap = sns.light_palette("navy", as_cmap=True)
    print('starting clustering/heatmap generation ... ')
    g = sns.clustermap(df, row_linkage=cluster_res, robust=True, cmap=cmap,
                       col_cluster=False, yticklabels=False,
                       row_colors=col_vec)
    print('clustering/heatmap generation succeeded ... ')

    print('starting legend ... ')
    # List the clusters in the order they appear along the dendrogram.
    indices_ordered = g.dendrogram_row.reordered_ind
    ordered_cluster_labels = cluster_assignment[indices_ordered]
    cluster_order = list(dict.fromkeys(ordered_cluster_labels))

    # legend for class colors (zero-size proxy bars)
    for label in cluster_order:
        if not cluster_fusion:
            g.ax_col_dendrogram.bar(0, 0, color=col_pal[label],
                                    label='%i(%i)' % (label, len(res[label][0])),
                                    linewidth=0)
        else:
            if label < len(cluster_names):
                cluster_name = cluster_names[label]
            else:
                cluster_name = 'not assigned'
            g.ax_col_dendrogram.bar(0, 0, color=col_pal[label],
                                    label='%s(%i)' % (cluster_name,
                                                      len(res[label][0])),
                                    linewidth=0)
    legend_ncol = 3 if cluster_fusion else 5
    g.ax_col_dendrogram.legend(loc="center", ncol=legend_ncol)

    # Grow the legend axes with the number of legend rows to avoid an
    # overlap of this HUGE legend with the heatmap.
    dendro_col = g.ax_col_dendrogram.get_position()
    standard_height = 0.18
    new_height = max(
        standard_height / 4.0 * (len(res) // legend_ncol) - standard_height,
        standard_height)
    g.ax_col_dendrogram.set_position(
        [dendro_col.x0, dendro_col.y0, dendro_col.width, new_height])

    print('saving figure ... ')
    g.savefig(
        os.path.join(self.cluster_folder, 'clustering%s.png' % filename_ext))

    full_res = {
        'res': res,
        'colors': col_pal,
        'linkage': cluster_res,
        'indices': indices
    }
    if export and not load_clustering:
        print('exporting results ... ')
        filename = os.path.join(
            self.cluster_folder,
            'cluster_assignment%s.pickle' % filename_ext)
        with open(filename, 'wb') as fp:
            pickle.dump(full_res, fp)

    return full_res
def plot_position(self, ref_id:str, pos:int=None, split_samples:bool=False, figsize:tuple=(30,10), palette:str="Set2", plot_style:str="ggplot", xlim:tuple=(None,None), ylim:tuple=(None,None), alpha:float=0.3, pointSize:int=20, scatter:bool=True, kde:bool=True, model:bool=False, gmm_levels:int=50):
    """
    Plot dwell time against median intensity at one position of a reference.
    * ref_id
        Valid reference id name in the database
    * pos
        Position of interest (a single int)
    * split_samples
        If True, every sample of a condition is drawn separately.
        If False, the samples are pooled per condition
    * figsize
        Length and height of the output plot
    * palette
        Colormap. See https://matplotlib.org/users/colormaps.html, https://matplotlib.org/examples/color/named_colors.html
    * plot_style
        Matplotlib plotting style. See https://matplotlib.org/users/style_sheets.html
    * xlim
        A tuple of explicit limits for the x axis
    * ylim
        A tuple of explicit limits for the y axis
    * alpha
        Transparency of the scatter points and of the GMM contour lines
    * pointSize
        Point size for the scatter plot
    * scatter
        If True, plot the individual data points
    * kde
        If True, plot the KDE of the bivariate intensity/dwell distribution of each series
    * model
        If True, and a GMM is stored for the position, overlay its density estimate
    * gmm_levels
        Number of contour lines to use for the GMM contour plot
    Returns the (fig, ax) pair of the plot.
    """
    # Fetch the reference record and validate the requested position
    ref_data = self[ref_id]
    if not isinstance(pos, int):
        raise NanocomporeError("pos must be a single position")
    if pos > len(ref_data):
        raise NanocomporeError("Position out of range")

    pos_entry = ref_data[pos]
    ref_kmer = pos_entry['ref_kmer']
    data = pos_entry['data']

    # One colour per sample when splitting, otherwise one per condition
    n_colors = self._metadata["n_samples"] if split_samples else 2
    col_gen = self.__color_generator(palette=palette, n=n_colors)

    # Standardised intensity / log10(dwell) series, keyed by legend label
    plot_data_dict = OrderedDict()
    for cond_lab, cond_dict in data.items():
        if not split_samples:
            plot_data_dict[cond_lab] = {
                "intensity": scale(np.concatenate(
                    [sv["intensity"] for sv in cond_dict.values()])),
                "dwell": scale(np.log10(np.concatenate(
                    [sv["dwell"] for sv in cond_dict.values()]))),
                "color": next(col_gen)}
            continue
        for samp_lab, sample_val in cond_dict.items():
            series_key = "{}_{}".format(cond_lab, samp_lab)
            plot_data_dict[series_key] = {
                "intensity": scale(sample_val["intensity"]),
                "dwell": scale(np.log10(sample_val["dwell"])),
                "color": next(col_gen)}

    # Evaluate the stored GMM on a 1000 x 1000 grid, if requested and present
    gmm = None
    if model and 'txComp' in pos_entry and 'GMM_model' in pos_entry['txComp']:
        gmm = pos_entry['txComp']['GMM_model']['model']
        if not isinstance(gmm, GaussianMixture):
            raise NanocomporeError("The GMM_model slot for this position is not an instance of the GaussianMixture class")
        condition_labels = tuple(data.keys())
        # Only the first two conditions contribute to the grid extent
        both_conditions = (list(data[condition_labels[0]].values())
                           + list(data[condition_labels[1]].values()))
        global_intensity = scale(np.concatenate(
            [v['intensity'] for v in both_conditions], axis=None))
        global_dwell = scale(np.log10(np.concatenate(
            [v['dwell'] for v in both_conditions], axis=None)))
        grid_x = np.linspace(min(global_intensity), max(global_intensity), num=1000)
        grid_y = np.linspace(min(global_dwell), max(global_dwell), num=1000)
        X, Y = np.meshgrid(grid_x, grid_y)
        grid_points = np.array([X.ravel(), Y.ravel()]).T
        # Negative log-likelihood surface
        Z = (-gmm.score_samples(grid_points)).reshape(X.shape)

    # Draw everything inside the requested matplotlib style
    with pl.style.context(plot_style):
        fig, ax = pl.subplots(figsize=figsize)
        for label, series in plot_data_dict.items():
            intensity, dwell = series["intensity"], series["dwell"]
            if kde:
                sns.kdeplot(
                    data=intensity,
                    data2=dwell,
                    cmap=sns.light_palette(series["color"], as_cmap=True),
                    ax=ax,
                    clip=((min(intensity), max(intensity)),
                          (min(dwell), max(dwell))))
            if scatter:
                ax.scatter(
                    x=intensity,
                    y=dwell,
                    color=series["color"],
                    label=label,
                    alpha=alpha,
                    s=pointSize)
        if gmm:
            ax.contour(X, Y, Z, levels=gmm_levels, alpha=alpha, colors="black")

        # Adjust display
        ax.set_title("%s\n%s (%s)"%(ref_id,pos, ref_kmer))
        ax.set_ylabel("log10 (Dwell Time)")
        ax.set_xlabel("Median Intensity")
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.legend()
        pl.tight_layout()
        return (fig, ax)
def plot_series_statistics(observed=None, expected=None, total_stdev=None,
                           explained_stdev=None, color_set='Set2',
                           xscale="linear", yscale="linear", xlabel="feature",
                           ylabel="value", y_cutoff=None, sort_by='expected',
                           sort_ascending=True, despine=True,
                           legend_enable=True, legend_title=None,
                           legend_loc='best', alpha=None, markersize=1.0,
                           linewdith=1.2, fontsize=8, ax=None, title=None,
                           return_handles=False, return_indices=False):
    """ Plot an observed series against its expectation, optionally with
    bands of +/- total and +/- explained standard deviation around the
    expectation. All series are reordered with the same sort indices.

    Parameters
    ----------
    color_set : str or tuple/list of 4 colors
        palette name, or the explicit colors for (observed, expected,
        total-stdev band, explained-stdev band)
    xscale, yscale : {"linear", "log", "symlog", "logit", ...}
        text or instance in `matplotlib.scale`
    y_cutoff : scalar or None
        replaces the upper y-limit for a "linear" y-scale and the lower
        y-limit for a "log" y-scale
    despine : bool (default: True)
        if True, remove the top and right spines from plot,
        otherwise, only remove the top spine
    return_handles, return_indices : bool
        additionally return the legend handles and/or the sort indices

    Returns
    -------
    the axis alone, or a tuple ``(ax[, handles][, sort_indices])``

    Example
    -------
    >>> import numpy as np
    >>> from matplotlib import pyplot as plt
    >>> np.random.seed(1234)
    >>> x = np.random.randn(8000)
    >>> y = np.random.randn(8000)
    ...
    >>> z = np.random.rand(8000) + 3
    >>> w = np.random.rand(8000) + 3
    ...
    >>> ax, handles1 = V.plot_series_statistics(observed=x, expected=y,
    ...                                         explained_stdev=np.std(x),
    ...                                         total_stdev=np.std(y),
    ...                                         color_set='Set1',
    ...                                         legend_enable=False, legend_title="Series_1",
    ...                                         return_handles=True)
    >>> _, handles2 = V.plot_series_statistics(observed=z, expected=w,
    ...                                        explained_stdev=np.std(z),
    ...                                        total_stdev=np.std(w),
    ...                                        color_set='Set2',
    ...                                        legend_enable=False, legend_title="Series_2",
    ...                                        return_handles=True,
    ...                                        ax=ax.twinx(), alpha=0.2)
    >>> plt.legend(handles=handles1 + handles2, loc='best', fontsize=8)
    """
    import seaborn
    import matplotlib
    ax = to_axis2D(ax)
    observed, expected, total_stdev, explained_stdev = _preprocess_series(
        observed, expected, total_stdev, explained_stdev)

    # ====== colors ====== #
    if isinstance(color_set, (tuple, list)):
        observed_color, expected_color, total_color, explained_color = color_set
    else:
        base_palette = seaborn.color_palette(color_set, 8)
        shades = seaborn.light_palette(base_palette[1], 5)
        observed_color = base_palette[0]
        expected_color = shades[-1]
        total_color = shades[1]
        explained_color = shades[3]

    # ====== ordering and x positions (1-based) ====== #
    sort_indices = _get_sort_indices(observed, expected, sort_by,
                                     sort_ascending)
    n_points = len(observed) if observed is not None else len(expected)
    indices = np.arange(n_points) + 1
    handles = []

    # ====== invisible line carrying the series title ====== #
    if legend_title is not None:
        title_handle, = ax.plot([], marker='None', linestyle='None',
                                label="$%s$" % legend_title)
        handles.append(title_handle)

    # ====== observed points and expected line ====== #
    if observed is not None:
        observed_handle, = ax.plot(indices, observed[sort_indices],
                                   label="Observations", color=observed_color,
                                   linestyle="", marker="o", zorder=2,
                                   markersize=markersize)
        handles.append(observed_handle)
    if expected is not None:
        expected_handle, = ax.plot(indices, expected[sort_indices],
                                   label="Expectation", color=expected_color,
                                   linestyle="-", marker="", zorder=3,
                                   linewidth=linewdith)
        handles.append(expected_handle)

    # ====== stdev bands around the expectation ====== #
    bands = (
        (total_stdev, total_color, 0, "Stdev(Total)"),
        (explained_stdev, explained_color, 1, "Stdev(Explained)"),
    )
    for stdev, band_color, band_zorder, band_label in bands:
        if stdev is None:
            continue
        lower = expected - stdev
        upper = expected + stdev
        ax.fill_between(indices, lower[sort_indices], upper[sort_indices],
                        color=band_color, zorder=band_zorder, alpha=alpha)
        handles.append(
            matplotlib.patches.Patch(label=band_label, color=band_color))

    # ====== legend ====== #
    if legend_enable:
        ax.legend(handles=handles, loc=legend_loc, fontsize=fontsize)

    # ====== spines, scales and axis labels ====== #
    seaborn.despine(top=True, right=bool(despine))
    ax.set_yscale(yscale, nonposy="clip")
    ax.set_ylabel('[%s]%s' % (yscale, ylabel), fontsize=fontsize)
    ax.set_xscale(xscale)
    if sort_by is not None:
        sort_note = ' (sorted by "%s")' % str(sort_by).lower()
    else:
        sort_note = ''
    ax.set_xlabel('[%s]%s%s' % (xscale, xlabel, sort_note),
                  fontsize=fontsize)

    # ====== y-cutoff ====== #
    y_min, y_max = ax.get_ylim()
    if y_cutoff is not None:
        if yscale == "linear":
            y_max = y_cutoff
        elif yscale == "log":
            y_min = y_cutoff
    ax.set_ylim(y_min, y_max)
    ax.tick_params(axis='both', labelsize=fontsize)

    # ====== title ====== #
    if title is not None:
        ax.set_title(title, fontsize=fontsize, fontweight='bold')

    # ====== return value ====== #
    if not (return_handles or return_indices):
        return ax
    outputs = [ax]
    if return_handles:
        outputs.append(handles)
    if return_indices:
        outputs.append(sort_indices)
    return tuple(outputs)
from dataclasses import dataclass, field import seaborn as sns import matplotlib.pyplot as plt import pandas as pd GREEN = "#679436" BLUE = "#4F6DB8" BLUE_CMAP = sns.light_palette(BLUE, n_colors=10, reverse=False, as_cmap=False) CMAP = BLUE_CMAP OUTPUTS = [ "System cost (€)", "Solar (MW)", "Wind (MW)", "Bioenergy (MW)", "Storage (MW)", "Storage (MWh)" ] DIFF_OUTPUTS = [ "System cost", "Total supply\ncapacity", "Total balancing\ncapacity" ] ROW_INDEX = [ "y-continental-scale-cost-eur", "y-national-scale-cost-eur", "y-cost-diff-eur", "y-cost-diff-relative", "y-supply-diff-relative", "y-wind-diff-relative", "y-balancing-diff-relative", "y-continental-scale-pv-gw", "y-national-scale-pv-gw", "y-continental-scale-wind-gw", "y-national-scale-wind-gw", "y-continental-scale-hydro-gw", "y-national-scale-hydro-gw", "y-continental-scale-biofuel-gw", "y-national-scale-biofuel-gw", "y-continental-scale-storage-gw", "y-national-scale-storage-gw", "y-continental-scale-storage-gwh", "y-national-scale-storage-gwh", "y-continental-scale-transmission-gwkm", "y-regional-scale-cost-eur", "y-regional-scale-pv-gw", "y-regional-scale-wind-gw",
def set_legend(count_df, ax, color_species=None, color_gradient='red',
               size='xx-small'):
    '''
    Function that plots the legend of the heatmap and barplot on the bottom left

    :param count_df: Number of genomes for each phylum, in the same
        order as the heatmap. Must contain the rows 'Gammaproteobacteria',
        'Betaproteobacteria' and 'Alphaproteobacteria'.
    :type: pandas.DataFrame
    :param ax: The axes on which to plot the sub figure
    :type: matplotlib.axes.Axes
    :param color_species: The two colors used in the barplot for
        (Proteobacteria, rest). None or an empty sequence selects the
        built-in pair of blues.
    :type: list of rgb color
    :param color_gradient: color for the gradient used in the heatmap
    :type: color in str, rgb or hex
    :param size: font size used for the title, ticks and legend text
    :type: str or number accepted by matplotlib
    '''

    axins = inset_axes(
        ax,
        width="50%",  # width = 50% of parent_bbox width
        height="15%",  # height : 15%
        loc='upper center')
    axins.set_title(label='Colour key (% of genomes with at least one genes)',
                    fontdict={'fontsize': size})

    # Gradient legend on the inset axes
    cmap = sns.light_palette(color_gradient, as_cmap=True)
    cbar = plt.colorbar(
        ScalarMappable(cmap=cmap),
        cax=axins,
        orientation='horizontal',
    )
    cbar.ax.tick_params(labelsize=size)
    cbar.set_ticks([0, 0.25, 0.5, 0.75, 1])
    cbar.set_ticklabels(["0", "25", "50", "75", '100'])

    # The host axes only carries the legend: hide its ticks and spines
    ax.tick_params(
        axis='both',
        which='both',
        left=False,
        bottom=False,
        labelleft=False,
        labelbottom=False,
    )
    sns.despine(ax=ax, left=True, bottom=True)

    # Genome counts for the two groups shown as colored squares
    proteo_row = 'Proteobacteria (𝛼, 𝛽, 𝛾)'
    mini_tab = pd.DataFrame(0, index=[proteo_row, 'Rest'], columns=["Count"])
    mini_tab.loc[proteo_row] = (count_df.loc['Gammaproteobacteria']
                                + count_df.loc['Betaproteobacteria']
                                + count_df.loc['Alphaproteobacteria'])
    mini_tab.loc['Rest'] = count_df.sum() - mini_tab.loc[proteo_row]

    # ``len`` instead of ``== []`` so tuples and arrays are handled as well
    if color_species is None or len(color_species) == 0:
        color = [(0.65098041296005249, 0.80784314870834351,
                  0.89019608497619629, 1.0),
                 (0.3997693305214246, 0.6478123867044262,
                  0.80273742044673246, 1.0)]
    else:
        color = color_species

    # Read the scalar cell directly: int() on a one-element array is
    # deprecated since NumPy 1.25.
    legend = [
        r'Proteobacteria ($\alpha$, $\beta$, $\gamma$) ({} genomes)'.format(
            int(mini_tab.loc[proteo_row, 'Count'])),
        'Rest of the dataset ({} genomes)'.format(
            int(mini_tab.loc['Rest', 'Count']))
    ]

    handles = [
        mpatches.Patch(color=color[i], label=legend[i]) for i in range(2)
    ]

    ax.legend(handles=handles,
              frameon=False,
              fontsize=size,
              loc='lower center')
for k in keys]) X = np.transpose(X, (-1, 1, 2, 0)) X = moving_average(X, ma_window_size, axis=2) if 3 in to_plot: fig, axes = plt.subplots(2, 2, figsize=(16, 12), sharex=False, sharey=False) axes = axes.ravel() n_yticks = [0, 200, 400, 700] for i, (key, color) in enumerate(zip(keys, colors_context)): ax = axes[i] ax.set_ylim(-100, 700) ax.text(50, 710, 'abcd'[i], weight='bold', fontsize=24) for degree, (x, subcolor) in enumerate( zip(X, sns.light_palette(color, len(X))[::-1]), 1): sns.tsplot(rescale(x[::decim, :, i:i + 1] + 1, axis=1), color=subcolor, condition='degree %i' % degree, ax=ax, ci=ci) # plt.xticks(times, times[::1000]) if i == 0: ax.set_ylabel('Relative change in CP [percent]', labelpad=10) ax.set_title(mapping[key]) ax.set_xlim(0, max(n_xticks)) ax.set_xticks(n_xticks) if i in (1, 3): ax.set_ylabel('') ax.set_yticks(n_yticks, ['', '', '', '']) if i in (0, 1): ax.set_xlabel('') ax.set_yticks(n_yticks, ['', '', '', ''])
# NSE(...) scores of the Kustas model outputs against the corresponding EC
# series, one entry per flux (Rn, G, H_BR, LE_BR).
Diff_stats['NSE_Kustas'] = {
    'Rn' : NSE(Rn_EC, Rn_mod_Kustas),
    'G' : NSE(G_EC, G_mod_Kustas),
    'H_BR' : NSE(H_EC_corr_EB, H_mod_Kustas),
    'LE_BR' : NSE(LE_EC_corr_EB, LE_mod_Kustas)}

# Tabulate the scores; ``columns`` fixes which Diff_stats keys become columns
# and in which order.
diff_Stats = pd.DataFrame(Diff_stats, columns = ['NSE_kB23', 'NSE_Lhomme', 'NSE_Kustas'])

#%% Plotting against raw EC fluxes
# Default colour cycle handed to seaborn below.
colors = ['#3498db', '#2ecc71', '#f7cf33', '#fc9d1d','#fd484d', '#9b59b6', '#51677b']
pal = sns.color_palette(colors)

# Light and dark ramps for four base colours, each given as a HUSL triple
# (``input="husl"``).
# http://www.husl-colors.org/
pal_red_light = sns.light_palette((11.4, 97.4, 58.1), input="husl")
pal_red_dark = sns.dark_palette((11.4, 97.4, 58.1), input="husl")
pal_blue_light = sns.light_palette((242.2, 90.1, 60.2), input="husl")
pal_blue_dark = sns.dark_palette((242.2, 90.1, 60.2), input="husl")
pal_orange_light = sns.light_palette((41.2, 96.8, 72.8), input="husl")
pal_orange_dark = sns.dark_palette((41.2, 96.8, 72.8), input="husl")
pal_green_light = sns.light_palette((137.9, 93.2, 72.9), input="husl")
pal_green_dark = sns.dark_palette((137.9, 93.2, 72.9), input="husl")

# Global seaborn/matplotlib settings for the figures that follow: "paper"
# context, white style, the custom palette, and explicit font, figure and
# tick sizes.
sns.set(context = "paper", style = 'white', palette = pal,
        rc = {'axes.labelsize': 18.0, 'figure.figsize': [14, 7],
              'legend.fontsize': 16.0, 'xtick.labelsize': 18.0,
              'ytick.labelsize': 18.0, 'xtick.major.size': 4.0,
              'ytick.major.size': 4.0})
# Options: EC_raw,
def corrplots_from_flatmat(ipfile, opname):
    """Build (or load) a filtered, reordered correlation matrix and plot it.

    The matrix read from ``ipfile`` is flattened to ``(u1, u2, p)`` rows,
    labels are reduced to their last '/'-separated component, and rows whose
    labels match the exclusion rules below are dropped.  The remaining cells
    are re-assembled into a square matrix whose labels are naturally sorted
    on their '-'-separated parts (part 2 first, then part 0, then part 1).
    That matrix is cached as ``opname`` with ``.png`` replaced by ``.csv``;
    if the cache file already exists it is loaded instead of rebuilt.

    The matrix is finally drawn with ``sns.clustermap`` (clustering switched
    off, colour bars for the three label parts) and saved to ``opname``.

    Args:
        ipfile: path of a CSV correlation matrix; its first column is the
            index.
        opname: path of the output figure.  Its directory is created when
            missing.

    Raises:
        Exception: if the sets of row and column labels differ after the
            exclusions have been applied.
    """
    csvname = opname.replace('.png', '.csv')

    # Create the output directory first: the cached CSV is written into it
    # before the figure is.  A bare file name has no directory to create.
    opdir = os.path.dirname(opname)
    if opdir and not os.path.isdir(opdir):
        print("### Making Directory ####")
        os.makedirs(opdir)

    def drop_matching(frame, make_mask):
        """Drop rows where ``make_mask`` is true for either label column."""
        for col in ('u1', 'u2'):
            frame = frame[~make_mask(frame[col])]
        return frame

    if not os.path.isfile(csvname):
        print('### Loading and unstacking corrmat')
        corrmat = pd.read_csv(ipfile, index_col=0)
        # One row per matrix cell: (column label, row label, value).  Done in
        # memory rather than through a temporary CSV in the working directory.
        flatmat = corrmat.unstack().reset_index()
        flatmat.columns = ['u1', 'u2', 'p']

        print('### Loading QCMat')
        qcmat = pd.read_csv('/data/ss_nifti/analysis_res/cpac_motionop_qc.csv')

        # Keep only the last path component of every label.
        for col in ('u1', 'u2'):
            flatmat[col] = flatmat[col].str.split('/').str[-1]
        mdf = drop_matching(flatmat, lambda s: s.str.contains('unknown'))

        print('##Excluding Stuff')
        # Scans flagged 0 in the QC sheet (poor SNR, poor registration).
        for flag in ('Signal', 'Registration'):
            failed = list(qcmat['subseshscan'][qcmat[flag] == 0].values)
            mdf = drop_matching(mdf, lambda s, failed=failed: s.isin(failed))
        # Hard-coded sessions / subjects / scans to leave out.
        excluded_patterns = (
            'ssc_8',
            'ssc_1-',
            'M00475776',
            'M00448814',
            'M00421916',
            'M00499588-ssc_7-_scan_inscapes',
            'M00499588-ssc_7-_scan_flanker',
        )
        for pattern in excluded_patterns:
            mdf = drop_matching(
                mdf, lambda s, pattern=pattern: s.str.contains(pattern))

        mdf = mdf.reset_index()
        unqvals1 = set(mdf.u1.values)
        unqvals2 = set(mdf.u2.values)
        # Both axes are labelled from u1 below, so the two label sets must be
        # identical (a label present on only one side cannot be placed).
        if unqvals1 != unqvals2:
            raise Exception('Columns dont match')

        print('###Sorting data')
        # Successive stable sorts: the last key applied is the primary one.
        unqvalssort = list(unqvals1)
        for part in (1, 0, 2):
            unqvalssort = natsorted(
                unqvalssort, key=lambda s, part=part: s.split('-')[part])

        # Visit the rows in sorted order (u1 parts dominate, then u2 parts).
        order = list(zip(mdf.u1.values, range(len(mdf)), mdf.u2.values))
        for item, part in ((2, 1), (2, 0), (2, 2), (0, 1), (0, 0), (0, 2)):
            order = natsorted(
                order,
                key=lambda t, item=item, part=part: t[item].split('-')[part])

        print('### Making new DF')
        # The matrix is labelled by unqvalssort on both axes, so size it from
        # the labels (an integer) rather than from sqrt(number of rows).
        n_labels = len(unqvalssort)
        newarr = np.zeros((n_labels, n_labels))
        position = dict((label, k) for k, label in enumerate(unqvalssort))
        # After reset_index the row labels equal the row positions, so plain
        # arrays can be indexed with them.
        u1_vals, u2_vals, p_vals = mdf.u1.values, mdf.u2.values, mdf.p.values
        for i, (_, row, _) in enumerate(order):
            newarr[position[u1_vals[row]], position[u2_vals[row]]] = p_vals[row]
            print('%s %s' % (i, row))
        newdf = pd.DataFrame(newarr, columns=unqvalssort, index=unqvalssort)
        newdf.to_csv(csvname)
    else:
        print('Already have mat!!!')
        newdf = pd.read_csv(csvname, index_col=0)

    # Also strip this subject from matrices cached before it was excluded.
    dropped = [c for c in newdf.columns if 'M00475776' in c]
    newdf = newdf.drop(dropped, axis=0)
    newdf = newdf.drop(dropped, axis=1)
    print('##Shape @@@@ %s' % (newdf.shape,))

    # Split 'a-b-c' labels into a three-level MultiIndex on both axes.
    newcols = [tuple(n) for n in newdf.columns.str.split('-')]
    newinds = [tuple(n) for n in newdf.index.str.split('-')]
    newdf.columns = pd.MultiIndex.from_tuples(newcols)
    newdf.index = pd.MultiIndex.from_tuples(newinds)

    def level_colors(level, color):
        """Return (labels, label->colour table, per-column colours) for a level."""
        labels = newdf.columns.get_level_values(level)
        uniq = labels.unique()
        lut = dict(zip(map(str, uniq),
                       sns.light_palette(color, n_colors=uniq.size)))
        return labels, lut, pd.Series(labels).map(lut)

    _, _, subject_colors = level_colors(0, 'green')
    _, _, session_colors = level_colors(1, 'blue')
    scan_labels, scan_lut, scan_colors = level_colors(2, 'orange')
    rowcols = [subject_colors, session_colors, scan_colors]
    colcols = [subject_colors, session_colors, scan_colors]

    print("###### Generating Heatmap ######")
    g = sns.clustermap(newdf, row_cluster=False, col_cluster=False,
                       xticklabels=False, yticklabels=False,
                       row_colors=rowcols, col_colors=colcols,
                       vmin=0, vmax=1, cmap='Reds')
    # Zero-size bars exist only to give the legend one entry per scan label.
    for label in scan_labels.unique():
        g.ax_col_dendrogram.bar(0, 0, color=scan_lut[label], label=label,
                                linewidth=0)
    g.ax_col_dendrogram.legend(loc="upper center", ncol=2)

    print("###### Saving fig to " + opname + " ######")
    g.savefig(opname, dpi=300)
    # Closing the figure is enough; a following plt.cla() would implicitly
    # create a fresh empty figure and leave it open.
    plt.close()
# Score the Naive Bayes model on the train / validation / test splits and
# collect the six numbers as one result row.
output_train_nb = accuracy_metrics(gnb_model, y_train, x_train)
output_valid_nb = accuracy_metrics(gnb_model, y_valid, x_valid)
output_test_nb = accuracy_metrics(gnb_model, y_test, x_test)
result_nb.append(output_train_nb + output_valid_nb + output_test_nb)

# Column headers: "<split> Accuracy", "<split> AUROC" for each split in turn.
_nb_columns = [
    '{} {}'.format(split, metric)
    for split in ('Traindata', 'Validationdata', 'Testdata')
    for metric in ('Accuracy', 'AUROC')
]
s_nb = pd.DataFrame(result_nb, columns=_nb_columns)

import seaborn as sns

# Shade the table cells with a light-to-steelblue gradient.
cm = sns.light_palette("steelblue", as_cmap=True)
Final_output_nb = s_nb.style.background_gradient(cmap=cm)
Final_output_nb

"""**Decision Tree**"""

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from IPython.display import HTML


def accuracy_metrics(model, true_label, data):
    """Return ``[accuracy, ROC AUC]`` for ``model.predict(data)``.

    Both scores are computed from the predicted labels returned by
    ``model.predict`` compared against ``true_label``.
    """
    predicted = model.predict(data)
    accuracy = accuracy_score(true_label, predicted)
    auroc = roc_auc_score(true_label, predicted)
    return [accuracy, auroc]
# pairwise distances between all data points
D = pairwise_distances(X, squared=True)
# Similarity with constant sigma
P_constant = _joint_probabilities_constant_sigma(D, .002)
# Similarity with variable sigma
P_binary = _joint_probabilities(D, 30., False)
# output of this function needs to be reshaped to a square matrix
P_binary_s = squareform(P_binary)

# plot the distance matrix and both similarity matrices side by side
plt.figure(figsize=(12, 4))
pal = sns.light_palette("blue", as_cmap=True)
# Titles are raw strings so the TeX backslash in "\sigma" is not parsed as an
# (invalid) Python escape sequence.
_panels = [
    (131, D, "Distance matrix"),
    (132, P_constant, r"$p_{j|i}$ (constant $\sigma$)"),
    (133, P_binary_s, r"$p_{j|i}$ (variable $\sigma$)"),
]
for _position, _matrix, _title in _panels:
    plt.subplot(_position)
    # Show every 10th row and column only.
    plt.imshow(_matrix[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title(_title, fontdict={'fontsize': 16})
return ((raw_data - np.mean(raw_data, axis = 0)) / np.std(raw_data, axis = 0)) # In[65]: df2[numerics] = standardize(df2[numerics]) import scipy as sp df2 = df2[(np.abs(sp.stats.zscore(df2[numerics])) < 3).all(axis=1)] # In[66]: from matplotlib.colors import ListedColormap my_cmap = ListedColormap(sns.light_palette((250, 100, 50), input="husl", n_colors=50).as_hex()) table = df2.drop(['patient_nbr', 'encounter_id'], axis=1).corr(method='pearson') table.style.background_gradient(cmap=my_cmap, axis = 0) # In[67]: df2['level1_diag1'] = df2['level1_diag1'].astype('object') df_pd = pd.get_dummies(df2, columns=['gender', 'admission_type_id', 'discharge_disposition_id', 'admission_source_id', 'max_glu_serum', 'A1Cresult', 'level1_diag1'], drop_first = True) just_dummies = pd.get_dummies(df_pd['race']) df_pd = pd.concat([df_pd, just_dummies], axis=1) df_pd.drop(['race'], inplace=True, axis=1)
def map(self):
    """Show a log-scaled hexbin map of the sample locations in ``self._df``.

    The ``lon`` and ``lat`` columns of ``self._df`` are read (the frame is
    not modified).  Each point is jittered uniformly by up to half of
    ``dlon``/``dlat`` (2.5 / 1.9), longitudes are folded into [0, 360] and
    shifted by -180, and the points are binned on a PlateCarree map with
    coastlines and a colour bar.  The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    import cartopy.crs as ccrs
    from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter

    print("Plotting global distribution of samples...")

    dlon, dlat = 2.5, 1.9
    npoints = len(self._df)

    def rand_fact(npoints, width):
        """ Rescale a number from [0, 1) to [-width, width) """
        points = 2.*np.random.random(npoints) - 1.
        points *= width
        return points

    # The additions create new Series, so self._df is left untouched and no
    # copy of the whole frame is needed.  Longitude jitter is drawn first.
    lon = self._df['lon'] + rand_fact(npoints, dlon/2.)
    lat = self._df['lat'] + rand_fact(npoints, dlat/2.)

    # Correct lon:
    # 1) some values may be < 0 or > 360, map these into [0, 360]
    lon = lon.where(lon >= 0, lon + 360.)
    lon = lon.where(lon <= 360, lon - 360.)
    # 2) map from [0, 360] -> [-180, 180]
    # NOTE(review): this is a plain 180-degree shift, not a wrap of
    # (180, 360] onto (-180, 0] -- confirm it matches the longitude
    # convention of the samples.
    lon = lon - 180.

    proj = ccrs.PlateCarree()
    cmap = sns.light_palette("navy", 12, as_cmap=True)

    fig, ax = plt.subplots(1, 1, figsize=(10, 5),
                           subplot_kw=dict(projection=proj))
    # Placeholder axes for the colour bar; positioned by resize_colorbar.
    cax = fig.add_axes([0, 0, 0.1, 0.1])
    fig.subplots_adjust(hspace=0, wspace=0, top=0.925, left=0.1)

    hb = ax.hexbin(lon, lat, gridsize=(50, 15), bins='log',
                   transform=proj, cmap=cmap)

    # This block of code helps correctly size and place the colorbar
    def resize_colorbar(event):
        plt.draw()
        posn = ax.get_position()
        cax.set_position([posn.x0 + posn.width + 0.01, posn.y0,
                          0.04, posn.height])

    fig.canvas.mpl_connect('resize_event', resize_colorbar)

    ax.coastlines()
    ax.set_global()

    ax.set_xticks([-180, -120, -60, 0, 60, 120, 180], crs=proj)
    ax.set_yticks([-90, -60, -30, 0, 30, 60, 90], crs=proj)
    lon_formatter = LongitudeFormatter(zero_direction_label=True)
    lat_formatter = LatitudeFormatter()
    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)

    plt.colorbar(hb, cax=cax, ax=ax)
    plt.show()
hue='repayment_interval') g1.set_xticklabels(g1.get_xticklabels(), rotation=90) g1.set_title("Mean Loan by Month Year", fontsize=15) g1.set_xlabel("") g1.set_ylabel("Loan Amount", fontsize=12) plt.show() kiva_loans_data['Century'] = kiva_loans_data.date.dt.year loan = kiva_loans_data.groupby(['country', 'Century'])['loan_amount'].mean().unstack() loan = loan.sort_values([2017], ascending=False) f, ax = plt.subplots(figsize=(15, 20)) loan = loan.fillna(0) temp = sns.heatmap(loan, cmap='Reds') plt.show() sector_repayment = ['sector', 'repayment_interval'] cm = sns.light_palette("red", as_cmap=True) pd.crosstab( kiva_loans_data[sector_repayment[0]], kiva_loans_data[sector_repayment[1]]).style.background_gradient(cmap=cm) sector_repayment = ['country', 'repayment_interval'] cm = sns.light_palette("red", as_cmap=True) pd.crosstab( kiva_loans_data[sector_repayment[0]], kiva_loans_data[sector_repayment[1]]).style.background_gradient(cmap=cm) #Correlation Matrix corr = kiva_loans_data.corr() plt.figure(figsize=(12, 12)) sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values, annot=True,
# Per-plate table of the 'frac_pairs_mapped' QC column.
mPlateTbl = makeByWellTbl(
    in_qcsummary,
    o.col_platewell,
    o.col_platename,
    'frac_pairs_mapped',
    dtype=float,
)

# One annotated heat map (values shown as percentages) per plate, saved as
# "<out_base>_<plate>.by_well.frac_pairs_mapped.pdf".
for plate in mPlateTbl:
    _percent_mapped = 100. * mPlateTbl[plate]
    _heatmap_opts = dict(
        annot=True,
        fmt='.1f',
        cmap=sns.light_palette("red", as_cmap=True),
        vmin=30.,
        vmax=90.,
    )
    _annot_opts = dict(size=7.)
    f, ax = plotByWellTbl(
        _percent_mapped,
        heatmap_kwargs=_heatmap_opts,
        annot_kwargs=_annot_opts,
    )
    plt.title('Capture set %s, %% properly mapped paired reads' % plate)
    plt.tight_layout()
    _pdf_name = '%s_%s.by_well.frac_pairs_mapped.pdf' % (o.out_base, plate)
    f.savefig(_pdf_name)
def shot_chart_jointgrid(x, y, data=None, joint_type="scatter", title="",
                         joint_color="b", cmap=None, xlim=(-250, 250),
                         ylim=(422.5, -47.5), court_color="gray", court_lw=1,
                         outer_lines=False, flip_court=False,
                         joint_kde_shade=True, gridsize=None,
                         marginals_color="b", marginals_type="both",
                         marginals_kde_shade=True, size=(12, 11), space=0,
                         despine=False, joint_kws=None, marginal_kws=None,
                         **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    This function allows for more flexibility in customizing your shot chart
    than the ``shot_chart_jointplot`` function.

    Parameters
    ----------

    x, y : strings or vector
        The x and y coordinates of the shots taken. They can be passed in as
        vectors (such as a pandas Series) or as columns from the pandas
        DataFrame passed into ``data``.
    data : DataFrame, optional
        DataFrame containing shots where ``x`` and ``y`` represent the shot
        location coordinates.
    joint_type : { "scatter", "kde", "hex" }, optional
        The type of shot chart for the joint plot.
    title : str, optional
        The title for the plot.
    joint_color : matplotlib color, optional
        Color used to plot the shots on the joint plot.
    cmap : matplotlib Colormap object or name, optional
        Colormap for the range of data values. If one isn't provided, the
        colormap is derived from the value passed to ``joint_color``. Used
        for KDE and Hexbin joint plots.
    {x, y}lim : two-tuples, optional
        The axis limits of the plot.  The defaults represent the out of bounds
        lines and half court line.
    court_color : matplotlib color, optional
        The color of the court lines.
    court_lw : float, optional
        The linewidth of the court lines.
    outer_lines : boolean, optional
        If ``True`` the out of bound lines are drawn in as a matplotlib
        Rectangle.
    flip_court : boolean, optional
        If ``True`` orients the hoop towards the bottom of the plot. Default
        is ``False``, which orients the court where the hoop is towards the
        top of the plot.
    joint_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the joint
        plot.
    gridsize : int, optional
        Number of hexagons in the x-direction. The default is calculated
        using the Freedman-Diaconis method.
    marginals_color : matplotlib color, optional
        Color used to plot the shots on the marginal plots.
    marginals_type : { "both", "hist", "kde"}, optional
        The type of plot for the marginal plots.
    marginals_kde_shade : boolean, optional
        Default is ``True``, which shades in the KDE contours on the marginal
        plots.
    size : tuple, optional
        The width and height of the plot in inches.
    space : numeric, optional
        The space between the joint and marginal plots.
    despine : boolean, optional
        If ``True``, removes the spines.
    {joint, marginal}_kws : dicts
        Additional keyword arguments for joint and marginal plot components.
        The dictionaries passed in are not modified.
    kwargs : key, value pairs
        Keyword arguments for matplotlib Collection properties or seaborn
        plots.  They are merged into the joint plot keyword arguments.

    Returns
    -------
    grid : JointGrid
        The JointGrid object with the shot chart plotted on it.

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not one of the listed
        options.

    """

    # The joint_kws and marginal_kws idea was taken from seaborn
    # Work on copies so the caller's dictionaries are never mutated.
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)

    marginal_kws = {} if marginal_kws is None else dict(marginal_kws)

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    elif joint_type == "hex":
        if gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            # Use the data resolved by the grid: ``x`` and ``y`` themselves
            # may only be column names when ``data`` is given.
            x_bin = _freedman_diaconis_bins(grid.x)
            y_bin = _freedman_diaconis_bins(grid.y)
            gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=gridsize, cmap=cmap,
                               **joint_kws)

    else:
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    elif marginals_type == "kde":
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    else:
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # Extract the first axes, which is the main plot of the
    # joint shot chart, and draw the court onto it
    ax = grid.fig.get_axes()[0]
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels.  These parameters are booleans; the string
    # "off" is truthy and would leave the labels visible.
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    # Set the spines to match the rest of court lines, makes outer_lines
    # somewhat unnecessary
    for spine in ax.spines:
        ax.spines[spine].set_lw(court_lw)
        ax.spines[spine].set_color(court_color)
        # set the marginal spines to be the same as the rest of the spines
        grid.ax_marg_x.spines[spine].set_lw(court_lw)
        grid.ax_marg_x.spines[spine].set_color(court_color)
        grid.ax_marg_y.spines[spine].set_lw(court_lw)
        grid.ax_marg_y.spines[spine].set_color(court_color)

    if despine:
        for side in ("top", "bottom", "right", "left"):
            ax.spines[side].set_visible(False)

    return grid
""" Aggregate data and calculate the failures % withthin the aggregated group Args: table groupby_col: list, column names failure: number of failured patients in that group total: total number of patients in that group Returns: an aggregated table with failure % """ table_new = table.groupby(groupby_col).count().reset_index() table_new['failure_percentage'] = table_new[failure]/ table_new[total] * 100 return table_new #aggregate failure % within each age group color = sns.light_palette("seagreen")[-2] patients_age = failure_rate(patients, 'member_age', 'outcome', 'event_id') #plot barplot(patients_age, 'member_age', 'failure_percentage',None, 'Failure % for Each Age Group', 'Member Age', 'Failure% = failures/patients', (35, 10), color) #data process: group by sex patients.loc[:,'member_sex'] = patients.loc[:,'member_sex'].map({0: 'female', 1: 'male'}) patients.loc[:,'outcome'] = patients.loc[:,'outcome'].fillna('non-failure')#count # of failures for each gender patients_sex = patients.groupby(['member_sex','outcome']).count().reset_index() #plot barplot(patients_sex, 'member_sex', 'event_id', 'outcome', '# of Failure vs Non-failuare Patients for Each Gender', 'Member Gender', 'Failure% = failures/patients',
def PlotID(DATA, IDs, ID, xx, yy, style='Paper', xlabel='default',
           ylabel='default', xsize=3, ysize=3, mode='-', hold=0,
           palete='Normal', legendMode=True, PlotName='default'):
    """Plot ``yy`` against ``xx`` for one data set selected from ``DATA``.

    Args:
        DATA: sequence of data sets; each is indexed as ``frame[xx]`` and
            ``frame[yy]``.
        IDs: labels parallel to ``DATA``; the selected one becomes the legend
            entry.
        ID: a string (the first frame whose label equals it is plotted, and
            nothing is plotted when no label matches) or an index into
            ``DATA`` / ``IDs``.
        xx, yy: keys of the x and y data inside a frame.
        style: 'Paper' or 'Slide' apply the corresponding font, LaTeX, dpi
            and figure-size rc settings.  Any other value leaves the rc
            settings untouched and uses the current default line width.
        xlabel, ylabel: axis labels; 'default' means use ``xx`` / ``yy``.
        xsize, ysize: figure size used by the 'Paper' and 'Slide' styles.
        mode: matplotlib format string for the line.
        hold: when 0 the figure is shown after plotting.
        palete: 'Normal', 'VTANURA', 'HCONTRAST', or a key of
            ``sns.xkcd_rgb`` from which a light palette is derived.
        legendMode: draw a legend when true.
        PlotName: 'default' to draw with pyplot on the current axes,
            otherwise the axes object to draw on.
    """
    if xlabel == 'default':
        xlabel = xx
    if ylabel == 'default':
        ylabel = yy
    NIds = 1  # number of curves drawn; sizes the derived xkcd palette

    # Set color palette
    if palete == 'Normal':
        Pt = sns.color_palette('husl')
    elif palete == 'VTANURA':
        Pt = sns.color_palette([
            "#8b1f41", '#011627', '#ff6600', '#41ead4', '#808080', '#5f8297',
            '#000000'
        ])
    elif palete == 'HCONTRAST':
        Pt = sns.color_palette([
            "#004488", '#BB5566', '#575757', '#DDAA33', '#000000', '#8A8A8A'
        ])
    else:
        Pt = sns.light_palette(sns.xkcd_rgb[palete], NIds + 1, reverse=True)

    # Select style.  Each entry: (small size, axes-title size, bigger size,
    # line width).
    style_table = {
        'Paper': (8, 8, 12, 1),
        'Slide': (14, 16, 24, 2),
    }
    # Fallback so an unrecognised style still has a line width to plot with.
    lw = plt.rcParams['lines.linewidth']
    if style in style_table:
        small, axes_title, bigger, lw = style_table[style]
        plt.rc('text', usetex=True)
        if style == 'Paper':
            # A plain string is accepted by old and new Matplotlib alike;
            # the list form is rejected by recent releases.
            plt.rcParams['text.latex.preamble'] = r'\usepackage{bm}'
        plt.rc('font', family='serif')
        plt.rc('font', size=small)          # controls default text sizes
        plt.rc('axes', titlesize=axes_title)  # fontsize of the axes title
        plt.rc('axes', labelsize=bigger)    # fontsize of the x and y labels
        plt.rc('xtick', labelsize=small)    # fontsize of the tick labels
        plt.rc('ytick', labelsize=small)    # fontsize of the tick labels
        plt.rc('legend', fontsize=small)    # legend fontsize
        plt.rc('figure', titlesize=bigger)  # fontsize of the figure title
        plt.rcParams['figure.dpi'] = 400
        plt.rcParams["figure.figsize"] = (xsize, ysize)

    # pyplot and Axes share the same plot() signature, so pick the target once.
    use_pyplot = (PlotName == 'default')
    target = plt if use_pyplot else PlotName

    if isinstance(ID, str):
        # ID is a label: plot the first frame carrying it.
        for idx, Frame in enumerate(DATA):
            if ID == IDs[idx]:
                target.plot(Frame[xx], Frame[yy], mode, label=IDs[idx],
                            linewidth=lw, color=Pt[0])
                break
    else:
        # ID is an index into DATA / IDs.
        target.plot(DATA[ID][xx], DATA[ID][yy], mode, label=IDs[ID],
                    linewidth=lw, color=Pt[0])

    legend_opts = dict(edgecolor='k', fancybox=False, framealpha=1,
                       shadow=False, borderpad=1)
    if use_pyplot:
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        # show a legend on the plot
        if legendMode:
            plt.legend(**legend_opts)
    else:
        PlotName.set_xlabel(xlabel)
        PlotName.set_ylabel(ylabel)
        # show a legend on the plot
        if legendMode:
            PlotName.legend(**legend_opts)

    if hold == 0:
        # Axes objects have no show(); pyplot displays the figure in both
        # cases.
        plt.show()
['L5 NP', 'L5 NP Slc17a8'], [ 'L6 IT Car3', 'L6 CT Olig', 'L6 IT Maf', 'L6 IT Ntn5 Mgp', 'L6 IT Ntn5 Inpp4b' ], ['L6 CT Nxph2', 'L6 CT Astro', 'L6 CT', 'L6 CT Grp'], ['L6b', 'L6b F2r'], ['Lamp5 Sncg', 'Lamp5 Egln3', 'Lamp5 Slc35d3'], ['Vip Rspo4', 'Vip Serpinf1', 'Vip'], ['Astro Ex', 'Astro Aqp4'], ['OPC Pdgfra'], ['VLMC Osr1'], ['Oligo Enpp6_1', 'Oligo Enpp6_2', 'Oligo Opalin'], ['Sncg Ptprk'], ['Endo Slc38a5', 'Endo Slc38a5_Peri_2', 'Endo Slc38a5_Peri_1'] ] macosko_regev_colors = [ sns.color_palette("Greens")[2:6], # Pvalb sns.light_palette("green", 5)[0:3], # Pvalb Ex sns.light_palette("green", 5)[3:5], # Pvalb Astro sns.light_palette("orange", 6), # L2/3 sns.light_palette('red')[1:6], # Sst sns.light_palette("cyan", 3), # L5 PT sns.light_palette('purple', 8)[1:8], # L5 IT Tcap sns.light_palette('purple', 7)[4:7], # L5 IT Aldh1a7 sns.light_palette("navy", 7)[3:5], # L5 NP sns.light_palette("brown", 7)[2:7], # L6 IT sns.dark_palette("brown", 8)[1:5], # L6 CT sns.dark_palette("green", 8)[5:7], # L6 sns.dark_palette("yellow", 7)[1:4], # Lamp5 sns.dark_palette("yellow", 7)[4:7], # Vip sns.color_palette("Paired", 4), # Astro OPC VLMC sns.color_palette('Greys', 3), # Oligo sns.dark_palette('tan'), # sncg
def make_IF_fig2(chosen_test_examples,
                 folder_influence,
                 model_name,
                 formatname='png',
                 name="IF"):
    """Draw a 2x2 grid of influence-function scatter plots and return the figure.

    For every entry of ``chosen_test_examples`` the influence values of all
    training points are read from
    ``<folder_influence>/original_influence_test<sample>.txt``, reordered
    with the argsort of the mask stored in ``model/<model_name>_mask.npy``,
    and plotted against the training grid on symlog axes.  The five smallest
    and five largest values are highlighted as most harmful / most helpful,
    and a vertical line marks the test point.

    Args:
        chosen_test_examples: iterable of test-sample indices; the panels are
            filled row by row, so at most four entries fit the grid.
        folder_influence: directory holding the influence text files.
        model_name: model whose mask file is loaded.
        formatname, name: currently unused; they belonged to a disabled
            ``savefig`` call, so saving is left to the caller.

    Returns:
        The matplotlib figure.
    """
    # Overriding fonts.  The preamble is passed as one newline-joined string,
    # which both old and recent Matplotlib releases accept (recent ones no
    # longer take a list).
    plt.rcParams.update({
        "pgf.texsystem": "pdflatex",
        "pgf.preamble": "\n".join([
            r"\usepackage[utf8x]{inputenc}",
            r"\usepackage[T1]{fontenc}",
            r"\usepackage{cmbright}",
        ])
    })
    plt.rc('text', usetex=True)

    # Loading the mask; its argsort is used to reorder the influence values
    mask = np.load('model/' + model_name + '_mask.npy')
    antimask = np.argsort(mask)

    # Training points
    U_array = np.concatenate((np.linspace(0, 1, 500),
                              np.linspace(1.01, 40, 500)))
    # Test points
    U_testarray = np.concatenate(
        (np.linspace(0.01, 0.999, 20), np.linspace(1.02, 2, 5),
         np.linspace(2.066, 39, 20)))

    max_x = 40
    min_y = -1e-2
    max_y = 1e-1
    trans_point = 1  # x position where the background colour changes
    U_value = '0'
    xticks_location = np.concatenate((np.array([0, 1, 2]),
                                      np.array([4, 10, 40])))
    xticks_labels = np.concatenate(
        (np.array(['0', '1', '2']), np.array(['4', '10', '40'])))
    yticks_location = np.array([-1e-3, 0, 1e-3, 1e-1])

    # Seaborn style set
    sns.set(style="whitegrid")
    sns.set_style("whitegrid", {
        'grid.linestyle': 'dashed',
        "grid.color": "0.6",
        'axes.edgecolor': '.1'
    })

    # Plot colors
    colors = [
        "windows blue", "amber", "greyish", "faded green", "dusty purple"
    ]
    palette_background = sns.light_palette("lightsteelblue", 6)
    c_left = sns.light_palette("navy")[-2]
    c_right = sns.light_palette("purple")[-2]
    c_help = sns.light_palette("green")[-2]
    c_harm = sns.light_palette("red")[-2]
    c_phase1 = palette_background[0]
    c_phase2 = palette_background[2]
    c_test = sns.xkcd_palette(colors)[1]
    marker_size = 0.5
    marker_size_help = 1.5

    fig, axs = plt.subplots(2,
                            2,
                            figsize=(3 + 3 / 8, 2.5),
                            sharex=True,
                            sharey=True)
    fig.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.rc('font', size=9)
    plt.rc('axes', labelsize=8)

    for panel, test_sample in enumerate(chosen_test_examples):
        # Panels are filled row by row: 0 -> (0, 0), 1 -> (0, 1), ...
        i, j = divmod(panel, 2)
        ax = axs[i][j]

        # Influence functions of all train elements for one test example
        with open(folder_influence + '/original_influence_test' +
                  str(test_sample) + '.txt') as filelabels:
            influence_functions = np.loadtxt(filelabels, dtype=float)

        antimasked_inf_funs = influence_functions[antimask]
        sorting_indices = np.argsort(antimasked_inf_funs)

        # Split the training points into the two phases at index 502.
        antimasked_inf_funs_phase1 = antimasked_inf_funs[0:502]
        antimasked_inf_funs_phase2 = antimasked_inf_funs[502:1001]
        U_array_phase1 = U_array[0:502]
        U_array_phase2 = U_array[502:1001]

        U_test_value = U_testarray[test_sample]

        # Figure
        ax.scatter(U_array_phase1,
                   antimasked_inf_funs_phase1,
                   marker='o',
                   c=c_left,
                   s=marker_size,
                   label='training points, phase 1')
        ax.scatter(U_array_phase2,
                   antimasked_inf_funs_phase2,
                   marker='o',
                   c=c_right,
                   s=marker_size,
                   label='training points, phase 2')
        # Lowest five influence values
        ax.plot(U_array[sorting_indices[:5]],
                antimasked_inf_funs[sorting_indices[:5]],
                'o',
                c=c_harm,
                markersize=marker_size_help,
                label='most harmful')
        # Highest five influence values
        ax.plot(U_array[sorting_indices[-5:]],
                antimasked_inf_funs[sorting_indices[-5:]],
                'o',
                c=c_help,
                markersize=marker_size_help,
                label='most helpful')
        # Vertical line at the test point
        ax.plot([U_test_value, U_test_value], [min_y, max_y],
                color=c_test,
                label="test point (U''=" + U_value + ")")

        # NOTE(review): linthreshy / linthreshx are the spellings used by
        # older Matplotlib; newer releases name the argument ``linthresh``.
        # Confirm against the Matplotlib version this project pins.
        ax.set_yscale('symlog', linthreshy=1e-3)
        ax.set_xscale('symlog', linthreshx=3)
        ax.set_xticks(xticks_location)
        ax.set_xticklabels(xticks_labels)
        ax.set_yticks(yticks_location)
        ax.tick_params(which='both', labelsize='small')
        ax.set_ylim(min_y, max_y)
        ax.set_xlim(0, max_x)
        ax.tick_params(which='both', direction='in')
        ax.grid(linewidth=0.1)
        for axis in ['top', 'bottom', 'left', 'right']:
            ax.spines[axis].set_linewidth(0.05)

        # Two colors background
        ax.axvspan(0, trans_point, facecolor=c_phase1, zorder=0, lw=0)
        ax.axvspan(trans_point, max_x, facecolor=c_phase2, zorder=0, lw=0)

    # Panel letters: right-aligned in the left column, left-aligned in the
    # right column.
    axs[0][0].text(35,
                   0.06,
                   '(a)',
                   verticalalignment='top',
                   horizontalalignment='right',
                   family="serif")
    axs[0][1].text(0.15,
                   0.06,
                   '(b)',
                   verticalalignment='top',
                   horizontalalignment='left',
                   family="serif")
    axs[1][0].text(35,
                   0.06,
                   '(c)',
                   verticalalignment='top',
                   horizontalalignment='right',
                   family="serif")
    axs[1][1].text(0.15,
                   0.06,
                   '(d)',
                   verticalalignment='top',
                   horizontalalignment='left',
                   family="serif")

    # Shared axis labels for the whole grid.
    fig.text(-0.03,
             0.5,
             'Influence function value',
             family="serif",
             va='center',
             rotation='vertical')
    # Raw string: "\," is TeX spacing, not a Python escape sequence.
    fig.text(0.5, -0.01, r'$V_1/\,J$ ', family="serif", ha='center')

    return fig
def print_confusion_matrix(confusion_matrix,
                           class_names,
                           figsize=(10, 7),
                           fontsize=14):
    """Plot a row-normalised confusion matrix as a heatmap.

    Every row of the matrix is divided by its sum before plotting.  Rows
    that sum to zero are shown as all zeros.

    Arguments
    ---------
    confusion_matrix: numpy.ndarray
        The numpy.ndarray object returned from a call to
        sklearn.metrics.confusion_matrix.  Similarly constructed ndarrays
        (or nested sequences) can also be used.
    class_names: list
        An ordered list of class names, in the order they index the given
        confusion matrix.
    figsize: tuple
        A 2-long tuple, the first value determining the horizontal size of
        the outputted figure, the second determining the vertical size.
        Defaults to (10,7).
    fontsize: int
        Font size for axes labels. Defaults to 14.

    Returns
    -------
    matplotlib.figure.Figure
        The resulting confusion matrix figure

    Raises
    ------
    ValueError
        Propagated unchanged from pandas / seaborn, e.g. when
        ``class_names`` does not match the matrix shape.
    """
    counts = np.asarray(confusion_matrix).astype('float')
    row_totals = counts.sum(axis=1)[:, np.newaxis]
    # Divide only where the row total is non-zero; the remaining cells keep
    # the 0.0 they were initialised with instead of becoming NaN.
    normalised = np.divide(counts,
                           row_totals,
                           out=np.zeros_like(counts),
                           where=row_totals != 0)

    # Five-colour gradient running from blue (low) to red (high).
    custom_color_map = LinearSegmentedColormap.from_list(
        name='custom_navy',
        colors=['blue', 'cyan', 'green', 'yellow', 'red'],
    )
    df_cm = pd.DataFrame(
        normalised,
        index=class_names,
        columns=class_names,
    )

    fig = plt.figure(figsize=figsize)
    # Only every 15th tick label is drawn.
    sns.heatmap(df_cm,
                annot=False,
                cbar=True,
                xticklabels=15,
                yticklabels=15,
                cmap=custom_color_map)
    plt.yticks(rotation=0)
    plt.ylabel('True label', fontsize=fontsize)
    plt.xlabel('Predicted label', fontsize=fontsize)
    return fig
def heatmap_df(report_pivot, count_df, list_wanted, ax, size='xx-small', rotation=0, color_gradient='red'):
    '''
    Function to plot the heatmap on the figure based on the presence absence data

    :param report_pivot: Pivot table of the report that represent the presence absence table of the gene/systems
    :type: pandas.DataFrame
    :param count_df: Number of the genomes for each phyla order in the same order as the heatmap
    :type: pandas.DataFrame
    :param list_wanted: List of all the genes/systems wanted to appear on the figure
    :type: list of str
    :param ax: The axe on which to plot the sub figure
    :type: matplotlib.axes.Axes
    :param size: Font size in points or as a string (e.g., 'large')
    :type: float or str
    :param rotation: The angle to which the label of the heatmap to rotate
    :type: int
    :param color_gradient: Color handed to seaborn.light_palette to build the colormap of the heatmap
    :type: any color specification accepted by seaborn.light_palette
    :return: Nothing
    '''

    cmap = sns.light_palette(color_gradient, as_cmap=True)

    # Entries of list_wanted that are absent from the pivot table become rows
    # of zeros. Using fill_value on reindex keeps the caller's report_pivot
    # untouched instead of appending rows to it in place.
    df_annot = report_pivot.reindex(list_wanted, fill_value=0)

    # Colors follow the raw counts divided by count_df.Count, while the cell
    # annotations keep the raw counts.
    df_heatmap = df_annot.div(count_df.Count, axis=0)

    sns.heatmap(
        df_heatmap,
        cmap=cmap,
        linewidths=1,
        linecolor=(0.3997693305214246, 0.6478123867044262, 0.80273742044673246, 1.0),
        annot=df_annot,
        annot_kws={
            'color': 'black',
            'fontsize': size
        },
        cbar=False,
        ax=ax,
        fmt="d",
        yticklabels=False,
    )

    ax.set_xticklabels(ax.get_xticklabels(), rotation=rotation)

    # The mesh is the figure itself here, so to change the facecolor of the cell in the heatmap
    # we need to parse the mesh as in the seaborn instance code
    # So now the 0 are white
    mesh = ax.collections[0]
    # ravel() is a no-op for a flat array and flattens a 2-D one, so the
    # values line up one-to-one with the rows of get_facecolors().
    all_values_mesh = mesh.get_array().ravel()
    all_color_mesh = mesh.get_facecolors()

    # Keep this reset before assigning the explicit colors: the statement
    # order of the original implementation is preserved on purpose.
    mesh.set_facecolor('none')

    new_color = [
        'white' if value == 0 else color
        for value, color in zip(all_values_mesh, all_color_mesh)
    ]
    mesh.set_facecolor(new_color)

    # Modify axis and ticks
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(
        axis='x',
        which='both',
        labelsize=size,
        length=0,
    )

    ax.xaxis.set_label_text("")
    ax.yaxis.set_label_text("")
from models import BayesianMLP sns.set_style("white") n = 9 bnn_col = ["deep sky blue", "bright sky blue"] gpp_bnn_col = ["red", "salmon"] gp_col = ["green", "light green"] colors = {"bnn": bnn_col, "gpp": gpp_bnn_col, "gp": gp_col} sample_col = { "bnn": "bright sky blue", "gpp": "watermelon", "gp": "light lime" } pal_col = { "bnn": sns.light_palette("#3498db", n_colors=n), # nice blue "gpp": sns.light_palette("#e74c3c", n_colors=n), # nice red "gp": sns.light_palette("#2ecc71", n_colors=n), } # nice green eh not so nice project_dir = "../figures/" def plot_deciles(x_all, y_all_pred, y_all_ground_truth=None, x_train=None, y_train=None, mode="bnn", title=None): """
# xkcd中包含了一套针对随机RGB颜色的命名,产生了954个可以随时从xkcd_rgb字典中调用的已经被命名的颜色 # plt.plot([0, 1], [0, 1], sns.xkcd_rgb["pale red"], lw=3) # lw表示线宽 # plt.plot([0, 1], [0, 2], sns.xkcd_rgb["medium green"], lw=3) # plt.plot([0, 1], [0, 3], sns.xkcd_rgb["denim blue"], lw=3) # colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"] # sns.palplot(sns.xkcd_palette(colors)) # ------------------------------------------------------------------------------- # 连续色板——色彩随数据变换,比如数据越重要则颜色就越深 # sns.palplot(sns.color_palette("Blues")) # 如果想要翻转渐变,可以在面板名称中添加一个_r后缀 # sns.palplot(sns.color_palette("BuGn_r")) # 色调线性变化 # 颜色的亮度和饱和度呈线性变化 # sns.palplot(sns.color_palette("cubehelix", 8)) # sns.palplot(sns.cubehelix_palette(8, start=0.5, rot=-0.75)) # light_palette()和dark_palette()调用定制连续调色板 # sns.palplot(sns.light_palette("green")) # sns.palplot(sns.dark_palette("purple")) # 颜色由浅到深变化 # sns.palplot(sns.light_palette("purple", reverse=True)) # 颜色由深到浅变化 sns.palplot(sns.light_palette((210, 90, 60), input="husl")) plt.show()
def heatmap_plot_zscore_bbp(df_zscore_features, df_all, output_dir, title=None):
    """Draw and save a clustered heatmap of z-scored features.

    The feature table is transposed so that features are the heatmap rows and
    the rows of ``df_zscore_features`` are the heatmap columns. Rows are put
    in the leaf order of a Ward linkage computed on the features (no row
    dendrogram is drawn); columns are clustered with a Ward linkage computed
    on ``df_zscore_features``. Two color strips annotate the columns: one for
    ``df_all['m-type']`` and one for ``df_all['layer']``.

    Args:
        df_zscore_features: pandas.DataFrame of z-scored feature values.
        df_all: pandas.DataFrame providing the 'm-type' and 'layer' columns
            (presumably aligned row-for-row with df_zscore_features; confirm
            against the caller).
        output_dir: directory where 'zscore_feature_heatmap.png' is written.
        title: optional title; set through ``pl.title`` when truthy.

    Returns:
        The linkage computed on ``df_zscore_features`` and used for the
        heatmap columns.
    """
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print("heatmap plot:bbp")
    metric = 'm-type'
    mtypes = np.unique(df_all[metric])
    print(mtypes)

    # Map every m-type to a color
    mtypes_pal = sns.color_palette("hls", len(mtypes))
    mtypes_lut = dict(zip(mtypes, mtypes_pal))
    mtypes_colors = df_all[metric].map(mtypes_lut)

    # Map every layer to a shade of green
    layers = np.unique(df_all['layer'])
    layer_pal = sns.light_palette("green", len(layers))
    layers_lut = dict(zip(layers, layer_pal))
    layer_colors = df_all['layer'].map(layers_lut)

    # Linkage over the rows of the input; becomes the column linkage below
    linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')

    # Order the features (rows of the transposed table) by their own linkage
    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)
    matchIndex = [data.index[x] for x in feature_order]
    data = data.reindex(matchIndex)

    g = sns.clustermap(data, row_cluster=False, col_linkage=linkage,
                       method='ward', metric='euclidean', linewidths=0.0,
                       col_colors=[mtypes_colors, layer_colors],
                       cmap=sns.cubehelix_palette(light=1, as_cmap=True),
                       figsize=(40, 20))

    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.subplots_adjust(left=0.1, bottom=0.5, right=0.9, top=0.95)
    if title:
        pl.title(title)

    location = "best"
    num_cols = 1
    # Legend entries are zero-sized bars added to the (unused) row-dendrogram
    # axes: first the m-types, then three blank spacer entries, then the
    # layers.
    legend_ax = g.ax_row_dendrogram
    for label in mtypes:
        legend_ax.bar(0, 0, color=mtypes_lut[label], label=label, linewidth=0.0)
    for i in range(3):
        legend_ax.bar(0, 0, color="white", label=" ", linewidth=0)
    for label in layers:
        legend_ax.bar(0, 0, color=layers_lut[label], label=label, linewidth=0.0)
    # Each legend() call replaces the axes' previous legend, so a single call
    # after all entries exist yields the same legend as the earlier
    # call-after-every-loop version.
    legend_ax.legend(loc=location, ncol=num_cols, borderpad=0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    return linkage
from PIL import Image
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load "U.png" as a 2-D float grayscale array. scipy.misc.imread (used here
# previously with flatten=True) was removed from SciPy, so the image is read
# with Pillow, which this script already imports; mode "F" is Pillow's
# 32-bit floating point grayscale mode.
with Image.open("U.png") as image:
    im_array = np.asarray(image.convert("F"))

# Coordinates of the dark pixels (value < 10). np.nonzero walks the array in
# row-major order, the same order the former nested loops appended in.
dark_rows, dark_cols = np.nonzero(im_array < 10)

# Only the row coordinates are reversed (as before); the columns keep their
# scan order.
X = dark_rows[::-1]
Y = dark_cols

sns.set_style("white")
pal = sns.light_palette("green", as_cmap=True)
# Alternative palette: pal = sns.dark_palette("palegreen", as_cmap=True)

with sns.axes_style(None):
    ax = sns.kdeplot(Y, X, cmap=pal, shade=True, shade_lowest=False)
    sns.despine(left=True, bottom=True, trim=True)

plt.show()
# Quick look at the data and pairwise relations of the selected columns.
print(df.head(10))
sns.pairplot(df, height=3, aspect=1.5,
             vars=['salary', 'credit', 'married'],
             diag_kind='kde', kind='reg')

print()
print('корреляция между числовыми признаками')
fields = ['age', 'salary', 'married', 'children', 'credit']
corr = df[fields].corr()
print(corr)

# Mask the upper triangle (diagonal included) so every pair is shown once.
# The builtin bool replaces np.bool, an alias removed from recent NumPy.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, mask=mask, cmap=sns.light_palette('grey'))

# 3-D scatter of salary / married / credit.
# The axes3d import is kept for its side effect: NOTE(review) older
# matplotlib releases need it to register the '3d' projection -- confirm
# before removing.
from mpl_toolkits.mplot3d import axes3d
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
x1 = df['salary']
y1 = df['married']
z1 = df['credit']
ax.scatter(x1, y1, z1, marker='o', label='blue')

# Horizontal box plots of salary and credit.
plt.figure(figsize=(16, 6))
df.boxplot(column=['salary', 'credit'], vert=False)
plt.show()

print('построение регрессионно модели')
from sklearn.model_selection import train_test_split
def shot_chart_jointgrid(x, y, data=None, title="", joint_type="scatter",
                         marginals_type="both", cmap=None, joint_color="b",
                         marginals_color="b", xlim=(-250, 250),
                         ylim=(422.5, -47.5), joint_kde_shade=True,
                         marginals_kde_shade=True, hex_gridsize=None, space=0,
                         size=(12, 11), court_color="gray", outer_lines=False,
                         court_lw=1, flip_court=False, joint_kws=None,
                         marginal_kws=None, **kwargs):
    """
    Returns a JointGrid object containing the shot chart.

    Parameters
    ----------
    x, y, data :
        Passed to ``sns.JointGrid`` as ``x``, ``y`` and ``data``.
    title : str
        Title placed above the top marginal plot.
    joint_type : {"scatter", "kde", "hex"}
        Plot drawn on the joint axes (``plt.scatter``, ``sns.kdeplot`` or
        ``plt.hexbin``).
    marginals_type : {"both", "hist", "kde"}
        Plot drawn on the marginal axes: ``sns.distplot`` with defaults,
        ``sns.distplot`` with ``kde=False``, or ``sns.kdeplot``.
    cmap :
        Colormap for the "kde" and "hex" joint plots. When None it is built
        with ``sns.light_palette(joint_color, as_cmap=True)``.
    joint_color, marginals_color :
        Colors for the scatter joint plot and for the marginal plots.
    xlim, ylim : tuple
        Axis limits handed to ``sns.JointGrid``.
    joint_kde_shade, marginals_kde_shade : bool
        ``shade`` argument of the joint / marginal ``sns.kdeplot`` calls.
    hex_gridsize : int or None
        ``gridsize`` for ``plt.hexbin``. When None it is the integer mean of
        the Freedman-Diaconis bin counts of ``x`` and ``y``.
    space :
        ``space`` argument of ``sns.JointGrid``.
    size : tuple
        Figure size in inches.
    court_color, outer_lines, court_lw :
        Forwarded to ``draw_court`` as ``color``, ``outer_lines`` and ``lw``.
    flip_court : bool
        When True, ``xlim`` and ``ylim`` are reversed.
    joint_kws, marginal_kws : dict or None
        Extra keyword arguments for the joint / marginal plotting calls.
    **kwargs :
        Merged into the joint keyword arguments (overriding ``joint_kws``
        entries with the same key).

    Raises
    ------
    ValueError
        If ``joint_type`` or ``marginals_type`` is not one of the listed
        options.
    """

    # The joint_kws and marginal_kws idea was taken from seaborn
    # Work on a copy so the caller's joint_kws dict is not modified when the
    # extra **kwargs are merged in.
    joint_kws = {} if joint_kws is None else dict(joint_kws)
    joint_kws.update(kwargs)

    if marginal_kws is None:
        marginal_kws = {}

    # If a colormap is not provided, then it is based off of the joint_color
    if cmap is None:
        cmap = sns.light_palette(joint_color, as_cmap=True)

    # Flip the court so that the hoop is by the bottom of the plot
    if flip_court:
        xlim = xlim[::-1]
        ylim = ylim[::-1]

    # Create the JointGrid to draw the shot chart plots onto
    grid = sns.JointGrid(x=x, y=y, data=data, xlim=xlim, ylim=ylim,
                         space=space)

    # Joint Plot
    # Create the main plot of the joint shot chart
    if joint_type == "scatter":
        grid = grid.plot_joint(plt.scatter, color=joint_color, **joint_kws)

    elif joint_type == "kde":
        grid = grid.plot_joint(sns.kdeplot, cmap=cmap,
                               shade=joint_kde_shade, **joint_kws)

    elif joint_type == "hex":
        if hex_gridsize is None:
            # Get the number of bins for hexbin using Freedman-Diaconis rule
            # This is idea was taken from seaborn, which got the calculation
            # from http://stats.stackexchange.com/questions/798/
            from seaborn.distributions import _freedman_diaconis_bins
            x_bin = _freedman_diaconis_bins(x)
            y_bin = _freedman_diaconis_bins(y)
            hex_gridsize = int(np.mean([x_bin, y_bin]))

        grid = grid.plot_joint(plt.hexbin, gridsize=hex_gridsize, cmap=cmap,
                               **joint_kws)

    else:
        raise ValueError("joint_type must be 'scatter', 'kde', or 'hex'.")

    # Marginal plots
    # Create the plots on the axis of the main plot of the joint shot chart.
    if marginals_type == "both":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   **marginal_kws)

    elif marginals_type == "hist":
        grid = grid.plot_marginals(sns.distplot, color=marginals_color,
                                   kde=False, **marginal_kws)

    elif marginals_type == "kde":
        grid = grid.plot_marginals(sns.kdeplot, color=marginals_color,
                                   shade=marginals_kde_shade, **marginal_kws)

    else:
        raise ValueError("marginals_type must be 'both', 'hist', or 'kde'.")

    # Set the size of the joint shot chart
    grid.fig.set_size_inches(size)

    # Extract the first axes, which is the main plot of the
    # joint shot chart, and draw the court onto it
    ax = grid.fig.get_axes()[0]
    draw_court(ax, color=court_color, lw=court_lw, outer_lines=outer_lines)

    # Get rid of the axis labels
    grid.set_axis_labels(xlabel="", ylabel="")
    # Get rid of all tick labels. Real booleans are required here: the
    # string "off" is truthy, so it does not hide the labels on matplotlib
    # versions that no longer translate "on"/"off".
    ax.tick_params(labelbottom=False, labelleft=False)
    # Set the title above the top marginal plot
    ax.set_title(title, y=1.2, fontsize=18)

    return grid
if __name__ == "__main__":
    # Demo: build a small frame with two missing cells, style it and render
    # the styled result to HTML.
    import numpy as np
    import pandas as pd
    import seaborn as sns

    np.random.seed(24)
    linear_part = pd.DataFrame({"A": np.linspace(1, 10, 10)})
    random_part = pd.DataFrame(np.random.randn(10, 4), columns=list("BCDE"))
    df = pd.concat([linear_part, random_part], axis=1)

    # Two holes so the null highlighting has something to show.
    df.iloc[0, 2] = np.nan
    df.iloc[3, 3] = np.nan

    cm = sns.light_palette("green", as_cmap=True)

    # Same styling steps as before, applied in the same order; every Styler
    # method is chained on the result of the previous one.
    # Unused alternatives kept for reference:
    #   df.style.bar(subset=['A', 'B'], color='#d65f5f')
    #   df.style.bar(subset=['A', 'B'], align='mid', color=['#d65f5f', '#5fba7d'])
    #   .format(None, na_rep="-") after the element-wise step
    df = (
        df.style
        .background_gradient(cmap=cm)
        .highlight_null(null_color="red")
        # element wise
        .applymap(color_highlight_extreme)
        .apply(color_highlight_extreme, color="darkorange")
        .apply(
            color_highlight_extreme,
            extreme_func=NDFrameExtremeEnum.max,
            color="green",
            axis=None,
        )
    )

    html_ = df.render()
    # html_ = df.to_html()
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# NOTE(review): `locale` is not imported in the visible part of this file;
# presumably an `import locale` precedes this chunk -- confirm.
locale.setlocale(locale.LC_ALL, 'en_US')

# Named colors unpacked from seaborn's 'deep' palette.
# NOTE(review): this unpacking only works when the palette holds exactly six
# colors; verify against the seaborn version in use.
darkblue, green, red, purple, yellow, lightblue = sns.color_palette('deep')

# Modality categories in the order they are passed as hue_order below.
MODALITY_ORDER = ['~0', 'middle', '~1', 'bimodal', 'multimodal']

# Color assigned to each modality.
MODALITY_TO_COLOR = {'~0': lightblue, 'middle': yellow, '~1': red,
                     'bimodal': purple, 'multimodal': 'lightgrey'}

# The same colors as a list, ordered like MODALITY_ORDER.
MODALITY_PALETTE = [MODALITY_TO_COLOR[m] for m in MODALITY_ORDER]

# Colormap per modality: a light palette built from the modality's color,
# except 'multimodal', which uses matplotlib's Greys colormap.
MODALITY_TO_CMAP = {'~0': sns.light_palette(lightblue, as_cmap=True),
                    'middle': sns.light_palette(yellow, as_cmap=True),
                    '~1': sns.light_palette(red, as_cmap=True),
                    'bimodal': sns.light_palette(purple, as_cmap=True),
                    'multimodal': mpl.cm.Greys}

# Shared keyword arguments carrying the hue order and matching palette.
MODALITY_FACTORPLOT_KWS = dict(hue_order=MODALITY_ORDER, palette=MODALITY_PALETTE)


def violinplot(x=None, y=None, data=None, bw=0.2, scale='width',
               inner=None, ax=None, **kwargs):
    """Wrapper around Seaborn's Violinplot specifically for [0, 1] ranged data

    What's different:
    - bw = 0.2: Sets bandwidth to be small and the same between datasets
    - scale = 'width': Sets the width of all violinplots to be the same
    - inner = None: Don't plot a boxplot or points inside the violinplot
    """
r_ref = pd.concat(subsets) r_ref.to_csv(ref_path, index=False) else: r_ref = pd.read_csv(ref_path) # %% pool_over_subjects = False temp = r_ref pipelines = list(temp['pipeline'].unique()) ACROSS_DATASET_PLOTS_FOLDER = PLOTS_FOLDER / 'across_datasets' SELECTED_FOLDER = PLOTS_FOLDER / 'across_datasets' / 'selected_pipelines' os.makedirs(ACROSS_DATASET_PLOTS_FOLDER, exist_ok=True) os.makedirs(SELECTED_FOLDER, exist_ok=True) n_channel_configurations = len(temp['channels'].unique()) with sns.color_palette(sns.light_palette("navy", n_colors=6)[1:]): for compare_pipeline in pipelines: plt.close("all") fig, ax = plt.subplots(1, 1, figsize=(9, 4)) asd_pool = temp.groupby(['dataset', 'pipeline']).aggregate([np.mean, np.std]).reset_index() asd_pool = asd_pool.loc[asd_pool['pipeline'] == compare_pipeline] col_order = asd_pool.sort_values(by=('samples', 'mean'))['dataset'] asd_single = temp.groupby(['dataset', 'subject', 'pipeline']).aggregate([np.mean, np.std]).reset_index() asd_single = asd_single.loc[asd_single['pipeline'] == compare_pipeline] asd = asd_pool if pool_over_subjects else asd_single scatter_alpha = 1 if pool_over_subjects else 0.4 for d in asd['dataset'].unique(): replace_dict = { d: f'{_ds_pretty(d, bold=True)} \n({asd_pool.loc[asd_pool["dataset"] == d]["samples"]["mean"].iloc[0]:1.0f}' f'$\\pm${asd_pool.loc[asd_pool["dataset"] == d]["samples"]["std"].iloc[0]:1.1f})'
#G_mod_PT = np.array(extractFluxes(PT_Flux, 'Modelled_Flux_masked', 2)) #G_mod_OS = np.array(extractFluxes(OS_Flux, 'Modelled_Flux_masked', 2)) # #EF_mod_PT = H_mod_PT/LE_mod_PT #EF_EC = H_EC/LE_EC #%% Plotting timeseries colors = ['#3498db', '#2ecc71', '#f7cf33', '#fc9d1d','#fd484d', '#9b59b6', '#51677b'] #colors = ['#3498db', '#2ecc71', '#f7cf33', '#fA8e63','#fd484d', '#51677b'] colors = ['#3498db', '#2ecc71', '#f7cf33', '#fc9d1d','#fd484d', '#9b59b6', '#51677b'] pal = sns.color_palette(colors) # created from '#e74c3c' from colors and husl input from # http://www.husl-colors.org/ pal_red_light = sns.light_palette((11.4, 97.4, 58.1), input="husl") pal_red_dark = sns.dark_palette((11.4, 97.4, 58.1), input="husl") pal_blue_light = sns.light_palette((242.2, 90.1, 60.2), input="husl") pal_blue_dark = sns.dark_palette((242.2, 90.1, 60.2), input="husl") sns.set() sns.set(context = "poster", style = 'darkgrid', palette = pal, rc = {'axes.labelsize': 20.0, 'figure.figsize': [22, 14], 'legend.fontsize': 20.0, 'xtick.labelsize': 20.0, 'ytick.labelsize': 20.0}) #%% First plot: Air temperature and humidity fig = plt.figure() ax1 = fig.add_subplot(311) Ta_plot = plt.plot_date(ecTime[dateCol_start: dateCol_stop], Ta_C, '-', color = pal[0], alpha = 1.0, lw=2.5,
"outputrunlabel": "alldata2paymentfuns", "numbers_hours_next": [1, 1.5, 2], # [1, 1.5, 2],#[.75, 1, 1.25, 1.5],#, 1.25, 1.5], "match_functions": [ get_match_for_row_lastdriverinarea_dispatchtime, get_match_for_row_nextdrivermatched, ], "functions_to_run": [ "plot_tripindifference_histogram", "plot_drivershift_earnings", "supplementary_facts", # "plot_tripindifference_variancebyaddmult", ], "plot_colors": [ sns.light_palette("black", 2, input="xkcd").as_hex()[-1], sns.light_palette((210, 90, 60), 2, input="husl").as_hex()[-1], ], # "payment_functions": payment_functions_2months_withmin, # "payment_function_names": payment_function_2months_withmin_names, "skip_mimicfare_in_plot_stuff": False, } settings_plotting_puresurgeonly = copy.copy(settings_server_2months) settings_plotting_puresurgeonly.update(plotting_differences) settings_plotting_puresurgeonly["outputrunlabel"] = "pureonly" settings_plotting_puresurgeonly[ "payment_functions"] = payment_functions_2months_pureonly settings_plotting_puresurgeonly[ "payment_function_names"] = payment_function_2months_pureonly_names