def dropplot(data, feature='median_conservation', genome_len=10**4):
    """Draw one horizontal track per sequence with square markers on it.

    Every distinct ``seq_id`` in ``data`` gets a black line from 1 to
    ``genome_len`` at its own y position. Each row of that sequence is
    drawn as two square markers (at ``start`` and ``end``), sized by
    ``drop_size`` and coloured by the rank of its ``feature`` value among
    the sorted unique values of that column. A swatch of the palette is
    drawn in a second figure and both figures are shown.

    :param data: DataFrame with the columns ``seq_id``, ``start``, ``end``,
        ``drop_size``, ``product`` and the column named by ``feature``
    :param feature: column whose distinct values select the marker colour
    :param genome_len: x coordinate at which every track line ends
    """
    vals = np.sort(data[feature].unique())
    # Rank of each distinct feature value, keyed by its string form.
    mapping = {str(cons): i for i, cons in enumerate(vals)}

    n_colors = 2
    if vals.shape[0] > 2:
        n_colors = max(8, vals.shape[0])
    pal = sns.mpl_palette('seismic', n_colors)

    with sns.plotting_context(
            rc={
                "font.size": 12,
                "axes.labelsize": 15,
                "xtick.labelsize": 14,
                "ytick.labelsize": 12,
                'aspect': 10
            }):
        f, ax = plt.subplots(figsize=(14, 4))
        # The original iterated over an undefined name ``g``; the sequences
        # come from ``data`` itself.
        for i, seq in enumerate(data['seq_id'].unique()):
            g_tag = data[data['seq_id'] == seq]
            ax.plot([1, genome_len], [i, i],
                    color="black", alpha=0.7, linewidth=4)
            for _, row in g_tag.iterrows():
                ax.scatter([row['start'], row['end']], [i, i],
                           marker='s',
                           s=2 * row['drop_size'],
                           c=pal[mapping[str(row[feature])]],
                           label="{} {}".format(row['product'], row['start']))
        plt.legend(bbox_to_anchor=[1.1, 1.1])
        sns.palplot(sns.mpl_palette('seismic', n_colors))
        plt.show()
def plot_heatmap(X, y, top_n=10, metric='correlation', method='complete'):
    '''
    Draw a clustered heatmap of the top-ranked features, coloured by class.

    The features are ranked by ``feature_importance_report(X, y)``; the first
    ``top_n`` index entries of that report select the rows of the heatmap.
    Columns are the samples, labelled with ``y`` and annotated with one
    "Set2" colour per distinct label.

    :param X: list of dict (one record per sample)
    :param y: labels, one per record in ``X``
    :param top_n: number of leading features of the report to show
    :param metric: distance metric passed to ``sns.clustermap``
    :param method: linkage method passed to ``sns.clustermap``
    :return: the object returned by ``sns.clustermap``
    '''
    sns.set(color_codes=True)

    importance = feature_importance_report(X, y)
    top_features = importance[:top_n].index

    # Features as rows, samples as columns.
    matrix = pd.DataFrame().from_records(X)[top_features].T
    matrix.columns = y

    classes = set(y)
    palette = sns.mpl_palette("Set2", len(classes))
    class_to_color = dict(zip(classes, palette))
    column_colors = [class_to_color[label] for label in y]

    return sns.clustermap(matrix,
                          figsize=(22, 22),
                          z_score=0,
                          metric=metric,
                          method=method,
                          col_colors=column_colors)
def qualitative_cmap(n_colors=17):
    """Build a categorical colour list from a tweaked 9-colour "Set1" base.

    The base is seaborn's "Set1" palette converted to hex, with the sixth
    entry replaced by ``'#FFDE00'``.

    Parameters
    ----------
    n_colors : int, default is 17
        Requested number of colours. For values up to 9 the full 9-colour
        base is returned unchanged. For larger values the request is
        rounded to 17 (when at most 17) or to ``8 * ceil((n_colors - 1) / 8)``
        before being handed to ``polylinear_gradient``.

    Returns
    -------
    list
        The 9 base hex colours, or whatever ``polylinear_gradient`` returns
        for the base colours and the rounded count.
    """
    base = [rgb2hex(rgb) for rgb in sns.mpl_palette("Set1", n_colors=9)]
    base[5] = '#FFDE00'

    if n_colors <= 9:
        return base

    steps = 17 if n_colors <= 17 else 8 * ceil((n_colors - 1) / 8)
    return polylinear_gradient(base, steps)
def plot_pca(pX, palette='Spectral', labels=None, ax=None, colors=None):
    """Plot PCA result, input should be a dataframe.

    Rows of ``pX`` that share an index label form one category; each
    category is drawn as one scatter series using the first two columns.

    :param pX: DataFrame of projected points, indexed by category label
    :param palette: palette name used when ``colors`` is not given
    :param labels: optional sequence of texts, one per row of ``pX`` in row
        order, written next to each point
    :param ax: axes to draw on; a new 6x6 inch figure is created when None
    :param colors: optional sequence of colours, one per category; when
        None a palette with ``len(categories) + 1`` entries is generated
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    cats = pX.index.unique()
    # Previously a caller-supplied ``colors`` was always overwritten.
    if colors is None:
        colors = sns.mpl_palette(palette, len(cats) + 1)
    print(len(cats), len(colors))
    for c, i in zip(colors, cats):
        # ``.ix`` no longer exists in pandas; select the category's rows by
        # label and the two components by position.
        group = pX.loc[[i]]
        ax.scatter(group.iloc[:, 0], group.iloc[:, 1], color=c, s=90,
                   label=i, lw=.8, edgecolor='black', alpha=0.8)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    if labels is not None:
        for pos, (_, point) in enumerate(pX.iterrows()):
            ax.text(point.iloc[0] + .1, point.iloc[1] + .1,
                    str(labels[pos]), fontsize=(9))
    ax.legend(fontsize=10, bbox_to_anchor=(1.5, 1.05))
    sns.despine()
    plt.tight_layout()
def plotClusterExpression(self, cluster_labels, row_cluster=False, col_clusters=False, yticks=False, xticks=False):
    """Draw a heatmap of ``self.expression`` with columns grouped by cluster.

    A copy of ``self.expression`` gets ``cluster_labels`` as column names and
    is sorted by them, so columns with the same label sit together. Each
    label is assigned an "hsv" colour that is shown as a column colour strip
    and in a legend placed on the column dendrogram axes.

    :param cluster_labels: one label per column of ``self.expression``
    :param row_cluster: forwarded to ``sns.clustermap`` as ``row_cluster``
    :param col_clusters: forwarded to ``sns.clustermap`` as ``col_cluster``
    :param yticks: forwarded as ``yticklabels``
    :param xticks: forwarded as ``xticklabels``
    """
    # Work on a copy so the stored expression matrix keeps its own columns.
    matrix = pd.DataFrame.copy(self.expression)
    matrix.columns = cluster_labels
    matrix.sort_index(axis=1, inplace=True)

    distinct = set(matrix.columns)
    label_to_color = dict(zip(distinct, sns.mpl_palette("hsv", len(distinct))))
    strip = pd.DataFrame(matrix.columns)[0].map(label_to_color)

    matrix.columns.name = "Cells clusters"
    matrix.index.name = "Genes"

    grid = sns.clustermap(matrix,
                          col_colors=strip.values,
                          col_cluster=col_clusters,
                          row_cluster=row_cluster,
                          cmap="gnuplot2",
                          yticklabels=yticks,
                          xticklabels=xticks)

    # Zero-size bars exist only to create one legend entry per cluster.
    legend_axes = grid.ax_col_dendrogram
    for label in sorted(set(cluster_labels)):
        legend_axes.bar(0, 0, color=label_to_color[label], label=label, linewidth=0)
    legend_axes.legend(loc="center", ncol=6)

    # Move the colour bar.
    grid.cax.set_position([.15, .2, .03, .45])
def figure_S2d():
    """Build and save the non-neuronal sample-proportion panel (figure S2d).

    Loads the "nonneuronal" dataset, computes proportions of "sample_name"
    per "cluster_final", plots them with a reversed four-colour "tab20"
    palette, and saves the figure under "figure_S02".
    """
    adata = load_adata("nonneuronal")
    proportions = get_sample_proportions(adata, "cluster_final", "sample_name")
    reversed_palette = sns.mpl_palette("tab20", 4)[::-1]
    figure = plot_celltype_proportions(proportions, reversed_palette)
    save_figure(figure, "figure_S02", "figS2d_nonneuronal_sample_props")
def __color_generator(palette, n):
    """Yield exactly ``n`` colours taken from ``palette``.

    :param palette: either a list/tuple/set of colours, or the name of a
        matplotlib palette that is resolved through ``sns.mpl_palette``
    :param n: number of colours to yield
    :raises NanocomporeError: if ``palette`` is neither a collection nor a
        string (raised when the generator is first advanced)

    The colours are repeated cyclically when fewer than ``n`` are available.
    This also covers named palettes for which seaborn returns fewer than
    ``n`` entries, where the previous index-based loop raised IndexError.
    """
    # isinstance() also accepts subclasses, which the exact type() comparison
    # used before rejected.
    if isinstance(palette, (list, tuple, set)):
        colors = palette
    elif isinstance(palette, str):
        colors = sns.mpl_palette(palette, n)
    else:
        raise NanocomporeError("Invalid palette type")

    for col in itertools.islice(itertools.cycle(colors), n):
        yield col
class FavorGradColor:
    """Named colour ramps used as class-level constants."""

    # Four HSL gradient segments joined end to end; the first two are
    # reversed so that each segment ends at the colour where the next starts.
    GreyBlueRed = (
        gradients.gradient_hsl(
            (230, 235, 240), (30, 40, 60), 150, value_scale=256)[::-1]
        + gradients.gradient_hsl(
            (245, 250, 255), (230, 235, 240), 35, value_scale=256)[::-1]
        + gradients.gradient_hsl(
            (255, 250, 245), (240, 230, 220), 35, value_scale=256)
        + gradients.gradient_hsl(
            (240, 230, 220), (100, 10, 10), 150, value_scale=256)
    )

    # 400 samples of the reversed "RdBu" palette.
    BlueRed = sns.color_palette('RdBu_r', n_colors=400)

    # 400 samples of "Reds_d" in reversed order. The attribute name is
    # spelled "DardRed" and is kept as is because callers reference it.
    DardRed = sns.mpl_palette("Reds_d", 400)[::-1]
def rand_color_palette(N):
    """Return ``N`` distinct colours drawn in random order from fixed pools.

    Colours are drawn without replacement from successive pools: the first
    9 from "Set1", the next 12 from "Set3", the next 8 from "Set2", and the
    remainder from seaborn's crayon colours.

    :param N: number of colours to return
    :return: list of ``N`` colours
    :raises ValueError: if ``N`` exceeds the total number of pooled colours
    """
    col = []
    colors = list(sns.mpl_palette('Set1', 9))
    for j in range(N):
        # Switch to the next pool exactly when the previous one is used up.
        if j == 9:
            colors = list(sns.mpl_palette('Set3', 12))
        elif j == 21:
            colors = list(sns.mpl_palette('Set2', 8))
        elif j == 29:
            colors = list(sns.crayons.keys())

        if not colors:
            raise ValueError(
                "rand_color_palette: requested %d colours but only %d are "
                "available" % (N, j))

        picked = colors.pop(np.random.randint(0, high=len(colors)))
        if j >= 29:
            # The crayon pool holds colour *names*. crayon_palette expects a
            # list of names; passing a bare string made it iterate over the
            # characters of the name and fail.
            picked = sns.crayon_palette([picked])[0]
        col.append(picked)
    return col
def plot_pca(pX, plot3d=False, palette='Spectral', labels=False, ax=None,
             colors=None, **kwargs):
    """Plot PCA result, input should be a dataframe.

    Rows of ``pX`` sharing an index label form one category, drawn as one
    scatter series from the first two (or three, with ``plot3d``) columns.

    :param pX: DataFrame of projected points, indexed by category label
    :param plot3d: also pass the third column to ``ax.scatter``; the caller
        must then supply an ``ax`` that accepts three coordinates
    :param palette: palette name used when no ``colormap`` keyword is given
    :param labels: if true, write each row's index label next to its point
    :param ax: axes to draw on; a new 6x6 inch figure is created when None
    :param colors: optional sequence of colours, one per category; when None
        they are generated from the colormap
    :param kwargs: ``fontsize`` and ``ms`` are required (KeyError otherwise);
        ``colormap``, ``linewidth`` and ``alpha`` are optional, and the last
        two are forwarded to ``ax.scatter``
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))

    # ``palette`` used to be ignored and ``colormap`` mandatory; fall back to
    # ``palette`` so both spellings work.
    colormap = kwargs.get('colormap', palette)
    fs = kwargs['fontsize']
    ms = kwargs['ms'] * 12
    # Forward only the scatter options that were actually supplied.
    kwargs = {k: kwargs[k] for k in ('linewidth', 'alpha') if k in kwargs}

    cats = pX.index.unique()
    import seaborn as sns
    if colors is None:
        colors = sns.mpl_palette(colormap, len(cats))
    for c, i in zip(colors, cats):
        # ``.ix`` no longer exists in pandas; select rows by label and
        # components by position.
        group = pX.loc[[i]]
        print(i, len(group))
        if plot3d:
            ax.scatter(group.iloc[:, 0], group.iloc[:, 1], group.iloc[:, 2],
                       color=c, s=ms, label=i, edgecolor='black', **kwargs)
        else:
            ax.scatter(group.iloc[:, 0], group.iloc[:, 1],
                       color=c, s=ms, label=i, edgecolor='black', **kwargs)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    if labels:
        for i, point in pX.iterrows():
            ax.text(point.iloc[0] + .3, point.iloc[1] + .3, str(i),
                    fontsize=(9))
    if len(cats) < 20:
        ax.legend(fontsize=fs * .8)
def plot_by_label(X, palette='Set1'):
    """Color scatter plot by dataframe index label.

    Rows of ``X`` sharing an index label are drawn as one scatter series
    from the first two columns, on a new 6x6 inch figure.

    :param X: DataFrame indexed by category label
    :param palette: palette name passed to ``sns.mpl_palette``
    """
    import seaborn as sns
    cats = X.index.unique()
    colors = sns.mpl_palette(palette, len(cats))
    f, ax = plt.subplots(figsize=(6, 6))
    for c, i in zip(colors, cats):
        # ``.ix`` no longer exists in pandas; select rows by label and
        # columns by position.
        group = X.loc[[i]]
        ax.scatter(group.iloc[:, 0], group.iloc[:, 1], color=c, s=100,
                   label=i, lw=1, edgecolor='black')
    ax.legend(fontsize=10)
    sns.despine()
def plot_by_label(X, palette='Set1'):
    """Color scatter plot by dataframe index label.

    Rows of ``X`` sharing an index label are drawn as one scatter series
    from the first two columns, on a new 6x6 inch figure.

    :param X: DataFrame indexed by category label
    :param palette: palette name passed to ``sns.mpl_palette``
    """
    import seaborn as sns
    cats = X.index.unique()
    colors = sns.mpl_palette(palette, len(cats))
    f, ax = plt.subplots(figsize=(6, 6))
    for c, i in zip(colors, cats):
        # ``.ix`` no longer exists in pandas; select rows by label and
        # columns by position.
        group = X.loc[[i]]
        ax.scatter(group.iloc[:, 0], group.iloc[:, 1], color=c, s=100,
                   label=i, lw=1, edgecolor='black')
    ax.legend(fontsize=10)
    sns.despine()
def graph_utility_scaled_cap_colour(metrics: Metrics, path_prefix: str):
    """Plot normalised utility over time, one line per (source, capability).

    Each buffer entry contributes the point ``(t, utility / max_utility)`` to
    the line of its source and capability. The colour family is chosen by
    the capability and the shade by the position of the source in
    ``metrics.agent_names``. The figure is written to
    ``{path_prefix}norm-utility-cc.pdf`` and then closed.

    :param metrics: object providing ``buffers``, ``agent_names`` and
        ``capability_names``
    :param path_prefix: prefix of the output file path
    """
    fig = plt.figure()
    ax = fig.gca()

    # Group in a single pass over the buffers (order within each group is the
    # buffer order), instead of rescanning every buffer once per group.
    grouped_utility = {}
    for b in metrics.buffers:
        grouped_utility.setdefault((b.source, b.capability), []).append(
            (b.t, b.utility / b.max_utility))

    sequential_cmaps = [
        seaborn.mpl_palette(name, n_colors=len(metrics.agent_names))
        for name in ("Greens", "Purples")
    ]

    # Capability names are indexed by the integer after their first
    # character. Only two colour families are defined, so an index of 2 or
    # more raises IndexError here.
    cmap_for_cap = {
        c: sequential_cmaps[c]
        for c in {int(c[1:]) for c in metrics.capability_names}
    }

    for ((src, cap), utilities) in sorted(grouped_utility.items(), key=lambda x: x[0]):
        X, Y = zip(*utilities)
        ax.plot(X, Y, label=f"{src} {cap}",
                color=cmap_for_cap[int(cap[1:])][metrics.agent_names.index(src)])

    ax.set_ylim(0, 1)
    ax.set_xlabel('Time (secs)')
    ax.set_ylabel('Normalised Utility (\\%)')
    ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, symbol=''))
    ax.legend(bbox_to_anchor=(1.5, 1), loc="upper right", ncol=2)

    savefig(fig, f"{path_prefix}norm-utility-cc.pdf")
    plt.close(fig)
def get_colormap(groups: Optional[List[int]] = None) -> np.ndarray:
    """Return one colour per entry of ``groups``, shaded within its group.

    Every group index gets its own sequential palette ("Blues", "Reds",
    "Greens", "Purples", "Greys" for indices 0-4). Successive members of the
    same group receive successive colours of the reversed palette.

    :param groups: group index of every item; when None the current seaborn
        colour palette is returned as is (a palette object, not an array)
    :return: array with one colour row per entry of ``groups``
    :raises ValueError: if there are more groups than available palettes
    """
    if groups is None:
        return sns.color_palette()

    # Color palette names
    names = ["Blues", "Reds", "Greens", "Purples", "Greys"]

    # Get group size (and check that group indices are consecutive)
    n = check_groups(groups)

    if n > len(names):
        raise ValueError("Too many groups for the available color palettes.")

    # Number of members per group, so each palette is large enough. The
    # default palette size of 6 used before ran out (StopIteration) for
    # groups with more than 6 members.
    sizes = {}
    for group in groups:
        sizes[group] = sizes.get(group, 0) + 1

    # One iterator per group over its reversed palette. At least 6 colours
    # are requested so that small groups keep the colours they had before.
    palettes = {
        g: iter(sns.mpl_palette(names[g], max(6, sizes.get(g, 0)))[::-1])
        for g in range(n)
    }

    colors = [next(palettes[group]) for group in groups]

    return np.asarray(colors)
def create_cmap(name: str = None, palette_type: str = None, as_cmap: bool = True, **kwargs) -> Union[list, plt.Axes]:
    """Create a colormap or color palette object.

    Parameters
    ----------
    name
        Name of the pyrates colormap. If specified, palette_type will be ignored.
        A name of the form ``"a/b"`` builds a diverging colormap from the
        two colormaps ``a`` and ``b``.
    palette_type
        Type of the seaborn color palette to use. Only necessary if no name is specified.
    as_cmap
        If true, a matplotlib colormap object will be returned. Else a seaborn color palette (list).
    kwargs
        Keyword arguments for the wrapped seaborn functions. ``vmin``,
        ``vmax`` and ``n_colors`` are consumed here when a colormap is built.
        For ``"a/b"`` names, a keyword named ``a`` or ``b`` may hold the
        keyword dict for that half.

    Returns
    -------
    Union[list, plt.Axes]
        cmap or seaborn color palette.

    Raises
    ------
    ValueError
        If ``name`` contains ``"pyrates"`` but is not one of the known
        pyrates colormaps.

    """

    from seaborn import cubehelix_palette, dark_palette, light_palette, diverging_palette, hls_palette, husl_palette, \
        color_palette, crayon_palette, xkcd_palette, mpl_palette
    import matplotlib.colors as mcolors

    # ``name`` defaults to None; guard the substring tests so that the
    # documented "no name, only palette_type" call does not raise TypeError.
    if name is not None and '/' in name:

        # create diverging colormap
        name1, name2 = name.split('/')
        vmin = kwargs.pop('vmin', 0.)
        vmax = kwargs.pop('vmax', 1.)
        if type(vmin) is float:
            vmin = (vmin, vmin)
        if type(vmax) is float:
            vmax = (vmax, vmax)
        kwargs1 = kwargs.pop(name1, kwargs)
        kwargs2 = kwargs.pop(name2, kwargs)
        cmap1 = create_cmap(name1, **kwargs1, as_cmap=True)
        cmap2 = create_cmap(name2, **kwargs2, as_cmap=True)
        n = kwargs.pop('n_colors', 10)
        if type(n) is int:
            n = (n, n)
        # The second half is sampled in reverse so the two ramps meet in the
        # middle of the combined colormap.
        colors = np.vstack((cmap1(np.linspace(vmin[0], vmax[0], n[0])),
                            cmap2(np.linspace(vmin[1], vmax[1], n[1])[::-1])))
        return mcolors.LinearSegmentedColormap.from_list('cmap_diverging', colors)

    # extract colorrange
    if as_cmap:
        vmin = kwargs.pop('vmin', 0.)
        vmax = kwargs.pop('vmax', 1.)
        n = kwargs.pop('n_colors', 10)
        # Only resample when a strict sub-range of the colormap is requested.
        crange = np.linspace(vmin, vmax, n) if vmax - vmin < 1. else None
    else:
        crange = None

    if name is not None and 'pyrates' in name:

        # create pyrates colormap
        if name == 'pyrates_red':
            cmap = cubehelix_palette(as_cmap=as_cmap, start=-2.0, rot=-0.1, **kwargs)
        elif name == 'pyrates_green':
            cmap = cubehelix_palette(as_cmap=as_cmap, start=2.5, rot=-0.1, **kwargs)
        elif name == 'pyrates_blue':
            cmap = dark_palette((210, 90, 60), as_cmap=as_cmap, input='husl', **kwargs)
        elif name == 'pyrates_yellow':
            cmap = dark_palette((70, 95, 65), as_cmap=as_cmap, input='husl', **kwargs)
        elif name == 'pyrates_purple':
            cmap = dark_palette((270, 50, 55), as_cmap=as_cmap, input='husl', **kwargs)
        else:
            # Previously fell through and failed later with an unbound
            # local variable.
            raise ValueError("Unknown pyrates colormap: {!r}".format(name))

    else:

        # create seaborn colormap
        if palette_type == 'cubehelix':
            cmap = cubehelix_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'dark':
            cmap = dark_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'light':
            cmap = light_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'hls':
            cmap = hls_palette(name, **kwargs)
        elif palette_type == 'husl':
            cmap = husl_palette(name, **kwargs)
        elif palette_type == 'diverging':
            cmap = diverging_palette(name, as_cmap=as_cmap, **kwargs)
        elif palette_type == 'crayon':
            cmap = crayon_palette(name, **kwargs)
        elif palette_type == 'xkcd':
            cmap = xkcd_palette(name, **kwargs)
        elif palette_type == 'mpl':
            cmap = mpl_palette(name, **kwargs)
        else:
            cmap = color_palette(name, **kwargs)

    # apply colorrange
    if crange is not None:
        cmap = mcolors.LinearSegmentedColormap.from_list(name, cmap(crange))

    return cmap
import seaborn as sns

# Apply seaborn's default theme, then preview eight colours of "Set2".
sns.set()
set2_preview = sns.mpl_palette("Set2", 8)
sns.palplot(set2_preview)
#!/usr/bin/env python # -*- coding: utf-8 -*- import numpy as np import matplotlib import matplotlib.pyplot as plt import seaborn as sns import cmocean as cmo red_colormap = color_map = sns.blend_palette( ["0.9", sns.xkcd_rgb["bright red"]], as_cmap=True) tab20a = sns.mpl_palette("tab20_r", 20) tab20b = sns.mpl_palette("tab20b_r", 20) tab20c = sns.mpl_palette("tab20c_r", 20) main_palette = (tab20c[17:] + [tab20a[1]] + tab20c[6:8] + tab20b[17:] + tab20b[5:7] + [tab20a[8]] + [tab20b[2]] + [tab20b[1]] + tab20b[9:11] + tab20c[9:12] + [tab20c[14]]) nonneuronal_palette = (tab20c[8:12] + tab20b[14:16] + tab20b[4:7] + tab20c[18:] + tab20b[9:11] + tab20b[1:3] + tab20b[16:19:2] + [tab20a[8]]) neuronal_palette = (tab20c[13:16:2] + tab20c[9:12] + tab20b[1:3] + tab20b[4:7] + tab20c[16:] + [tab20a[1]] + [tab20b[9]] + [tab20a[5]] + tab20b[13:15] + [tab20a[9]]) neuronal_nonneuronal_palette = sns.xkcd_palette(["cerulean", "kelly green"]) heatmap_cmap = cmo.tools.cmap(cmo.cm.balance(np.linspace(0, 1, 256), 0.9)) gaba_vs_glut_vs_hdc_palette = sns.xkcd_palette( ["yellow orange", "medium blue", "tree green"])
ransac.fit(np.array(user_count).reshape(-1, 1), np.array(answerer_count).reshape(-1, 1)) inlier_mask = ransac.inlier_mask_ r_answerer.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(answerer_count).reshape(-1, 1)[inlier_mask])) ransac = linear_model.RANSACRegressor() ransac.fit(np.array(user_count).reshape(-1, 1), np.array(commenter_count).reshape(-1, 1)) inlier_mask = ransac.inlier_mask_ r_commenter.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(commenter_count).reshape(-1, 1)[inlier_mask])) current_site = row[0] asker_count[:] = [] answerer_count[:] = [] commenter_count[:] = [] user_count[:] = [] asker_count.append(int(row[6])) answerer_count.append(int(row[7])) commenter_count.append(int(row[12])) user_count.append(int(row[14])) df = pd.DataFrame({'Asker': r_asker, 'Answerer': r_answerer, 'Commenter': r_commenter}) ax = sns.lvplot(data = df, palette=sns.mpl_palette("gist_yarg")) ax.set(ylabel='Coeff. of Determination, $R^2$') ax.set_yticks(np.arange(0.0, 1.0, 0.05), minor=True) sns.despine(offset = 10, trim=True, bottom = True) sns.plt.tight_layout() plt.savefig('User_to_Roles_R_Squared_LV.pdf')
t_sne = make_pipeline(ExtractNames(), DictVectorizer()) clf0 = t_sne.fit_transform(speaker_gender["name_in_profile"]) from sklearn.manifold import TSNE tsne_model = TSNE(perplexity=40, n_components=2, init='pca', n_iter=2500, random_state=23) new_values = tsne_model.fit_transform(clf0.todense()) import seaborn as sns colors = sns.mpl_palette("Dark2", 7) points = np.array(new_values) import matplotlib.pyplot as plt colors_p = speaker_gender['he_she'].map(lambda x: colors[x]).tolist() for i in range(len(points)): plt.scatter(points[i, 0], points[i, 1], color=colors_p[i]) # plt.colorbar() plt.savefig('t_sne_2d.png', dpi=200) plt.clf() plt.cla() plt.close() tsne_model_3d = TSNE(perplexity=40, n_components=3,
import pandas as pd
import matplotlib.pyplot as plt
# The script calls ``sn.heatmap`` / ``sn.mpl_palette`` but never imported
# seaborn under that alias.
import seaborn as sn

# 16x16 matrix of counts; the trailing comment on each row is its row label.
array = [
    [500, 0, 0, 0, 0, 0, 0, 0, 39, 0, 3, 0, 1, 1, 0, 7],  # A
    [3, 432, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # B
    [1, 121, 513, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # C
    [1, 0, 0, 522, 31, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 34],  # D
    [0, 0, 0, 33, 532, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 17],  # E
    [0, 0, 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # F
    [3, 3, 2, 0, 0, 0, 559, 0, 0, 0, 2, 0, 0, 0, 0, 0],  # G
    [0, 0, 0, 0, 0, 0, 0, 501, 7, 0, 0, 0, 0, 0, 0, 0],  # H
    [1, 0, 0, 0, 0, 0, 0, 0, 1918, 0, 0, 0, 0, 0, 0, 1],  # I
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 37, 0, 0, 0, 0, 0, 0],  # J
    [10, 0, 0, 1, 0, 0, 1, 0, 0, 0, 358, 6, 0, 1, 0, 46],  # K
    [18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 578, 0, 0, 0, 2],  # L
    [1, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 1063, 0, 1, 0],  # M
    [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 188, 1, 0],  # N
    [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 395, 1],  # O
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 165],  # P
]

class_names = list("ABCDEFGHIJKLMNOP")
df_cm = pd.DataFrame(array, index=class_names, columns=class_names)

plt.figure(figsize=(13, 10))
sn.heatmap(df_cm,
           annot=True,
           fmt="d",
           linewidths=.5,
           cmap=sn.mpl_palette("Set3_r", 20))
plt.savefig("output.svg")
for i, p in enumerate(new_values): dic_point[labels[i]] = p # x, y = zip(*new_values) # for i in range(len(x)): # plt.scatter(x[i], y[i]) # plt.annotate(labels[i], xy=(x[i], y[i]), xytext=(5, 2), # textcoords='offset points', ha='right', va='bottom') # plt.show() from scipy.spatial import ConvexHull # http://matplotlib.org/examples/color/colormaps_reference.html import seaborn as sns colors = sns.mpl_palette("Dark2", len(sentence_list)) from shapely.geometry import box, Polygon def sent_plot(i): sent = sentence_list[i] all_w = list( set([w for w in sent.lower().split(' ') if w in embeddings_index])) points = np.array([dic_point[w] for w in all_w]) hull = ConvexHull(points) plt.plot(points[:, 0], points[:, 1], 'o', c=colors[i]) for simplex in hull.simplices: plt.plot(points[simplex, 0], points[simplex, 1], 'k-') plt.plot(points[hull.vertices, 0], points[hull.vertices, 1],
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid")
graycolors = sns.mpl_palette('Set2', 2)
# One figure is opened per column; silence matplotlib's open-figure warning.
plt.rcParams.update({'figure.max_open_warning': 0})

df = pd.read_csv("./bytecode_instruction_costs.csv")

# Overview: the distributions of all columns overlaid on one figure.
plt.figure()
for col in df.columns:
    sns.distplot(df[col])

# One figure per column with its mean / median / mode marked. Figures are
# created without explicit numbers: the previous ``plt.figure(i)`` reused
# figure 1, which already held the overview, for the second column.
for col in df.columns:
    plt.figure()
    df_col = df[col]
    median = df_col.median()
    # mode() returns all modes; take the first one.
    mode = df_col.mode().iloc[0]
    mean = df_col.mean()

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('Instruction:  {}'.format(col))
    print('\tMean:  {}'.format(mean))
    print('\tMedian:  {}'.format(median))
    print('\tMode:  {}'.format(mode))

    # Label each line directly. The previous legend listed "Mean, Mode,
    # Median" against lines drawn in the order median, mode, mean, so the
    # entries were attached to the wrong lines.
    plt.axvline(median, color='r', linestyle='--', label='Median')
    plt.axvline(mode, color='g', linestyle='-', label='Mode')
    plt.axvline(mean, color='b', linestyle='-', label='Mean')
    plt.legend()
    sns.distplot(df[col], norm_hist=True)

plt.show()
def main():
    """Project text representations to 2-D and show them as a scatter plot.

    Representations come either from precomputed embeddings
    (``args.embeddings``) or from a model (``args.model``). They are reduced
    to two components with t-SNE or PCA (``args.decomposition``) and plotted
    with hue ``args.hue``, marker style by test level and size by length.

    :raises ValueError: if neither embeddings nor a model is given, or the
        decomposition name is unknown
    """
    args = parse_args()
    meta = load_split(args.split)
    # Assign the result instead of replacing in place on a column selection,
    # which does not reliably write back to ``meta``.
    meta["lang"] = meta["lang"].replace(iso639_3)

    if args.embeddings:
        representations = get_fingerprints(args.embeddings, meta.filename)
    elif args.model:
        representations = get_model_representations(args.model, args.split)
    else:
        raise ValueError("Either embeddings or a model must be specified.")

    logger.info("Computing embeddings ...")
    if args.decomposition == "tsne":
        decomposer = TSNE(n_components=2, verbose=True)
    elif args.decomposition == "pca":
        decomposer = PCA(n_components=2)
    else:
        raise ValueError(
            "Unknown decomposition: {!r}".format(args.decomposition))
    embedded = decomposer.fit_transform(representations)

    meta["Component 1"] = embedded[:, 0]
    meta["Component 2"] = embedded[:, 1]
    meta.rename(
        {
            "cefr": "CEFR",
            "testlevel": "Test level",
            "num_tokens": "Length",
            "lang": "L1"
        },
        axis="columns",
        inplace=True)
    meta["Test level"] = meta["Test level"].replace({
        "Språkprøven": "IL test",
        "Høyere nivå": "AL test"
    })

    fig, ax = (plt.gcf(), plt.gca())
    if args.hue == "CEFR":
        palette = sns.mpl_palette('cool', 7)
        hue_order = CEFR_LABELS
    else:
        palette = None
        hue_order = None
    sns.scatterplot(
        x="Component 1",
        y="Component 2",
        hue=args.hue,
        style="Test level",
        data=meta,
        ax=ax,
        size="Length",
        palette=palette,
        hue_order=hue_order,
    )
    # Hide all ticks and tick labels. Booleans are required here: the string
    # "off" is simply truthy in current matplotlib and leaves them visible.
    ax.tick_params(
        axis="both",
        which="both",
        bottom=False,
        top=False,
        labelbottom=False,
        right=False,
        left=False,
        labelleft=False,
    )

    # Split the combined legend: the first 8 entries go to the left of the
    # axes, the remaining ones to the right.
    handles, labels = ax.get_legend_handles_labels()
    cefr_legend = ax.legend(handles[:8],
                            labels[:8],
                            loc="center right",
                            bbox_to_anchor=(-0.1, 0.5))
    ax.legend(handles[8:],
              labels[8:],
              loc="center left",
              bbox_to_anchor=(1.05, 0.5))
    # The second ax.legend() call replaced the first legend; add it back.
    ax.add_artist(cefr_legend)

    fig.set_size_inches(5, 3)
    plt.tight_layout()
    plt.show()
def plot_curve(data: list,
               fname: str,
               class_counts: tuple,
               is_roc: bool = True,
               min_score_fraction: float = 0.5):
    """
    Draw ROC or precision-recall curves of all tools into ``<fname>.pdf``

    :param list data: One record per tool with the fields tool,
        fraction_nan, label, thresholds, the two curve coordinate arrays and
        the summary score (``roc_auc`` or ``ap_score``)
    :param str fname: Output basename; ``.pdf`` is appended
    :param tuple class_counts: Number of positive and negative variants,
        shown in the plot title
    :param bool is_roc: Draw ROC curves when `True`, precision-recall curves
        otherwise. Default: `True`
    :param float min_score_fraction: Tools whose ``fraction_nan`` exceeds
        this value are not drawn. Default: `0.5`
    """
    # Everything that differs between the two curve types. ``curve_cols``
    # keeps the column order of the incoming records.
    if is_roc:
        curve_cols = ['True Positive Rate (TPR)', 'False Positive Rate (FPR)']
        score_col = 'roc_auc'
        hue = "tool_with_roc_auc"
        score_tag = " auROC="
        x = "False Positive Rate (FPR)"
        y = "True Positive Rate (TPR)"
    else:
        curve_cols = ['Recall', 'Precision']
        score_col = 'ap_score'
        hue = "tool_with_ap_score"
        score_tag = " AP="
        x = "Recall"
        y = "Precision"

    colnames = ['tool', 'fraction_nan', 'label', 'thresholds'] + curve_cols + [score_col]
    to_explode = ['thresholds'] + curve_cols

    # One row per threshold: the array-valued columns are exploded.
    df_metrics = pd.DataFrame.from_records(data, columns=colnames)
    df_metrics = df_metrics.reset_index().apply(
        lambda column: column.explode() if column.name in to_explode else column)

    for coord in curve_cols:
        df_metrics[coord] = pd.to_numeric(df_metrics[coord])

    # Legend text: tool label followed by its rounded summary score.
    df_metrics[hue] = df_metrics["label"] + score_tag + \
        df_metrics[score_col].round(2).map(str) + ")"
    df_metrics = df_metrics.sort_values(score_col, ascending=False)

    df_metrics = df_metrics[df_metrics['fraction_nan'] <= min_score_fraction]

    # Since S-CAP has several different reference
    # threshold, S-CAP is removed from these analyses
    df_metrics = df_metrics[~df_metrics.tool.str.contains("S-CAP")]

    # If many tools to plot, change color pallette
    if df_metrics.tool.unique().size > 12:
        many_tool_palette = sns.mpl_palette("magma", df_metrics.tool.unique().size)
        sns.set_palette(many_tool_palette)
    else:
        sns.set_palette(sns.color_palette("Paired"))

    ax = sns.lineplot(x=x, y=y, data=df_metrics, hue=hue)
    ax.set_aspect(1.15)

    n_pos, n_neg = class_counts[0], class_counts[1]
    plt.title("N pos = {}; N neg = {}".format(n_pos, n_neg))
    plt.legend(bbox_to_anchor=(1.1, 1), loc=2, borderaxespad=0.)
    plt.ylim(0, 1.05)
    plt.tight_layout()

    plt.savefig(fname + '.pdf')
    plt.close()
    sns.reset_defaults()
# Palette names that are requested as "cp_<name>" and passed to
# sns.color_palette under exactly that name.
_CP_DIRECT_NAMES = (
    # CP_Misc
    "Accent", "cubehelix", "flag", "Paired", "Pastel1", "Pastel2",
    "tab10", "tab20", "tab20c",
    # CP_Rainbow
    "hsv", "rainbow",
    # CP_Grad2
    "afmhot", "autumn", "binary", "bone", "cividis", "cool", "copper",
    "hot", "inferno", "magma", "mako", "plasma", "PuBuGn", "Purples",
    "RdPu", "rocket", "spring", "summer", "viridis", "winter", "Wistia",
    "YlOrRd",
    # CP_Grad3
    "BrBG", "brg", "bwr", "CMRmap", "gnuplot", "gnuplot2", "icefire",
    "ocean", "PiYG", "PRGn", "prism", "RdBu", "RdGy", "RdYlBu", "RdYlGn",
    "seismic", "Spectral", "vlag",
)

# Requested key -> seaborn palette name, for keys that differ from the name.
_CP_ALIASES = {
    "cp_gistncar": "gist_ncar",
    "cp_gistrainbow": "gist_rainbow",
    "cp_nipyspectral": "nipy_spectral",
    "cp_gistearth": "gist_earth",
    "cp_giststern": "gist_stern",
    # The colormap is called "terrain". The misspelled key is kept for
    # existing callers; it used to request the non-existent "terrein".
    "cp_terrein": "terrain",
    "cp_terrain": "terrain",
}


def _hsv_wheel_colors(numCols):
    """Return ``numCols`` evenly spaced hues as ``[r, g, b]`` in 0-255."""
    hsvCols = [(x / numCols, 1, 0.75) for x in range(numCols)]
    return [[255 * channel for channel in colorsys.hsv_to_rgb(*hsv)]
            for hsv in hsvCols]


def genSpecColors(numCols, colType):
    """Return ``numCols`` colours of the scheme selected by ``colType``.

    :param numCols: number of colours to generate
    :param colType: scheme key: ``"mc"`` (evenly spaced hues), ``"chp*"``
        (cubehelix variants), ``"mplp_*"`` (``sns.mpl_palette``), ``"cp"``
        (current seaborn palette) or ``"cp_<name>"`` (``sns.color_palette``).
        Any unknown key falls back to the ``"mc"`` scheme.
    :return: list of colours. The ``"mc"``/fallback scheme yields
        ``[r, g, b]`` lists scaled to 0-255; the seaborn-based schemes
        return seaborn's own palette objects unchanged.
    """
    if colType == "mc":
        return _hsv_wheel_colors(numCols)

    # CHP
    if colType == "chp":
        return sns.cubehelix_palette(numCols)
    if colType == "chp_rnd4":
        return sns.cubehelix_palette(numCols, rot=-.4)
    if colType == "chp_s2d8_rd1":
        return sns.cubehelix_palette(numCols, start=2.8, rot=.1)

    # MPLP
    if colType == "mplp_GnBu_d":
        return sns.mpl_palette("GnBu_d", numCols)
    if colType == "mplp_seismic":
        return sns.mpl_palette("seismic", numCols)

    # CP
    if colType == "cp":
        return sns.color_palette(n_colors=numCols)

    palette_name = _CP_ALIASES.get(colType)
    if palette_name is None and colType.startswith("cp_") \
            and colType[3:] in _CP_DIRECT_NAMES:
        palette_name = colType[3:]
    if palette_name is not None:
        return sns.color_palette(palette_name, n_colors=numCols)

    # Unknown scheme: same colours as "mc".
    return _hsv_wheel_colors(numCols)
6: '#bcc2f2', 7: '#eebcbc', 8: '#f1f0c0', 9: '#d2ffe7', 10: '#caf3a6', 11: '#ffdf55', 12: '#ef77aa', 13: '#d6dcff', 14: '#d2f5f0' } df_nodes['c'] = pd.Categorical.from_array(df_nodes.group).labels plt.figure(figsize=(25, 25)) group_len = len(df_nodes['group'].value_counts()) import seaborn as sns colors = sns.mpl_palette("tab20", group_len) options = { 'nodelist': df_nodes['name'].tolist(), 'node_size': [nodesize * 1.1 for nodesize in df_nodes['nodesize'].tolist()], #'node_color': [colors[group - 1] for group in df_nodes['group'].tolist()], 'node_color': [colors[c] for c in df_nodes['c'].tolist()], 'edgelist': list(zip(df_edges['source'], df_edges['target'])), 'width': [value * 0.1 for value in df_edges['value'].tolist()], 'edge_color': 'gray', 'with_labels': True, 'alpha': 1, 'font_weight': 'regular', } """
fig.subplots_adjust(wspace=0.02)

plt.rcParams['text.usetex'] = False
plt.rcParams['font.family'] = "sans-serif"
plt.rcParams['font.sans-serif'] = "Helvetica"
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['font.size'] = 16
plt.rcParams['legend.fontsize'] = 16
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16

ax = fig.add_subplot(111)

from matplotlib import cm
import seaborn as sns
# NOTE(review): sns.set() applies seaborn's own rc settings and runs after
# the rcParams assignments above, so it may override some of them - confirm
# that this order is intended.
sns.set()
colours = sns.mpl_palette("Set2", 4)

# One curve per lambda value; the optimum is marked for non-zero lambda only.
for i, lambdax in enumerate([0.0, 1.0, 3.0]):
    # Raw string: "\l" is not a valid escape sequence in a normal string
    # literal. The resulting text is unchanged.
    ax.plot(gsw_vals, y[i, :], label=r"$\lambda$ = %d" % (lambdax),
            color=colours[i], lw=2)
    if lambdax > 0.0:
        ax.plot(gsw_max[i], y_max[i], marker="o", color=colours[i])

from matplotlib.ticker import MaxNLocator
ax.yaxis.set_major_locator(MaxNLocator(5))
ax.xaxis.set_major_locator(MaxNLocator(5))
ax.set_xlabel(r"g$_{\mathrm{s}}$ (mol m$^{-2}$ s$^{-1}$)")
import matplotlib.pyplot as plt import numpy as np import pandas as pd import requests import seaborn as sns from sklearn.metrics import r2_score ## Figure layout plt.rcParams.update({'font.size': 15, 'figure.figsize': (11, 5)}) sns.set(style='ticks', palette="deep") sns.palplot(sns.mpl_palette("RdBu")) ### Load data sets resp = requests.get('http://api.statbank.dk/v1/data/FOLK1A/CSV?lang=en') dst_url_a = 'http://api.statbank.dk/v1/data/FT/CSV?lang=en&TID=*' data = pd.read_csv(dst_url_a, sep=';') data.rename(columns={'INDHOLD': 'Population', 'TID': 'Year'}, inplace=True) extra_years = [y for y in np.arange(2020, 2055, 5) if y > data.Year.max()] data = data.append(pd.DataFrame(extra_years, columns=['Year'])) p1975 = data.Year < 1976 p2018 = data.Year < 2019 ### Define a plot function def make_pop_plot(period=p1975): f, ax = plt.subplots(figsize=(10, 5)) ax.scatter(data[period].Year,
def plot_features(
    args, sim_path: str, real_path: str, vcf_path: str, out_dir_path: str
):
    """Generate pairwise plot of simulated and 'real' features

    One PDF is written per VCF record for which both simulated and real
    feature rows exist; records without data are skipped.

    Args:
        args (argparse.Namespace): Additional command line arguments
        sim_path (str): Path to NPSV features from 'simulated' data
        real_path (str): Path to NPSV features from 'real' data
        vcf_path (str): Path to input VCF file
        out_dir_path (str): Directory for plot files
    """

    # Create output directory if it doesn't exist
    os.makedirs(out_dir_path, exist_ok=True)
    logging.info("Generating plots in %s", out_dir_path)

    # Group the data to prepare for querying variants
    sim_data = pd.read_table(sim_path, na_values=".", dtype={"#CHROM": str, "AC": int})
    add_derived_features(sim_data)
    sim_data = sim_data.groupby(VARIANT_COL)

    real_data = pd.read_table(real_path, na_values=".", dtype={"#CHROM": str})
    add_derived_features(real_data)
    real_data = real_data.groupby(VARIANT_COL)

    # Depending on feature extractor, not all features may be available
    available_features = set(sim_data.obj) & set(real_data.obj)
    features = [feature for feature in FEATURE_COL if feature in available_features]

    vcf_reader = vcf.Reader(filename=vcf_path)
    for record in vcf_reader:
        variant = (
            record.CHROM,
            int(record.POS),
            int(record.sv_end),
            record.var_subtype,
        )

        try:
            current_sim = sim_data.get_group(variant)
            current_real = real_data.get_group(variant)
        except KeyError:
            # No data available for this variant, skipping
            logging.debug(
                "No simulated or real data found for %s. Skipping.",
                variant_descriptor(record),
            )
            continue

        # Mark the real observation(s) with AC = -1. assign() returns a new
        # frame instead of writing into the group returned by get_group().
        current_real = current_real.assign(AC=-1)

        # Remove outliers with Z score above threshold
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            current_sim = (
                current_sim.groupby("AC")
                .apply(filter_by_zscore, features, 5)
                .reset_index(drop=True)
            )

        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        plot_data = pd.concat([current_sim, current_real])
        # Don't yet know how to encode AC directly (need strings for plotting)
        plot_data["AC"] = pd.Categorical(
            plot_data["AC"], categories=[0, 1, 2, -1]
        ).rename_categories(["REF", "HET", "HOM", "Act"])

        colors = sns.mpl_palette("Set1", 3) + [(0, 0, 0)]  # Actual data is black
        markers = {"REF": "o", "HET": "o", "HOM": "o", "Act": "s"}

        fig, ((ax11, ax12, ax13, ax14), (ax21, ax22, ax23, ax24)) = plt.subplots(2, 4, figsize=(14, 8))

        sns.scatterplot(ax=ax11, x="REF_READ", y="ALT_READ", data=plot_data, hue="AC", style="AC", markers=markers, palette=colors)
        _set_axis_limits(ax11)
        sns.scatterplot(ax=ax12, x="REF_WEIGHTED_SPAN", y="ALT_WEIGHTED_SPAN", data=plot_data, hue="AC", style="AC", markers=markers, palette=colors)
        _set_axis_limits(ax12)
        sns.scatterplot(ax=ax13, x="INSERT_LOWER", y="INSERT_UPPER", data=plot_data, hue="AC", style="AC", markers=markers, palette=colors)
        plot_hist(ax=ax14, col="CLIP_PRIMARY", data=plot_data, colors=colors)

        plot_hist(ax=ax21, col="COVERAGE", data=plot_data, colors=colors)
        plot_hist(ax=ax22, col="DHFC", data=plot_data, colors=colors)
        plot_hist(ax=ax23, col="DHBFC", data=plot_data, colors=colors)
        plot_hist(ax=ax24, col="DHFFC", data=plot_data, colors=colors)

        # Make plots square
        for ax in fig.get_axes():
            ax.set_aspect(1.0/ax.get_data_ratio(), adjustable='box')

        fig.suptitle("{}:{}-{}".format(*variant), size=16)
        fig.subplots_adjust(top=0.95, wspace=0.3, hspace=0.3)

        # Save plot to file name based on variant descriptor
        description = variant_descriptor(record)
        logging.info("Plotting variant into %s.pdf", description)
        fig.savefig(os.path.join(out_dir_path, f"{description}.pdf"))
        # One figure is created per variant; close it so figures do not
        # accumulate in memory over the whole VCF.
        plt.close(fig)
def plot_PCA(X, cmap='Spectral', colors=None, dims=(0, 1), ax=None, annotate=None, legend=True, **kwargs):
    '''
    Plot PCA from matrix and label.

    The numeric columns of ``X`` are standardised, projected with PCA, and
    the two components selected by ``dims`` are drawn as one scatter series
    per distinct index label.

    :X: dataframe with index as categories
    :cmap: palette name used when ``colors`` is not given
    :colors: optional sequence of colours, one per category
    :dims: dimensions to plot
    :ax: axes to draw on; a new 8x8 inch figure is created when None
    :annotate: optional sequence of texts, one per row, written next to each
        point. When given, the texts are also stored in a ``'lab#el'`` column
        of the returned frame (kept for backward compatibility).
    :legend: if true, shrink the axes and draw a legend beside them
    :kwargs: forwarded to ``ax.scatter``
    :return: DataFrame of the projected points, indexed like ``X``
    '''

    from sklearn import preprocessing
    # The ``sklearn.decomposition.pca`` submodule path is not importable in
    # current scikit-learn; import from the public package instead.
    from sklearn.decomposition import PCA

    X = X._get_numeric_data()
    S = pd.DataFrame(preprocessing.scale(X), columns=X.columns)
    # At most 4 components, fewer when the data has fewer rows or columns.
    pca = PCA(n_components=min(4, *S.shape))
    # Fit once and reuse the projection (the model used to be fitted twice).
    projected = pca.fit_transform(S)
    out = 'explained variance %s' % pca.explained_variance_ratio_
    print(out)
    pX = pd.DataFrame(projected, index=X.index)

    ### graph
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    cats = pX.index.unique()
    if colors is None:
        colors = sns.mpl_palette(cmap, len(cats))
    y1, y2 = dims
    for c, i in zip(colors, cats):
        ax.scatter(pX.loc[i, y1], pX.loc[i, y2], color=c, label=i, edgecolor='black', **kwargs)

    if annotate is not None:
        pX['lab#el'] = annotate
        for pos, (idx, r) in enumerate(pX.iterrows()):
            ax.annotate(annotate[pos], (r[y1], r[y2]), xycoords='data',
                        xytext=(2, 5), textcoords='offset points', fontsize=12)

    ax.set_xlabel("X[%s]" % y1)
    ax.set_ylabel("X[%s]" % y2)
    if legend:
        # Leave room on the right for the legend.
        ax.set_position([0.1, 0.1, 0.5, 0.8])
        ax.legend(loc="best", bbox_to_anchor=(1.0, .9))
    ax.set_title("PCA")
    return pX