def TSNE_manifold_plot(clusterer=None, X=None, cluster_name=""):
    """Project ``X`` to two dimensions with t-SNE and plot it coloured by cluster.

    Parameters
    ----------
    clusterer : object
        Fitted clusterer exposing ``labels_``.  When it also exposes
        ``probabilities_`` each point's colour is passed through
        ``sns.desaturate`` with that point's probability.
    X : array-like
        Data handed to ``TSNE(...).fit_transform``.
    cluster_name : str
        Used in the output file name ``TSNE_<cluster_name>.pdf``.

    Notes
    -----
    Points with a negative label are drawn in grey.  The figure is saved in
    the current working directory and then shown.
    """
    N_projection = 2

    # Transform using Manifold Learning
    X_transformed = TSNE(n_components=N_projection).fit_transform(X)

    labels = clusterer.labels_
    # Ask for one colour per cluster id; seaborn cycles the current palette
    # when more colours are requested than it holds, so large cluster counts
    # no longer index past the end of the palette.
    n_colors = max(int(max(labels, default=-1)) + 1, 1)
    palette = sns.color_palette(n_colors=n_colors)
    noise_color = (0.5, 0.5, 0.5)

    # Explicit capability check instead of a bare ``except`` so that genuine
    # errors while building the colours are not silently swallowed.
    probabilities = getattr(clusterer, "probabilities_", None)
    if probabilities is None:
        cluster_colors = [
            sns.desaturate(palette[col], 1) if col >= 0 else noise_color
            for col in labels
        ]
    else:
        cluster_colors = [
            sns.desaturate(palette[col], sat) if col >= 0 else noise_color
            for col, sat in zip(labels, probabilities)
        ]

    plt.scatter(X_transformed[:, 0], X_transformed[:, 1],
                c=cluster_colors, marker='.')
    plt.title("TSNE Manifold projection for the clustering")
    plt.xlabel("X₁")
    plt.ylabel("X₂")
    plt.savefig("TSNE_" + str(cluster_name) + ".pdf")
    plt.show()
def exclusion_layers(path_to_shapes, path_to_land_cover, path_to_slope,
                     path_to_protected_areas, path_to_settlements,
                     path_to_output, country_code):
    """Visualise the exclusion layers defining land eligibility.

    Reads the shape whose ``country_code`` property matches, loads the four
    rasters clipped to that shape's bounds, draws them on a 2x2 grid and
    saves the figure to ``path_to_output``.
    """
    with fiona.open(path_to_shapes, "r") as shapefile:
        matching_geometries = []
        for feature in shapefile:
            if feature["properties"]["country_code"] == country_code:
                matching_geometries.append(feature["geometry"])
        shape = matching_geometries[0]

    x_min, y_min, x_max, y_max = shapely.geometry.asShape(shape).bounds
    land_cover, slope, protected_areas, esm = _read_raster(
        x_min, y_min, x_max, y_max,
        path_to_land_cover, path_to_slope,
        path_to_protected_areas, path_to_settlements)
    extent = (x_min, x_max, y_min, y_max)

    fig = plt.figure(figsize=(10, 5.5), frameon=True, constrained_layout=True)

    # (subplot position, raster, base colour, title) for each panel.
    panels = (
        (221, land_cover, BLUE, "Exclusion from land cover"),
        (222, slope, YELLOW, "Exclusion from slope"),
        (223, protected_areas, GREEN, "Exclusion from protected areas"),
        (224, esm, RED, "Exclusion from urban settlements"),
    )
    panel_axes = []
    for position, raster, base_colour, title in panels:
        panel_ax = fig.add_subplot(position)
        colours = sns.light_palette(sns.desaturate(base_colour, 0.85)).as_hex()
        show(raster, extent=extent, ax=panel_ax, cmap=ListedColormap(colours))
        panel_ax.set_title(title)
        panel_axes.append(panel_ax)

    # Mask everything outside the shape and strip ticks and frame.
    for panel_ax in panel_axes:
        panel_ax.add_patch(_inverted_shape(shape))
        panel_ax.set_xticks([])
        panel_ax.set_yticks([])
        for side in ("top", "right", "bottom", "left"):
            panel_ax.spines[side].set_visible(False)

    fig.set_constrained_layout_pads(hspace=0.1, wspace=0.1)

    save_options = {"dpi": 600, "transparent": False}
    if path_to_output[-3:] != "png":
        # Anything that is not a PNG is written with LZW compression.
        save_options["pil_kwargs"] = {"compression": "tiff_lzw"}
    fig.savefig(path_to_output, **save_options)
def desaturate(self_or_cls, prop):
    """Return a new scheme with every colour passed through ``sns.desaturate``.

    Works on either a class or an instance: the result is built with the
    class itself, or with the instance's type.  Entries that are ``Cycle``
    objects are rebuilt as a ``Cycle`` of desaturated values; all other
    entries are desaturated directly.
    """
    if isinstance(self_or_cls, type):
        scheme_type = self_or_cls
    else:
        scheme_type = type(self_or_cls)

    desaturated = {}
    for key, entry in self_or_cls.iter_names_and_colors():
        if isinstance(entry, Cycle):
            desaturated[key] = Cycle(
                [sns.desaturate(colour, prop) for colour in entry.values])
        else:
            desaturated[key] = sns.desaturate(entry, prop)
    return scheme_type(**desaturated)
def _plot_layer(units, layer_name, ax, linewidth=0.1):
    """Draw one layer on ``ax`` and annotate its undersupply shares.

    Rows with ``normed_potential >= 1`` are drawn in desaturated green, rows
    below 1 in red, and rows belonging to neither group in grey.  The share
    of rows below 1 and their share of ``population_sum`` are written onto
    the axes next to icon glyphs.
    """
    potential = units["normed_potential"]
    sufficient = units[potential >= 1]
    insufficient = units[potential < 1]
    classified = pd.concat([sufficient, insufficient])
    unclassified = units[~units.isin(classified)].dropna()

    share_of_regions = len(insufficient) / len(units) * 100
    share_of_population = (
        insufficient["population_sum"].sum() / units["population_sum"].sum() * 100
    )

    ax.set_aspect('equal')
    sufficient.plot(
        color=sns.desaturate(GREEN, 0.85),
        linewidth=linewidth,
        edgecolor="white",
        alpha=0.5,
        ax=ax,
    )
    if not insufficient.empty:
        insufficient.plot(color=RED, linewidth=0.1, ax=ax)
    if not unclassified.empty:
        unclassified.plot(color="grey", linewidth=0.1, ax=ax)

    ax.set_xlim(MAP_MIN_X, MAP_MAX_X)
    ax.set_ylim(MAP_MIN_Y, MAP_MAX_Y)
    ax.set_xticks([])
    ax.set_yticks([])
    sns.despine(ax=ax, top=True, bottom=True, left=True, right=True)

    def _icon(glyph, height, colour):
        # Icon glyphs are rendered with the icon font file.
        ax.annotate(
            f"{glyph} ",
            xy=[0.10, height],
            xycoords='axes fraction',
            fontproperties=matplotlib.font_manager.FontProperties(
                fname=PATH_TO_FONT_AWESOME.as_posix()),
            color=colour,
        )

    def _text(content, height):
        ax.annotate(content, xy=[0.17, height], xycoords='axes fraction')

    _icon(LAYER_UNICODE, 0.90, "black")
    _icon(UNITS_UNICODE, 0.85, sns.desaturate(RED, 0.85))
    _icon(POPULATION_UNICODE, 0.80, sns.desaturate(RED, 0.85))
    _text(layer_name, 0.90)
    _text(f"{share_of_regions:.0f}%", 0.85)
    _text(f"{share_of_population:.0f}%", 0.80)
def hist_bin(x, name):
    '''
    Draw a frequency bar chart for a discrete variable.
    @param x: object exposing ``value_counts`` (missing values are counted too)
    @param name: title of the chart
    @return: the figure the chart was drawn on (already closed)
    '''
    figure = plt.figure(figsize=(9, 6))

    counts = x.value_counts(dropna=False)
    categories = counts.index.to_list()
    heights = counts.to_list()
    positions = range(len(categories))

    # Wide bars when there are few categories, narrow ones otherwise.
    if counts.shape[0] < 10:
        bar_width = 0.8
    else:
        bar_width = 0.4

    plt.xlabel('取值')
    plt.ylabel('数量')
    plt.title(name)
    plt.bar(
        x=positions,
        height=heights,
        width=bar_width,
        edgecolor='k',
        color=sns.desaturate("indianred", .8),
        linewidth=0.5,
        yerr=0.000001,
        align="center",
    )
    plt.grid(color='#95a5a6', linestyle='--', linewidth=1, axis='both', alpha=0.4)
    plt.xticks(positions, categories)
    # Closes every open pyplot figure, not only this one.
    plt.close('all')
    return figure
def plot_clusters(clusterer, pca_array, num_clusts, outfile):
    """Scatter-plot the first two columns of ``pca_array`` coloured by cluster.

    Parameters
    ----------
    clusterer : object
        Fitted clusterer exposing ``labels_`` and ``probabilities_``.
    pca_array : array
        ``pca_array.T[0]`` and ``pca_array.T[1]`` are used as x and y after
        being passed through ``rand_jitter``.
    num_clusts : int
        Number of colours requested from the ``tab20`` palette.
    outfile : str
        Output path; every ``".fasta"`` occurrence is replaced by ``".png"``.

    Notes
    -----
    Points labelled ``-1`` are drawn in black; other points use their
    cluster colour desaturated by the membership probability.  Calling this
    function changes the global seaborn context, style and colour codes.
    """
    outfile = outfile.replace(".fasta", ".png")
    palette = sns.color_palette("tab20", num_clusts)
    sns.set_context('poster')
    sns.set_style('white')
    sns.set_color_codes()
    plot_kwds = {'alpha': 0.8, 's': 80, 'linewidths': 0.1}

    cluster_colors = [
        sns.desaturate(palette[col], sat) if col != -1 else (0, 0, 0)
        for col, sat in zip(clusterer.labels_, clusterer.probabilities_)
    ]

    x = rand_jitter(pca_array.T[0])
    y = rand_jitter(pca_array.T[1])
    plt.scatter(x, y, c=cluster_colors, **plot_kwds)

    w = 6.875
    h = 4
    f = plt.gcf()
    f.set_size_inches(w, h)
    # ``ext`` is not a savefig option (recent Matplotlib rejects unknown
    # keywords); ``format='png'`` already selects the output type.
    plt.savefig(outfile, dpi=600, format='png', facecolor='white',
                bbox_inches='tight')
def latency_depth_binstat(depth_df, color='k', shade=None, bin_size=50, ax=None):
    """Plot mean latency per depth bin with a shaded +/- SEM band.

    Parameters
    ----------
    depth_df : pandas.DataFrame
        Containing depth and latency columns
    color : optional
        Colour of the mean line (default 'k').
    shade : optional
        Colour of the SEM band.  When omitted (``None`` or an empty
        sequence) ``sns.desaturate([0, 0, 1], 0.2)`` is used.
    bin_size : optional, int
        Size of depth bins in microns
    ax : optional
        Axes to draw on.  When omitted (``None`` or an empty sequence) a new
        figure and axes are created.

    Returns
    -------
    The axes that were drawn on.
    """
    def _unset(value):
        # ``[]`` used to be the "not given" default; keep accepting it.
        return value is None or (
            isinstance(value, (list, tuple, str)) and len(value) == 0)

    if _unset(ax):
        _, ax = plt.subplots()
    if _unset(shade):
        shade = sns.desaturate([0, 0, 1], 0.2)

    # Drop rows without a latency measurement.
    valid = depth_df['latency'].notna().values
    depths = depth_df['depth'].values[valid].astype(float)
    latencies = depth_df['latency'].values[valid].astype(float)

    bins = np.arange(depths.min(), depths.max(), bin_size)
    # Centre of each bin = left edge + half the bin width.
    bin_centers = bins[:-1] + np.diff(bins) / 2

    x, _, _ = binned_statistic(depths, latencies, 'mean', bins=bins)
    x_sem, _, _ = binned_statistic(depths, latencies, statistic=sem, bins=bins)

    # Draw on the requested axes instead of opening a second figure.
    ax.errorbar(bin_centers, x, color=color)
    ax.fill_between(bin_centers, x - x_sem, x + x_sem, color=shade)
    ax.set_ylim(0, )
    return ax
def boot_ef(diff, X, Y, data, palette):
    """Plot raw data next to the bootstrapped distribution of the mean of ``diff``.

    The left panel is a swarm plot of ``Y`` against ``X``; the right panel is
    a KDE of the bootstrap distribution with the 95% interval and the mean
    marked in desaturated red.  The figure is written as EPS under
    ``data_dir/plots``.

    NOTE(review): ``val``, ``pospal`` and ``data_dir`` are not parameters and
    must exist at module level - confirm against the rest of the file.
    """
    sns.set_context('talk')
    sns.set_style('ticks')

    ef, ef_dist = pg.compute_bootci(
        x=diff, func='mean', method='cper', confidence=.95,
        n_boot=5000, decimals=4, seed=42, return_dist=True)

    fig, ax = plt.subplots(1, 2, sharey=True)
    data_ax = ax[0]
    effect_ax = ax[1]

    # Raw data
    sns.swarmplot(x=X, y=Y, data=data, ax=data_ax,
                  linewidth=2, edgecolor='black', size=8,
                  palette=[palette[2]])
    # Effect size distribution
    sns.kdeplot(ef_dist, shade=True, color='grey', vertical=True, ax=effect_ax)

    desat_r = sns.desaturate('red', .8)
    # Mark the 95% CI of the effect in red
    effect_ax.vlines(0, ef[0], ef[1], color=desat_r, linewidth=2)
    # Mark the mean effect
    y2 = ef_dist.mean()
    effect_ax.scatter(0, y2, s=16, color=desat_r)

    # Dashed reference line at zero effect on both panels
    for a in ax:
        xl = a.get_xlim()
        a.hlines(0, xl[0], xl[1], color='grey', linestyle='--')

    # Despine the plots for aesthetics
    sns.despine(ax=data_ax)
    sns.despine(ax=effect_ax, left=True, right=True)

    effect_ax.set_title('%s 95 CI = %s' % (Y, ef))

    # Tick spacing and limits: [major step, minor step, y-min, y-max]
    ticks = {'scr': [.2, .1, -.2, 1],
             'rt': [150, 75, -600, 300]}
    data_ax.set_ylim(ticks[val][-2], ticks[val][-1])
    data_ax.yaxis.set_major_locator(MultipleLocator(ticks[val][0]))
    data_ax.yaxis.set_minor_locator(MultipleLocator(ticks[val][1]))

    out = 'pg' if palette == pospal else 'fg'
    plt.savefig(os.path.join(data_dir, 'plots', '%s_%s_diff.eps' % (out, val)),
                format='eps')
def _plot_layer(units, layer_name, norm, cmap, ax):
    """Draw one choropleth of ``fraction_non_built_up_land_necessary`` on ``ax``.

    The colour scale is limited to ``norm.vmin``/``norm.vmax``.  The layer
    name and the value returned by
    ``_calculate_population_centered_median_land_demand`` are annotated next
    to icon glyphs.
    """
    ax.set_aspect('equal')
    units.plot(
        linewidth=0.1,
        column="fraction_non_built_up_land_necessary",
        vmin=norm.vmin,
        vmax=norm.vmax,
        cmap=cmap,
        ax=ax,
    )
    ax.set_xlim(MAP_MIN_X, MAP_MAX_X)
    ax.set_ylim(MAP_MIN_Y, MAP_MAX_Y)
    ax.set_xticks([])
    ax.set_yticks([])
    sns.despine(ax=ax, top=True, bottom=True, left=True, right=True)

    # Icon glyphs, rendered with the icon font file.
    icon_rows = (
        (LAYER_UNICODE, 0.90, "black"),
        (AREA_UNICODE, 0.85, sns.desaturate(RED, 0.85)),
    )
    for glyph, height, colour in icon_rows:
        ax.annotate(
            f"{glyph} ",
            xy=[0.10, height],
            xycoords='axes fraction',
            fontproperties=matplotlib.font_manager.FontProperties(
                fname=PATH_TO_FONT_AWESOME.as_posix()),
            color=colour,
        )

    ax.annotate(layer_name, xy=[0.17, 0.90], xycoords='axes fraction')
    median_demand = _calculate_population_centered_median_land_demand(units)
    ax.annotate(f"{median_demand:.0f}%", xy=[0.17, 0.85],
                xycoords='axes fraction')
def region_color(region_name, light=False, sat=None):
    """
    Pick a plotting colour from the colormap returned by ``region_cmap``.

    Parameters
    ----------
    region_name : str
        Structure recorded from
    light : optional, bool
        Return the first colour of the map instead of the last
        (default = False)
    sat : optional, float
        When truthy, the colour is passed through ``sns.desaturate(c, sat)``.
        ``None``, ``0`` and an empty list all mean "leave unchanged".

    Returns
    -------
    Hex colour string when ``sat`` is not given.
    NOTE(review): when ``sat`` is given the result is whatever
    ``sns.desaturate`` returns (an RGB tuple per the seaborn docs), not a
    hex string - confirm callers accept both.
    """
    cmap = region_cmap(region_name)
    chosen = cmap.colors[0] if light else cmap.colors[-1]
    c = rgb2hex(chosen)
    if sat:
        c = sns.desaturate(c, sat)
    return c
def plot(self, fig_size=None, label_every=0, clusters=False):
    """
    Scatter-plot ``self.embed`` and optionally label and colour the points.

    The label_every is used to determine what fraction of points should
    be labeled, if it is not 0 the structure is: names[::label_every]

    Parameters
    ----------
    fig_size : optional
        Passed to ``plt.figure(figsize=...)``.
    label_every : int
        When > 0, every ``label_every``-th name is written next to the
        point with the same index.
    clusters : bool
        When true, points are coloured from ``self.clusters.labels_`` and
        desaturated by ``self.clusters.probabilities_``; negative labels
        are drawn in grey.
    """
    plt.figure(figsize=fig_size)

    if label_every > 0:
        # Subsample the coordinates with the same stride as the names so
        # each label sits on its own point (indexing ``embed`` with the
        # position inside the subsampled list picked the wrong rows).
        labelled_points = self.embed[::label_every]
        labelled_names = self.names[::label_every]
        for point, name in zip(labelled_points, labelled_names):
            plt.text(point.T[0], point.T[1], name, fontsize=9)

    xs = self.embed[:].T[0]
    ys = self.embed[:].T[1]
    if clusters:
        palette = sns.color_palette(n_colors=len(self.clusters.labels_))
        cluster_colors = [
            sns.desaturate(palette[col], sat) if col >= 0 else (0.5, 0.5, 0.5)
            for col, sat in zip(self.clusters.labels_,
                                self.clusters.probabilities_)
        ]
        plt.scatter(xs, ys, c=cluster_colors)
    else:
        plt.scatter(xs, ys)

    plt.axis('off')
    plt.show()
def stim_color(stim_name, desat=None, ind=None):
    """
    Return the colour palette (or one colour of it) used for a stimulus.

    Parameters
    ----------
    stim_name : str
        Stimulus name.  Only ``natural_scenes`` and ``static_gratings`` are
        handled; any other name (including ``drifting_gratings``) prints a
        message and returns ``None``.
    desat : optional, float
        When truthy, every colour is passed through
        ``sns.desaturate(colour, desat)`` (default = no change)
    ind : optional, integer
        Choose just one of the color palette (default = None, returns all).
        An empty list is still accepted as "not given".

    Returns
    -------
    The palette, a single colour when ``ind`` is given, or ``None`` for an
    unrecognised stimulus.
    """
    # Compare by value: ``is`` tests object identity and only matched by
    # accident of string interning.
    if stim_name == 'natural_scenes':
        cmap = sns.light_palette((260, 75, 60), input="husl")
    elif stim_name == 'static_gratings':
        cmap = sns.light_palette('seagreen')
    else:
        print('Unrecognized stimulus: {}'.format(stim_name))
        return

    if desat:
        # ``sns.desaturate`` works on one colour at a time.
        cmap = [sns.desaturate(color, desat) for color in cmap]

    # Test for "not given" explicitly so that index 0 can be selected.
    if ind is None or (isinstance(ind, (list, tuple)) and len(ind) == 0):
        return cmap
    return cmap[ind]
def TSNE_manifold_plot_prob(labels=None, X=None, cluster_name=None, transform=True):
    """Scatter-plot a 2-D view of ``X`` coloured by ``labels``.

    Parameters
    ----------
    labels : sequence of int
        Cluster id per row; negative ids are drawn in grey.
    X : array
        Data to plot.  Projected with t-SNE when ``transform`` is true,
        otherwise it must already have exactly two columns.
    cluster_name : str, optional
        When given, the figure is saved as ``plots/TSNE_<cluster_name>.pdf``.
    transform : bool
        Whether to run t-SNE on ``X`` first.

    Returns
    -------
    ``-1`` when ``transform`` is false and ``X`` does not have two columns;
    otherwise ``None`` after showing the figure.
    """
    N_projection = 2

    # Transform using Manifold Learning
    if transform == True:  # noqa: E712 - equality kept so non-bool flags behave as before
        X_transformed = TSNE(n_components=N_projection).fit_transform(X)
    else:
        # Both columns 0 and 1 are plotted below, so fewer than two columns
        # is just as unusable as more than two.
        if X.shape[1] != N_projection:
            print("GIVE 2 DIMENSIONAL DATA")
            return -1
        X_transformed = X

    palette = sns.color_palette('husl', (max(labels) + 1))
    cluster_colors = [
        sns.desaturate(palette[col], 1) if col >= 0 else (0.5, 0.5, 0.5)
        for col in labels
    ]

    plt.scatter(X_transformed[:, 0], X_transformed[:, 1],
                c=cluster_colors, marker='.')
    plt.title("TSNE Manifold projection for the clustering")
    plt.xlabel("X₁")
    plt.ylabel("X₂")

    if cluster_name is not None:
        plt.savefig("plots/TSNE_" + str(cluster_name) + ".pdf")

    plt.show()
def _plot_layer(units, layer_name, norm, cmap, ax):
    """Draw one choropleth of the ``cost`` column on ``ax``.

    The colour scale is limited to ``norm.vmin``/``norm.vmax``.  The layer
    name and the mean of ``units.cost`` are annotated next to icon glyphs.
    """
    ax.set_aspect('equal')
    units.plot(
        linewidth=0.1,
        column="cost",
        vmin=norm.vmin,
        vmax=norm.vmax,
        cmap=cmap,
        ax=ax,
    )
    ax.set_xlim(MAP_MIN_X, MAP_MAX_X)
    ax.set_ylim(MAP_MIN_Y, MAP_MAX_Y)
    ax.set_xticks([])
    ax.set_yticks([])
    sns.despine(ax=ax, top=True, bottom=True, left=True, right=True)

    # Icon glyphs, rendered with the icon font file.
    icon_rows = (
        (LAYER_UNICODE, 0.90, "black"),
        (MONEY_UNICODE, 0.85, sns.desaturate(RED, 0.85)),
    )
    for glyph, height, colour in icon_rows:
        ax.annotate(
            f"{glyph} ",
            xy=[0.10, height],
            xycoords='axes fraction',
            fontproperties=matplotlib.font_manager.FontProperties(
                fname=PATH_TO_FONT_AWESOME.as_posix()),
            color=colour,
        )

    ax.annotate(layer_name, xy=[0.17, 0.90], xycoords='axes fraction')
    ax.annotate(f"{units.cost.mean():.2f}€/kWh", xy=[0.17, 0.85],
                xycoords='axes fraction')
def plotting_all_vars_dbscan(vars=None, clusterer=None, data=None):
    """Scatter every pair of distinct variables, coloured by cluster label.

    Parameters
    ----------
    vars : sequence of str, optional
        Column names of ``data`` to plot against each other.  The last name
        is never used on the x axis.
    clusterer : object
        Fitted clusterer exposing ``labels_``; negative labels are drawn in
        semi-transparent grey.
    data : mapping / DataFrame
        Indexed with the names in ``vars``.

    Notes
    -----
    This plot is not optimized and some graphs are repeated.  The figure is
    saved as ``dbscan_res.png`` in the working directory and then closed.
    """
    columns = [] if vars is None else vars
    n_vars = len(columns)

    # Define the size of the canvas and the distance between points
    fig = plt.figure(figsize=(30, 30))
    fig.subplots_adjust(hspace=0.8, wspace=0.8)

    pairs = [
        (colname1, colname2)
        for colname1 in columns[:(n_vars - 1)]
        for colname2 in columns
        if colname1 != colname2
    ]

    if pairs:
        labels = clusterer.labels_
        # One colour per cluster id; seaborn cycles the current palette when
        # more colours are requested than it holds.
        n_colors = max(int(max(labels, default=-1)) + 1, 1)
        palette = sns.color_palette(n_colors=n_colors)
        # The colours do not depend on the pair being plotted: build once.
        cluster_colors = [
            sns.desaturate(palette[col], 1) if col >= 0 else (0.5, 0.5, 0.5, 0.5)
            for col in labels
        ]

    for i, (colname1, colname2) in enumerate(pairs, start=1):
        ax = plt.subplot(n_vars, n_vars, i)
        ax.scatter(data[colname1], data[colname2], c=cluster_colors, marker='.')
        plt.xlabel(colname1)
        plt.ylabel(colname2)

    plt.savefig("dbscan_res.png")
    plt.close()
def plot_clusters_and_atm(atm_json, c, high_traffic_vertex_array, atm_lon_lat_2d_array):
    """Plot clustered high-traffic vertices together with current and new ATMs.

    ``c`` must expose ``labels_`` and ``probabilities_``.  Vertices with a
    negative label are grey; the others take one of 12 'bright' colours,
    desaturated by their membership probability.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # PLOT HIGH TRAFFIC VERTICES
    color_palette = sns.color_palette('bright', 12)
    grey = (0.5, 0.5, 0.5)
    base_colors = []
    for label in c.labels_:
        base_colors.append(color_palette[label] if label >= 0 else grey)
    member_colors = []
    for base, probability in zip(base_colors, c.probabilities_):
        member_colors.append(sns.desaturate(base, probability))

    ax.scatter(*high_traffic_vertex_array.T,
               s=50, linewidth=0, c=member_colors, alpha=0.3)

    # PLOT CURRENT ATM
    plot_current_atm(atm_json, ax)

    # PLOT NEW ATM POINTS
    plot_new_atm(atm_lon_lat_2d_array, ax)

    ax.legend(fontsize=8, facecolor='oldlace', edgecolor='r', loc='lower right')
def plot_all_vars_prob(labels=None, variables=None, data=None, cluster_name=None):
    """Scatter every pair of distinct variables, coloured by ``labels``.

    Parameters
    ----------
    labels : sequence of int
        Cluster id per row; negative ids are drawn in semi-transparent grey.
    variables : sequence of str, optional
        Column names of ``data`` to plot against each other.  The last name
        is never used on the x axis.
    data : mapping / DataFrame
        Indexed with the names in ``variables``.
    cluster_name : str, optional
        When given, the figure is saved as
        ``plots/mutliplot_<cluster_name>.pdf``.

    Notes
    -----
    This plot is not optimized and some graphs are repeated.
    """
    columns = [] if variables is None else variables
    n_vars = len(columns)

    # Define the size of the canvas and the distance between points
    fig = plt.figure(figsize=(30, 30))
    fig.subplots_adjust(hspace=0.8, wspace=0.8)

    # Define color palette
    palette = sns.color_palette('husl', (max(labels) + 1))

    pairs = [
        (colname1, colname2)
        for colname1 in columns[:(n_vars - 1)]
        for colname2 in columns
        if colname1 != colname2
    ]

    if pairs:
        # The colours do not depend on the pair being plotted: build once.
        cluster_colors = [
            sns.desaturate(palette[col], 1) if col >= 0 else (0.5, 0.5, 0.5, 0.5)
            for col in labels
        ]

    for i, (colname1, colname2) in enumerate(pairs, start=1):
        ax = plt.subplot(n_vars, n_vars, i)
        ax.scatter(data[colname1], data[colname2], c=cluster_colors, marker='.')
        plt.xlabel(colname1)
        plt.ylabel(colname2)

    if cluster_name is not None:
        # File name (including its spelling) kept so existing output paths
        # do not change.
        plt.savefig("plots/mutliplot_" + str(cluster_name) + ".pdf")

    plt.show()
def plot_map(path_to_continental_shape, path_to_national_shapes,
             path_to_regional_shapes, path_to_plot):
    """Plot maps of results.

    Reads the three shape files, assigns each a ``cost`` column (a constant
    for the continental layer, seeded normal draws for the others) and draws
    four layers on a 2x2 grid with a shared colour bar.
    """
    np.random.seed(123456789)

    fig = plt.figure(figsize=(8, 8), constrained_layout=True)
    axes = fig.subplots(2, 2).flatten()
    norm = matplotlib.colors.Normalize(vmin=0, vmax=0.25)
    base_colour = sns.desaturate(RED, 0.85)
    cmap = sns.light_palette(base_colour, reverse=False, as_cmap=True)

    def _load(path):
        return gpd.read_file(path).to_crs(EPSG_3035_PROJ4)

    continental = _load(path_to_continental_shape)
    national = _load(path_to_national_shapes)
    regional = _load(path_to_regional_shapes)
    regional_relaxed = regional.copy()

    # Draw order matters: it fixes which seeded random numbers each layer gets.
    continental["cost"] = 0.1
    national["cost"] = np.random.normal(loc=0.14, scale=0.04, size=len(national))
    regional["cost"] = np.random.normal(loc=0.18, scale=0.04, size=len(regional))
    regional_relaxed["cost"] = np.random.normal(
        loc=0.12, scale=0.04, size=len(regional_relaxed))

    layers = (
        (continental, "continental"),
        (national, "national"),
        (regional, "regional"),
        (regional_relaxed, "regional with trade"),
    )
    for position, (layer, label) in enumerate(layers):
        _plot_layer(layer, label, norm, cmap, axes[position])

    _plot_colorbar(fig, axes, norm, cmap)
    fig.savefig(path_to_plot, dpi=300, transparent=True)
def show_statistics(data, stockid):
    """Draw a 12-bin histogram of ``data``, save it as a JPEG and show it.

    Parameters
    ----------
    data : array-like
        Values passed to ``plt.hist``.
    stockid : str
        Used in the output file name.
    """
    plt.title("count")
    plt.hist(data, bins=12, color=sns.desaturate("indianred", .8), alpha=.4)
    # Raw string: in a normal literal ``\U`` starts a unicode escape, which
    # makes the whole module fail to compile on Python 3.  The characters of
    # the path (including the space after the last backslash) are unchanged.
    plt.savefig(r'C:\Users\Dolphin\Desktop\statistics\ ' + stockid + '.jpg')
    plt.show()
def get_cluster_colors(clusterer, palette='hls'):
    """Build one colour per point from cluster labels and probabilities.

    Negative labels map to grey.  When the clusterer exposes
    ``probabilities_`` every colour is additionally desaturated by the
    point's probability.
    """
    distinct_labels = np.unique(clusterer.labels_)
    color_palette = sns.color_palette(palette, len(distinct_labels))
    grey = (0.5, 0.5, 0.5)

    cluster_colors = []
    for label in clusterer.labels_:
        cluster_colors.append(color_palette[label] if label >= 0 else grey)

    if not hasattr(clusterer, 'probabilities_'):
        return cluster_colors

    return [
        sns.desaturate(colour, probability)
        for colour, probability in zip(cluster_colors, clusterer.probabilities_)
    ]
def debug_pic(self, clusterer, coords, debug_pic_name, outliers):
    """Save a scatter plot of ``coords`` coloured by cluster membership.

    Negative labels are grey; other points take one of 20 'deep' colours,
    desaturated by ``clusterer.probabilities_``.  The picture is written to
    ``<debug_pic_name>.png``.  ``outliers`` is currently not used.
    """
    color_palette = sns.color_palette('deep', 20)
    grey = (0.5, 0.5, 0.5)

    member_colors = []
    pairs = zip(
        [color_palette[label] if label >= 0 else grey
         for label in clusterer.labels_],
        clusterer.probabilities_,
    )
    for base, probability in pairs:
        member_colors.append(sns.desaturate(base, probability))

    plt.scatter(*list(coords), c=member_colors, linewidth=0)
    # plt.scatter(*list(coords[:,outliers].T), linewidth=0, c='red')
    plt.savefig(debug_pic_name + ".png", bbox_inches='tight')
def hist_distribute(x: pd.Series, title: str, nbin=10):
    '''
    Draw a histogram of a series and return its figure.

    :param x: pandas series
    :param title: plot name
    :param nbin: number of bins passed to ``hist``
    :return: matplot figure (already closed)
    '''
    # Open a figure of the module-wide size for ``hist`` to draw on.
    plt.figure(figsize=figure_size)
    histogram_axes = x.hist(color=sns.desaturate("indianred", .8), bins=nbin)
    figure = histogram_axes.get_figure()
    plt.title(title)
    # Closes every open pyplot figure, not only this one.
    plt.close('all')
    return figure
def compare_distributions(degree_list, freq_list, original_distribution, log, name, path, save):
    """Plot an experimental degree distribution against the theoretical one.

    Parameters
    ----------
    degree_list : sequence
        X values shared by both curves.
    freq_list : sequence
        Experimental frequencies, drawn as markers.
    original_distribution : sequence
        Theoretical values, drawn as a line.
    log : bool
        Use logarithmic axes and limit the x axis via ``configure_x_axis``.
    name : str
        Sub-title of the figure.
    path : str
        Prefix of the output file; ``_original.pdf`` is appended.
    save : bool
        Whether to write the figure to disk.
    """
    plt.plot(degree_list, freq_list, 'o', ms=8,
             color=sns.desaturate("darkkhaki", .80))
    plt.plot(degree_list, original_distribution, '-', ms=8,
             color=sns.desaturate("blueviolet", .80))
    plt.suptitle('Theoretical vs. Experimental Degree Distributions',
                 fontweight='bold', fontsize=12)
    plt.title(name, style='italic', fontsize=9)
    # Pass the on/off flag positionally: its keyword was renamed from ``b``
    # to ``visible`` in newer Matplotlib, the position works everywhere.
    plt.grid(True, which='minor', linestyle='-')
    plt.tick_params(axis='both', which='both', direction='out', color='0.75',
                    length=3, width=1, top=False, right=False)
    plt.minorticks_on()

    # NOTE(review): the x-limit setup is assumed to belong to the ``log``
    # branch (as in the sibling ``plot_pdf``) - confirm against the
    # original layout.
    if log:
        plt.xscale('log')
        plt.yscale('log')
        axes = plt.gca()
        xmin, xmax = configure_x_axis(np.min(degree_list), np.max(degree_list))
        axes.set_xlim([xmin, xmax])

    if save:
        plt.savefig(path + '_original.pdf', bbox_inches='tight')
    # NOTE(review): assumed to run whether or not the figure was saved.
    plt.close()
def plot_clustering(projection, cluster_data, title):
    """
    Plot the clustering in 2d and save it in pdf.

    Column 1 of ``cluster_data`` is read as the cluster label and column 2
    as the membership probability.  Negative labels are drawn in grey.
    Visualization can be improved.
    """
    labels = cluster_data[:, 1]
    probabilities = cluster_data[:, 2]

    color_palette = sns.color_palette('Paired', len(labels))
    grey = (0.5, 0.5, 0.5)

    member_colors = []
    for label, probability in zip(labels, probabilities):
        base = color_palette[label] if label >= 0 else grey
        member_colors.append(sns.desaturate(base, probability))

    plt.scatter(projection[:, 0], projection[:, 1], c=member_colors, **PLOT_KWDS)
    plt.title(title)
    plt.savefig(PLOT_DIR / f"{title}.pdf")
def income_clustering(year):
    """Cluster persons of office 14 by own vs. relatives' income and plot them.

    Entries from ``tools.get_json()`` are filtered to ``year`` and office id
    14.  For each entry the incomes without a ``relative`` and those with
    one are summed, the pairs are clustered with HDBSCAN, cluster members
    are printed and the clustered points are shown in a scatter plot.

    Parameters
    ----------
    year : int
        Value compared against ``entry['main']['year']``.
    """
    json_data = tools.get_json()
    json_data = tools.filter_data(json_data,
                                  lambda e: e['main']['year'] == year)
    json_data = tools.filter_data(json_data,
                                  lambda e: e['main']['office']['id'] == 14)

    data = []
    persons = []
    for entry in json_data:
        person_id = entry['main']['person']['id']
        income_self = sum(
            e['size'] for e in entry['incomes'] if e['relative'] is None)
        income_rel = sum(
            e['size'] for e in entry['incomes'] if e['relative'] is not None)
        data.append([income_self, income_rel])
        persons.append(person_id)

    dframe = pd.DataFrame(data, index=persons,
                          columns=['income_self', 'income_rel'])
    print(dframe)

    classifier = hdbscan.HDBSCAN(min_cluster_size=5).fit(dframe)

    classified = []
    # Probability of each classified point, kept in the same order as
    # ``classified`` so colours and coordinates stay aligned.
    classified_probabilities = []
    cluster_ids = sorted(set(x for x in classifier.labels_ if x >= 0))
    for label in cluster_ids:
        print('Cluster label: ', label)
        ids = [i for i, x in enumerate(classifier.labels_) if x == label]
        for i in ids:
            print(persons[i], person_id2name[str(persons[i])], data[i])
            classified.append((label, persons[i], data[i][0], data[i][1]))
            classified_probabilities.append(classifier.probabilities_[i])
        print('\n')

    # At least the 20 colours used before, more if there are more clusters.
    n_colours = max(20, len(cluster_ids) and int(max(cluster_ids)) + 1)
    colour_palette = sns.color_palette('deep', n_colours)
    cluster_member_colours = [
        sns.desaturate(colour_palette[point[0]], probability)
        for point, probability in zip(classified, classified_probabilities)
    ]

    dx = [x[2] for x in classified]
    dy = [x[3] for x in classified]
    plt.scatter(x=dx, y=dy, s=10, linewidth=0,
                c=cluster_member_colours, alpha=1)
    plt.show()
def plot_pdf(log, degrees_list, division, kmin, kmax, name, path, save, num_bins):
    """Plot the normalised degree histogram (PDF) of ``degrees_list``.

    Parameters
    ----------
    log : bool
        Use a logarithmic histogram and x axis, estimate the exponent with
        ``exponent_estimation`` and set axis limits via the
        ``configure_*_axis`` helpers.
    degrees_list : sequence
        Degrees to histogram; each sample is weighted ``1 / len``.
    division : bins specification passed to ``plt.hist``
    kmin, kmax : numbers passed to ``configure_x_axis`` (log mode only)
    name : str
        Title prefix.
    path : str
        Prefix of the output file name.
    save : bool
        Whether to write ``<path>_<num_bins>bins_PDF.pdf``.
    num_bins : int
        Only used in the output file name.
    """
    weights = np.ones_like(degrees_list) / float(len(degrees_list))
    pdf = plt.hist(degrees_list, bins=division, weights=weights, log=log,
                   alpha=0.8, color=sns.desaturate("darkkhaki", .80))
    # Pass the on/off flag positionally: its keyword was renamed from ``b``
    # to ``visible`` in newer Matplotlib, the position works everywhere.
    plt.grid(True, which='minor', linestyle='-')
    plt.tick_params(axis='both', which='both', direction='out', color='0.75',
                    length=3, width=1, top=False, right=False)
    plt.minorticks_on()
    plt.xlabel("k")
    plt.ylabel("P(k)")

    if log:
        gamma = exponent_estimation(division, pdf)
        plt.suptitle(name + ' PDF', fontweight='bold', fontsize=12)
        plt.title('Estimated gamma: ' + str(gamma), fontweight='bold',
                  style='italic', fontsize=9)
        plt.xscale('log')
        axes = plt.gca()
        ymin, ymax = configure_y_axis(pdf)
        axes.set_ylim([ymin, ymax])
        xmin, xmax = configure_x_axis(kmin, kmax)
        axes.set_xlim([xmin, xmax])
    else:
        plt.title(name + ' PDF', fontweight='bold', fontsize=12)

    if save:
        plt.savefig(path + '_' + str(num_bins) + 'bins_PDF.pdf',
                    bbox_inches='tight')
    # NOTE(review): show/close are assumed to run whether or not the figure
    # was saved - confirm against the original layout.
    plt.show()
    plt.close()
def _map(unit_layers, layer_names, path_to_plot):
    """Draw the first four unit layers on a 2x2 grid and save the figure.

    All panels share a 0..1 colour scale based on a light palette of
    desaturated red, and a common colour bar.
    """
    fig = plt.figure(figsize=(8, 8), constrained_layout=True)
    axes = fig.subplots(2, 2).flatten()
    norm = matplotlib.colors.Normalize(vmin=0, vmax=1)
    base_colour = sns.desaturate(RED, 0.85)
    cmap = sns.light_palette(base_colour, reverse=False, as_cmap=True)

    # Exactly four panels; indexing fails loudly if fewer layers are given.
    for position in range(4):
        _plot_layer(unit_layers[position], layer_names[position],
                    norm, cmap, axes[position])

    _plot_colorbar(fig, axes, norm, cmap)
    fig.savefig(path_to_plot, dpi=300, transparent=True)
def plot_vars(tasks, contrasts, axes=None, xlabel='Value', standardize=False):
    """Draw one strip plot per task showing the task's contrast variables.

    For every task, the columns of ``contrasts`` whose name starts with the
    task name are selected; tasks without matching columns are skipped.
    Each variable's mean is appended to its label and drawn as a large dot.

    NOTE(review): ``axes`` is indexed by task position, so despite the
    ``None`` default a sequence of axes must be supplied.
    """
    colors = sns.hls_palette(4)
    desat_colors = [sns.desaturate(c, .5) for c in colors]

    for panel, task in enumerate(tasks):
        subset = contrasts.filter(regex='^' + task)
        if subset.shape[1] == 0:
            continue

        if standardize:
            subset = subset / subset.std()
        subset.columns = [column.split('.')[1] for column in subset.columns]
        subset.columns = format_variable_names(subset.columns)

        # add mean value to columns
        means = subset.mean()
        subset.columns = [
            subset.columns[position] + ': %s' % format_num(means.iloc[position])
            for position in range(len(means))
        ]
        subset = subset.melt(var_name='Variable', value_name='Value')

        task_ax = axes[panel]
        sns.stripplot(x='Value', y='Variable', hue='Variable', ax=task_ax,
                      data=subset, palette=desat_colors, jitter=True, alpha=.75)

        # plot central tendency
        N = len(means)
        task_ax.scatter(means, range(N), s=200, c=colors[:N],
                        edgecolors='white', linewidths=2, zorder=3)

        # add legend
        leg = task_ax.get_legend()
        leg.set_title('')
        beautify_legend(leg, colors=colors, fontsize=14)

        # change axes: symmetric x range around zero
        max_val = subset.Value.abs().max()
        task_ax.set_xlim(-max_val, max_val)
        task_ax.set_xlabel(xlabel, fontsize=16)
        task_ax.set_ylabel('')
        task_ax.set_yticklabels('')
        task_ax.set_title(format_variable_names([task])[0].title(), fontsize=20)

    plt.subplots_adjust(hspace=.3)
def Describe(self):
    """Print sample rows and summary statistics, then plot each column.

    The first and last five rows and ``describe()`` of ``self.data`` are
    printed.  Every column is then drawn as a 40-bin distribution plot on a
    2x3 subplot grid, with dotted lines at the mean (green) and median
    (blue).
    """
    separator = '-'*60
    print('数据示例:', '\n', self.data.head(5), '\n', self.data.tail(5), '\n', separator)
    print('描述性统计分析:', '\n', self.data.describe(), '\n', separator)

    # Frequency histograms
    cols = self.data.columns
    # ``indice`` is the subplot index, starting at 1
    for indice, i in enumerate(cols, start=1):
        plt.subplot(2, 3, indice)  # draw into the next subplot
        plt.subplots_adjust(wspace =0.3, hspace =0.4)
        # could be changed to hist
        sns.distplot(self.data[i], color=sns.desaturate("indianred", .8), bins=40)
        plt.axvline(x=self.data[i].mean(), ls=":", c="green")
        plt.axvline(x=self.data[i].median(), ls=":", c="blue")
def generate_hdbscan_cluster_plot(proj, mod, pdata, kvalue, colone, coltwo):
    """Cluster prediction explanations with HDBSCAN and scatter two columns.

    ``pdata`` is down-sampled, its prediction explanations are retrieved and
    clustered, and the sampled rows are plotted as ``colone`` vs ``coltwo``
    with one colour per cluster label, desaturated by the membership
    probability.

    Returns the ``plt`` module so the caller can show or save the figure.
    """
    rdata = sample_down(pdata)
    expl_rows = pe.retrieve_prediction_explanations(proj, mod, rdata)
    dfsample, clusterer = hdbscan_cluster_by_strength(proj, expl_rows, kvalue)
    print("HDBScan Clustering finished")

    dim1 = rdata[colone]
    dim2 = rdata[coltwo]

    distinct_labels = np.unique(clusterer.labels_)
    pal = sns.color_palette('deep', len(distinct_labels))

    colors = []
    for label, probability in zip(clusterer.labels_, clusterer.probabilities_):
        colors.append(sns.desaturate(pal[label], probability))

    plt.scatter(dim1, dim2, c=colors)
    return plt
def plot_results(clusterer, data, p4id, kind, reduced_data=None, ax=None):
    """Scatter the clustered markings over the subframe image.

    ``data`` columns ``x`` and ``y`` are plotted with colours taken from
    ``clusterer.hdbscan`` labels and probabilities (negative labels in
    grey).  When ``reduced_data`` is given it is drawn on top with the
    ``p4id`` plotting method selected by ``kind`` ('blotch' or 'fan').
    """
    functions = dict(blotch=p4id.plot_blotches, fan=p4id.plot_fans)
    if ax is None:
        fig, ax = plt.subplots()

    plot_kwds = {'alpha': 0.8, 's': 10, 'linewidths': 0}
    palette = sns.color_palette('bright', clusterer.n_clusters)
    grey = (0.5, 0.5, 0.5)

    base_colors = []
    for label in clusterer.hdbscan.labels_:
        base_colors.append(palette[label] if label >= 0 else grey)
    member_colors = []
    for base, probability in zip(base_colors, clusterer.hdbscan.probabilities_):
        member_colors.append(sns.desaturate(base, probability))

    p4id.show_subframe(ax=ax)
    ax.scatter(data.loc[:, 'x'], data.loc[:, 'y'], c=member_colors, **plot_kwds)

    # pick correct function for kind of marking:
    if reduced_data is not None:
        draw_reduced = functions[kind]
        draw_reduced(ax=ax, data=reduced_data, lw=1)
# Script fragment: writes `seq` as a MIDI file to /tmp/tmp.mid, runs timidity
# on that file through the shell, then scatter-plots `tnse_themes` coloured by
# cluster label.
# Colour rules: points whose label equals `main_label` are black, other
# non-negative labels take their 'Paired' palette colour, negative labels are
# grey. The `sat` value unpacked from `probabilities_` is not used, and
# `sns.desaturate(..., 1)` is called with a proportion of 1.
# `seq`, `FNULL`, `clusterer`, `main_label`, `tnse_themes` and `plot_kwds` are
# not defined in this fragment - presumably set earlier in the script.
# The trailing commented-out lines are an alternative using
# AgglomerativeClustering.
mf = utils.np_seq2mid(seq) mf.open('/tmp/tmp.mid', 'wb') mf.write() mf.close() subprocess.call("/usr/local/bin/timidity -D 0 -R 1000 /tmp/tmp.mid", stdout=FNULL, stderr=FNULL, shell=True) palette = sns.color_palette("Paired", max(clusterer.labels_)+1) zipped = zip(clusterer.labels_, clusterer.probabilities_) cluster_colors = [] # make the colors for col, sat in zipped: if col == main_label: cluster_colors.append((0., 0., 0.)) elif col >= 0: cluster_colors.append(sns.desaturate(palette[col], 1)) else: cluster_colors.append((0.5, 0.5, 0.5)) plt.scatter(tnse_themes[:, 0], tnse_themes[:, 1], c=cluster_colors, **plot_kwds) plt.show() # agglomerative # ward = AgglomerativeClustering(n_clusters=20, linkage='ward').fit(tnse_themes) # palette = sns.color_palette("Set2", max(ward.labels_)+1) # cluster_colors = [sns.desaturate(palette[col], 1) # if col >= 0 else (0.5, 0.5, 0.5) for col in # ward.labels_] # plt.scatter(tnse_themes[:, 0], tnse_themes[:, 1], c=cluster_colors, **plot_kwds) # plt.show()
# process_files(args): compare two sequence-count tables in a log10 scatter plot.
#
# What the code does, in order:
#   * sets the seaborn style/context and reads `args.file_a` / `args.file_b`
#     with `pd.read_table` (through `zcat` when `args.gzipped` is set);
#   * optionally adds a 'CPM' column and filters rows by count / CPM cut-offs;
#     the filters rebind `file_list[idx]`, while `n_x`, `n_y`, `u_x`, `u_y`
#     are computed from `file_a` / `file_b`;
#   * merges the two tables on `args.on`, optionally keeps only sequences with
#     given prefixes or lengths, and replaces the x/y columns by their log10;
#   * draws either a plain scatter or a `sns.JointGrid` with marginal
#     distributions, optionally highlights rows listed in `U_SNRNA_DATAFRAME`,
#     adds a legend, Spearman rho and count statistics as text;
#   * saves the figure to `args.outfile` and closes it.
#
# NOTE(review): `merged['THREEPRIME_SEQUENCE_x']` uses the name `merged`,
#   which is never assigned in this function - presumably `merged_df` was meant.
# NOTE(review): `original_df` is assigned right after the
#   `args.restrict_lengths` filter but read in the `args.include_par` branch;
#   the original indentation is lost here, so confirm it is always bound.
# NOTE(review): `l` only exists when the legend was drawn; the
#   `try/except NameError` near the end covers the other case.
def process_files(args): sns.set_style("darkgrid", {"axes.facecolor": "#E5E5E5", 'font.family': 'Arial'}) sns.set_context('paper') if args.no_header: names = ['SEQUENCE', 'COUNT'] else: names = None if args.include_par: assert args.include_par == 'GCAAAAGCAG' or args.include_par == 'GCGAAAGCAG' if args.gzipped: args.file_a, args.file_b = (StringIO(Popen(['zcat', f.name], stdout=PIPE).communicate()[0]) \ for f in [args.file_a, args.file_b]) file_a = pd.read_table(args.file_a, index_col=None, names=names) file_b = pd.read_table(args.file_b, index_col=None, names=names) file_list = [file_a, file_b] for idx, f in enumerate(file_list): for col in (args.x_col, args.y_col): if args.cpm: f['CPM'] = f[col] / float(f[col].sum()) f['CPM']= f['CPM'] * 1000000. if args.greater_than_zero: file_list[idx] = file_list[idx][file_list[idx][col] > 0] if args.cutoff_reads: file_list[idx] = file_list[idx][file_list[idx][col] >= args.cutoff_reads] if args.cutoff_cpm: file_list[idx] = file_list[idx][file_list[idx]['CPM'] >= args.cutoff_cpm] n_x = file_a[args.x_col].astype(int).sum() n_y = file_b[args.y_col].astype(int).sum() u_x, u_y = (len(df) for df in (file_a, file_b)) merged_df = pd.merge(file_list[0], file_list[1], on=args.on, how=args.how).fillna(0.) 
if args.x_col == args.y_col: args.x_col += '_x' args.y_col += '_y' if args.only_starts_with is not None: for word in args.only_starts_with: merged_df = merged_df[merged_df[args.on].map( lambda x: x.startswith(word))] n_ux = merged_df[args.x_col].astype(int).sum() n_uy = merged_df[args.y_col].astype(int).sum() for col in (args.x_col, args.y_col): merged_df = merged_df.replace([np.inf, -np.inf], np.nan).dropna(subset=[col], how="all") merged_df[col] = merged_df[col].map(lambda x: np.log10(x)) merged_df = merged_df.replace([np.inf, -np.inf], np.nan).dropna(subset=[col], how="all") if args.restrict_lengths is not None: filt = merged_df['SEQUENCE'].apply( lambda x: len(x) >= args.restrict_lengths[0] and \ len(x) <= args.restrict_lengths[1]) original_df = merged_df.copy() merged_df = merged_df[filt] if args.quantile_normalize: merged_df = quantile_normalize( merged_df, columns=[args.x_col, args.y_col]) if not args.add_histograms: fig = plt.figure() ax = plt.gca() plt.scatter(merged_df[args.x_col], merged_df[args.y_col], edgecolor=sns.desaturate('black', 0.75), facecolors='none', s=0.5) # ax.set_yscale('log') # ax.set_xscale('log') else: g = sns.JointGrid(args.x_col, args.y_col, merged_df) if args.bandwidth: kde_kws = {'bw': args.bandwidth} else: kde_kws = None g.plot_marginals(sns.distplot, hist=True, kde=True, color='blue', bins=50, kde_kws=kde_kws) g.plot_joint(plt.scatter, edgecolor=sns.desaturate('black', 0.75), facecolors='none', s=0.5) ax = g.ax_joint # g.ax_marg_x.set_xscale('log') # g.ax_marg_x.set_yscale('log') # g.ax_marg_y.set_yscale('log') # g.ax_marg_y.set_xscale('log') g.ax_marg_x.grid(False, which='both') g.ax_marg_y.grid(False, which='both') ax = g.ax_joint ax.grid(False, which='minor') ax.grid(True, which='major') # ax.axis('equal') plt.xlabel(args.xlabel, fontsize=16) plt.title(args.title) plt.ylabel(args.ylabel, fontsize=16) plt.xlim([0.5, np.log10(args.max_val)]) plt.ylim([0.5, np.log10(args.max_val)]) for axis in (ax.xaxis, ax.yaxis): for tick in 
axis.get_major_ticks(): tick.label.set_fontsize(16) if args.snRNAs is not None: if 'all' in args.snRNAs: args.snRNAs = list(U_SNRNA_DATAFRAME['SEQUENCE']) args.snRNAs.sort() for sequence in args.snRNAs: row = U_SNRNA_DATAFRAME[U_SNRNA_DATAFRAME.apply( lambda x: x['SEQUENCE'] == sequence, axis=1)] facecolor = row['FILL'][row.index[0]] edgecolor = row['COLOR'][row.index[0]] marker = row['SHAPE'][row.index[0]] snrnas = [None, None] if args.include_par: f = original_df['SEQUENCE'].map( lambda x: has_prime_and_realigned(x, sequence, args.include_par)) sub_df = original_df[f] if sequence == 'AGCTTTGCGCA': for i, label in enumerate([args.xlabel, args.ylabel]): trimmed = False for gene in ('HA', 'NA', 'NP', 'NS1'): if gene in label: trimmed = True break if trimmed: snrnas[i] = file_list[i][file_list[i]['SEQUENCE'] == 'AGCTTT'][args.x_col[:-2]].map(lambda x: np.log10(x)) else: snrnas[i] = file_list[i][file_list[i]['SEQUENCE'] == sequence][args.y_col[:-2]].map(lambda x: np.log10(x)) elif sequence == 'ATACTCTGGTTTCTCTTCA' and 'NS1' in args.xlabel and \ 'PB2' in args.ylabel and not ('Pelchat' in args.xlabel): # find numbers in vfiltered_summary.txt.gz file snrnas[0] = pd.Series(np.log10(130.)) snrnas[1] = pd.Series(np.log10(12.)) else: sub_df = merged_df[merged_df['SEQUENCE'] == sequence] snrnas[0] = sub_df[args.x_col] snrnas[1] = sub_df[args.y_col] if len(snrnas[0]) > 0 and len(snrnas[1]) > 0: plt.scatter(merged_df[args.x_col], merged_df[args.y_col], edgecolor=sns.desaturate('black', 0.75), facecolors='none', s=0.5) plt.scatter(snrnas[0], snrnas[1], facecolor=facecolor, edgecolor=edgecolor, label=sequence, marker=marker, s=30, linewidth='1.5') if args.legend: l = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, borderaxespad=0., fancybox=True, shadow=True, scatterpoints=1, fontsize=13) for text in l.get_texts(): row = U_SNRNA_DATAFRAME[U_SNRNA_DATAFRAME.apply( lambda x: x['SEQUENCE'] == text._text, axis=1)] text.set_color(row['COLOR'][row.index[0]]) if 
args.color_threeprime_startswith is not None: sub_df = sub_df[merged['THREEPRIME_SEQUENCE_x'].map( lambda x: x.startswith(args.color_threeprime_startswith))] plt.scatter(sub_df[args.x_col], sub_df[args.y_col], facecolor='r', edgecolor='r', label='G downstream of cellular fragment') if args.spearman_r: ax.text(0.02, 0.97, (r'$\rho_s = {:5.2f}$'.format( spearmanr(np.array(merged_df[args.x_col]), np.array(merged_df[args.y_col]))[0])), ha='left', va='center', transform=ax.transAxes, fontsize=14) if args.n_stats: ax.text(0.02, 0.925, '$n_x = {:.2E}$'.format(n_x).replace('+', ''), ha='left', va='center', transform=ax.transAxes, fontsize=14) ax.text(0.02, 0.88, '$n_y = {:.2E}$'.format(n_y).replace('+', ''), ha='left', va='center', transform=ax.transAxes, fontsize=14) ax.text(0.35, 0.97, '$u_x = {:.2E}$'.format(u_x).replace('+', ''), ha='left', va='center', transform=ax.transAxes, fontsize=14) ax.text(0.35, 0.925, '$u_y = {:.2E}$'.format(u_y).replace('+', ''), ha='left', va='center', transform=ax.transAxes, fontsize=14) ax.text(0.35, 0.88, '$u_{{merged}} = {:.2E}$'.format(len(merged_df)).replace('+', ''), ha='left', va='center', transform=ax.transAxes, fontsize=14) try: bbox_extra_artists = (l,) except NameError: bbox_extra_artists = None plt.savefig(args.outfile, format=args.format, dpi=args.dpi, bbox_extra_artists=bbox_extra_artists, bbox_inches='tight') plt.close()
def main(plotting=True):
    """Plot the velocity and acceleration histograms and return the odor-off set.

    Reads the module-level ``mozzie_hists`` mapping and pulls out its
    ``'odor_off_hists'`` and ``'odor_on_hists'`` entries.  Per the original
    author's note, each of those is laid out as::

        {'velocity' | 'acceleration' | 'angular_velocity':
            {'x' | 'y' | 'z' | 'abs':
                {'bin_centers': ..., 'normed_cts': ...}}}

    Only ``'velocity'`` and ``'acceleration'`` are plotted here.

    When ``plotting is True`` a 2x2 figure is drawn (rows: odor off / odor on,
    columns: velocity / acceleration) and saved to ``"Dicks distributions.png"``
    in the current working directory.

    Parameters
    ----------
    plotting : bool
        Plot and save the figure only when this is exactly ``True``.

    Returns
    -------
    The ``mozzie_hists['odor_off_hists']`` object, whether or not a figure
    was produced.
    """
    odor_off = mozzie_hists['odor_off_hists']
    odor_on = mozzie_hists['odor_on_hists']

    if plotting is not True:
        return odor_off

    # Use matplotlib's pyplot directly: the ``sns.plt`` alias is not available
    # in newer seaborn releases.
    fig, axs = plt.subplots(2, 2)  # , tight_layout=True)

    # (histogram key, base colour) for every curve drawn in each panel.
    components = (('x', "blue"), ('y', "green"), ('z', "red"), ('abs', "black"))

    # Legend labels; raw strings so the mathtext backslashes are not treated
    # as (invalid) Python escape sequences.
    legend_labels = {
        "velocity": {
            'x': r'$\mathbf{\dot{x}}$',
            'y': r'$\mathbf{\dot{y}}$',
            'z': r'$\mathbf{\dot{z}}$',
            'abs': r'$\| \mathbf{v} \|$',
        },
        "acceleration": {
            'x': r'$\mathbf{\ddot{x}}$',
            'y': r'$\mathbf{\ddot{y}}$',
            'z': r'$\mathbf{\ddot{z}}$',
            'abs': r'$\| \mathbf{a} \|$',
        },
    }

    # One entry per subplot row / column.
    rows = (
        (odor_off, "Probabilities (odor off)"),
        (odor_on, "Probabilities (odor on)"),
    )
    columns = (
        ("velocity", "Velocity Distributions ($m/s$)"),
        ("acceleration", "Acceleration Distributions ($m/s^2$)"),
    )

    for row_idx, (hists, row_label) in enumerate(rows):
        for col_idx, (quantity, col_label) in enumerate(columns):
            ax = axs[row_idx, col_idx]
            for component, base_color in components:
                hist = hists[quantity][component]
                centers = hist['bin_centers']
                if quantity == "velocity" and component == 'x':
                    # NOTE(review): the x-velocity bin centers are plotted in
                    # reverse order, exactly as the original code did;
                    # presumably a sign-convention flip -- confirm.
                    centers = centers[::-1]
                ax.plot(centers, hist['normed_cts'],
                        color=sns.desaturate(base_color, .4), lw=2,
                        label=legend_labels[quantity][component])
            if col_idx == 0:
                ax.set_ylabel(row_label)  # setting for whole row
            if row_idx == len(rows) - 1:
                ax.set_xlabel(col_label)  # setting for whole col
            ax.legend()

    fig.suptitle("Dickinson Distributions", fontsize=14)
    fig.savefig("Dicks distributions.png")

    return odor_off
mean3 IRRELEVANT stderror3 mean4 IRRELEVANT stderror4 mean4 IRRELEVANT stderror5 Currently requires 5 primer pairs in normal orientation look down if different orientation ''' import numpy as np import matplotlib.pyplot as plt import os import sys from coord import * #coord contains the coordinates of primers of used genes from scipy.interpolate import spline from seaborn import blend_palette,desaturate,color_palette #global variables definitions primers=('5\'', '.','..','...','3\'') color_scheme=blend_palette([desaturate("#009B76", 0), "#009B76"], 5) line_color=color_palette("hls", 8) os.chdir('/Users/Luis/Desktop') '''global definitions from command line argument 1- should be file name, will be split at dot to extract GENE NAMe argument 2- should be a string with IP used argument 3- string with different conditions separated by white space ''' file_name=sys.argv[1] Condition=sys.argv[2] Conditions=sys.argv[3].split() Gene=file_name.split('.')[0] coordinates=eval(Gene) #uses the coord defined variables and the parsed Gene name
## plot the test data if you like
#plt.scatter(test_data.T[0], test_data.T[1], color='b', **plot_kwds)
#plt.show()

## the mahalanobis distance covariance matrix (not inverted yet)
ss = np.array([[1.0, 0], [0.0, 1.0]])

clusterer = hdbscan.HDBSCAN(min_cluster_size=5, V=ss, metric='mahalanobis')
clusterer.fit(test_data)

## plot a single linkage tree of the clustering
#clusterer.single_linkage_tree_.plot(cmap='viridis', colorbar=True)
#plt.show()

## plot a condensed tree
#clusterer.condensed_tree_.plot()
#plt.show()

## plot the clusters found by the HDBSCAN run
palette = sns.color_palette()

## assign colors by cluster label, and depth/saturation of color by
## probability; points with a negative label are drawn in grey.
## The palette index wraps around so that a run producing more clusters than
## the palette has colours reuses colours instead of raising IndexError.
cluster_colors = [
    sns.desaturate(palette[col % len(palette)], sat) if col >= 0 else (0.5, 0.5, 0.5)
    for col, sat in zip(clusterer.labels_, clusterer.probabilities_)
]
plt.scatter(test_data.T[0], test_data.T[1], c=cluster_colors, **plot_kwds)
plt.show()

# NOTE(review): leftover interactive breakpoint -- the script stops here and
# waits for debugger input; remove once no longer needed.
pdb.set_trace()
# Function-call form so the line parses on both Python 2 and Python 3.
print('here')