def plot_and_savefig(self, out_path=None):
    sns.set_context('notebook')
    sns.set_style('white')
    plot_w = 3 + len(self.data['sample'].unique())
    plot_h = 3.5
    plots_per_row = 3
    n_plots = len(self.data.columns) - 2  # every column except CATEGORY/sample
    n_rows = ceil(n_plots / plots_per_row)
    n_cols = ceil(n_plots / n_rows)
    ax_ids = list(np.arange(n_plots) + 1)
    fig = plt.figure()
    fig.set_figheight(plot_h * n_rows)
    fig.set_figwidth(plot_w * n_cols)
    legend_drawn = False
    for category in self.data.columns:
        if category in ['CATEGORY', 'sample']:
            continue
        ax = fig.add_subplot(n_rows, n_cols, ax_ids.pop(0))
        self.draw_ax(ax, category)
        # Show the legend on the first drawn axis only (testing i == 0 here
        # would fail whenever the first column is CATEGORY or sample).
        if not legend_drawn:
            ax.legend()
            legend_drawn = True
        else:
            ax.legend_.set_visible(False)
    plt.tight_layout()
    if out_path:
        plt.savefig(out_path, dpi=300, bbox_inches='tight')
    return ax
def dist_small_multiples(df, figsize=(20, 20)):
    """Small multiples plots of the distribution of a dataframe's variables."""
    import math
    sns.set_style("white")
    num_plots = len(df.columns)
    n = int(math.ceil(math.sqrt(num_plots)))  # side length of the plot grid
    fig = plt.figure(figsize=figsize)
    axes = [plt.subplot(n, n, i) for i in range(1, num_plots + 1)]
    for i, (k, v) in enumerate(df.items()):  # iteritems() was removed in pandas 2.0
        ax = axes[i]
        sns.kdeplot(v, fill=True, ax=ax, legend=False)  # shade= is deprecated
        sns.rugplot(v, ax=ax, color=sns.color_palette("husl", 3)[0])
        for label in ax.get_yticklabels():
            label.set_visible(False)
        ax.xaxis.set_ticks([v.min(), v.max()])
        ax.set_title(k)
    sns.despine(left=True, trim=True, fig=fig)
    plt.tight_layout()
    return fig, axes
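# A minimal usage sketch for dist_small_multiples, assuming it lives in the
# same module; the DataFrame and column names are synthetic and illustrative.
import numpy as np
import pandas as pd

_demo_df = pd.DataFrame(np.random.randn(200, 4), columns=['a', 'b', 'c', 'd'])
fig, axes = dist_small_multiples(_demo_df, figsize=(8, 8))
fig.savefig('dist_small_multiples_demo.png')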
def UseSeaborn(palette='deep'):
    """Call to use seaborn plotting package"""
    import seaborn as sns
    # No background fill, legend font scale, frame on legend
    sns.set(style='whitegrid', font_scale=1.5, rc={'legend.frameon': True})
    # Mark ticks with border on all four sides (overrides 'whitegrid')
    sns.set_style('ticks')
    # Ticks point in
    sns.set_style({"xtick.direction": "in", "ytick.direction": "in"})
    # sns.choose_colorbrewer_palette('q')  # nice blue, green, red
    # sns.set_palette('colorblind')
    if palette == 'xkcd':
        # Nice blue, purple, green (xkcdcolors must be defined at module level)
        sns.set_palette(sns.xkcd_palette(xkcdcolors))
    else:
        sns.set_palette(palette)  # nice blue, green, red
    # sns.set_palette('deep')
    # sns.set_palette('Accent_r')
    # sns.set_palette('Set2')
    # sns.set_palette('Spectral_r')
    # sns.set_palette('spectral')
    # Fix invisible marker bug
    sns.set_context(rc={'lines.markeredgewidth': 0.1})
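# UseSeaborn('xkcd') relies on a module-level `xkcdcolors` list that is not
# part of this snippet; the names below are an assumed, illustrative choice
# (any colour names accepted by sns.xkcd_palette work).
xkcdcolors = ["windows blue", "amber", "faded green", "dusty purple"]
UseSeaborn(palette='xkcd')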
def style(mod=None):
    sb.set(font_scale=0.8)
    # sb.set_style("white")
    # sns.set_style("ticks")
    # Duplicate xtick.color/ytick.color entries removed: later keys silently
    # override earlier ones in a dict literal.
    sb.set_style({
        'lines.linewidth': 0.3,
        'axes.labelcolor': '.0',
        'axes.linewidth': 0.5,
        'axes.edgecolor': '.2',
        'axes.facecolor': 'white',
        'axes.grid': True,
        'font.family': ['sans-serif'],
        'font.sans-serif': ['Arial'],
        'grid.linewidth': 0.5,
        'grid.color': '.9',
        'text.color': '.0',
        'savefig.dpi': 100,
        'xtick.color': '.0',
        'xtick.direction': 'in',
        'xtick.major.size': 3.0,
        'xtick.minor.size': 1,
        'xtick.major.width': 0.5,
        'xtick.minor.width': 0.5,
        'xtick.major.pad': 3,
        'ytick.color': '.0',
        'ytick.direction': 'in',
        'ytick.major.size': 3.0,
        'ytick.minor.size': 1,
        'ytick.major.width': 0.5,
        'ytick.minor.width': 0.5,
        'ytick.major.pad': 3,
        # 'axes.labelpad': 0.3,
        'savefig.transparent': True,
    })
def prepare_plots(self):
    """Generates analysis plots used for the final report (as seen in Fig. 7)."""
    sns.set_style('darkgrid')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel('Initial population size (N)')
    ax.set_ylabel('Variation of adult counts over 10 years')
    a = np.arange(1, len(self.initial_pops) + 1, 1)
    b, c = [], []
    for pop in self.initial_pops:
        result = self.result_dict[pop]
        b.append(result['mean_stdev'])
        c.append(result['ci'])
    ax.errorbar(a, b, yerr=c)
    ax.scatter(a, b, s=40)
    ax.plot(a, b)
    plt.xticks(a, self.initial_pops)
    # Save before show(): with some backends show() leaves an empty canvas
    # for a subsequent savefig call.
    plt.savefig('results/results.png', bbox_inches='tight')
    plt.show()
def plot_dist_matrix(matrix, fasta_names, heatmap_out, dendrogram_out):
    """Cluster the distance matrix hierarchically and plot using seaborn.
    Average linkage method is used."""
    # Load required modules for plotting
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sns
    import pandas as pd
    from scipy.cluster.hierarchy import dendrogram, linkage

    # Create the labelled distance matrix
    pdm = pd.DataFrame(matrix, index=fasta_names, columns=fasta_names)

    # Plot heatmap
    figsizex = max(10, len(fasta_names) / 4)
    clustergrid = sns.clustermap(pdm, metric='euclidean', method='average',
                                 figsize=(figsizex, figsizex))
    clustergrid.savefig(heatmap_out)

    # Plot dendrogram
    sns.set_style('white')
    figsizey = max(10, len(fasta_names) / 8)
    f, ax = plt.subplots(figsize=(figsizex, figsizey))
    link = linkage(pdm, metric='euclidean', method='average')
    dendrogram(link, labels=pdm.index, ax=ax)
    no_spine = {'left': True, 'bottom': True, 'right': True, 'top': True}
    sns.despine(**no_spine)
    plt.xticks(rotation=90)
    f.tight_layout()
    plt.savefig(dendrogram_out)
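# Usage sketch for plot_dist_matrix on a small random symmetric distance
# matrix; the sequence names and output paths are illustrative.
import numpy as np

_names = ['seq_{}'.format(i) for i in range(12)]
_dists = np.random.rand(12, 12)
_dists = (_dists + _dists.T) / 2.0  # symmetrize
np.fill_diagonal(_dists, 0.0)       # zero self-distance
plot_dist_matrix(_dists, _names, 'heatmap_demo.png', 'dendrogram_demo.png')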
def pltsns(style='ticks', context='talk'):
    global figdir
    sns.set_style(style)
    sns.set_style({'legend.frameon': True})
    sns.set_context(context)
    # figdir = datadir + 'samoa/WATERSHED_ANALYSIS/GoodFigures/rawfigoutput/'
    return
def showResults(challenger_data, model):
    '''Show the original data, and the resulting logit-fit'''
    temperature = challenger_data[:, 0]
    failures = challenger_data[:, 1]
    # First plot the original data
    plt.figure()
    setFonts()
    sns.set_style('darkgrid')
    np.set_printoptions(precision=3, suppress=True)
    plt.scatter(temperature, failures, s=200, color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside Temperature [F]")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.tight_layout()  # was missing the call parentheses
    # Plot the fit (plt.hold was removed from matplotlib; overplotting is
    # now the default behaviour)
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)
    plt.plot(x, y, 'r')
    plt.xlim([50, 85])
    outFile = 'ChallengerPlain.png'
    showData(outFile)
def plot_op(operation):
    """Plots operation for all models"""
    df = pd.read_csv(RESULT_FOLDER + RESULT_FILE, usecols=[1, 2, 3])
    print(df.columns)
    df.columns = ['mo', 'node', 'time']
    # print(df.head())
    ele = mo(operation)
    # DataFrame.append() was removed in pandas 2.0; select all four
    # model-operations in one pass instead.
    qpare = df[df.mo.isin(ele[:4])]
    f, ax = plt.subplots()
    ax.set(yscale="log")
    ax.set_title('Query time')
    sns.set_style("whitegrid")
    sns.boxplot(x='mo', y='time', data=qpare)
    ax.set_xlabel("model-operation")
    ax.set_ylabel("time [s]")
    # plt.show()
    # sns.plt was removed from seaborn; use matplotlib's pyplot directly.
    plt.savefig(RESULT_FOLDER + operation + '.png')
    plt.clf()
def show_binomial():
    """Show an example of binomial distributions"""
    bd1 = stats.binom(20, 0.5)
    bd2 = stats.binom(20, 0.7)
    bd3 = stats.binom(40, 0.5)
    k = np.arange(40)
    sns.set_context('paper')
    sns.set_style('ticks')
    mystyle.set(14)
    markersize = 8
    # plt.hold was removed from matplotlib; repeated plot() calls overlay by default.
    plt.plot(k, bd1.pmf(k), 'o-b', ms=markersize)
    plt.plot(k, bd2.pmf(k), 'd-r', ms=markersize)
    plt.plot(k, bd3.pmf(k), 's-g', ms=markersize)
    plt.title('Binomial distribution')
    plt.legend(['p=0.5 and n=20', 'p=0.7 and n=20', 'p=0.5 and n=40'])
    plt.xlabel('X')
    plt.ylabel('P(X)')
    sns.despine()
    mystyle.printout_plain('Binomial_distribution_pmf.png')
    plt.show()
def main():
    seaborn_Seaborn_Module.set_style("dark")
    housing_2013 = pandas_Pandas_Module.read_csv("../Hud_2013.csv")
    cols = ['AGE1', 'FMR', 'TOTSAL']
    filtered_housing_2013 = housing_2013[cols]
    filtered_housing_2013.hist(column='FMR', bins=20)
    matplotlib_pyplot_Pyplot_Module.show()
def make_plotdir():
    "make plot directory on file system"
    sns.set_style("darkgrid")
    plotdir = get_plotdir()
    if not os.access(plotdir, os.F_OK):
        os.mkdir(plotdir)
    return plotdir
def generate_plot(csv_file_name, plot_file_name, x, y, hue, y_title,
                  xticklabels_rotation=90):
    sns.set(font_scale=1.5)
    sns.set_style("white", {"legend.frameon": True})
    df = pd.read_csv(csv_file_name)
    ax = sns.barplot(data=df, x=x, y=y, hue=hue,
                     palette=sns.color_palette("Paired"))
    ax.set_xlabel('')
    ax.set_ylabel(y_title)
    labels = ax.get_xticklabels()
    ax.set_xticklabels(labels, rotation=xticklabels_rotation)
    fig = ax.get_figure()
    if hue:
        legend = ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left',
                           borderaxespad=0.)
        legend.set_title('')  # set_label() only renamed the artist, not the legend title
        fig.savefig(plot_file_name, bbox_extra_artists=(legend,),
                    bbox_inches='tight')
        fig.savefig(plot_file_name + '.jpg', bbox_extra_artists=(legend,),
                    bbox_inches='tight')
    else:
        fig.tight_layout()
        fig.savefig(plot_file_name)
        fig.savefig(plot_file_name + '.jpg')
    plt.clf()
    plt.close('all')
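# Usage sketch for generate_plot; the CSV layout (columns 'method', 'dataset',
# 'score') is an assumption made purely for illustration.
import pandas as pd

pd.DataFrame({'method': ['a', 'a', 'b', 'b'],
              'dataset': ['d1', 'd2', 'd1', 'd2'],
              'score': [0.7, 0.8, 0.6, 0.9]}).to_csv('demo_scores.csv', index=False)
generate_plot('demo_scores.csv', 'demo_plot', x='method', y='score',
              hue='dataset', y_title='Score')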
def showResults(challenger_data, model):
    '''Show the original data, and the resulting logit-fit'''
    # First plot the original data
    plt.figure()
    sns.set_context('poster')
    sns.set_style('whitegrid')
    np.set_printoptions(precision=3, suppress=True)
    plt.scatter(challenger_data[:, 0], challenger_data[:, 1], s=75,
                color="k", alpha=0.5)
    plt.yticks([0, 1])
    plt.ylabel("Damage Incident?")
    plt.xlabel("Outside temperature (Fahrenheit)")
    plt.title("Defects of the Space Shuttle O-Rings vs temperature")
    plt.xlim(50, 85)
    # Plot the fit (plt.hold was removed from matplotlib and is no longer needed)
    x = np.arange(50, 85)
    alpha = model.params[0]
    beta = model.params[1]
    y = logistic(x, beta, alpha)
    plt.plot(x, y, 'r')
    outFile = 'ChallengerPlain.png'
    C2_8_mystyle.printout_plain(outFile, outDir=r'..\Images')  # raw string avoids backslash escapes
    plt.show()
def hist_plot(self, bokeh=False):
    """Simple histogram plot of the PDF

    Parameters
    ----------
    bokeh : bool, optional
        Generate a bokeh plot?

    Returns
    -------
    """
    if not bokeh:
        # imports; seaborn styling is optional
        from matplotlib import pyplot as plt
        try:
            import seaborn as sns
            sns.set_style("white")
        except ImportError:
            pass
        # Giddy up
        plt.clf()
        plt.bar(self.x - self.dx / 2., self.pdf, width=self.dx)
        plt.xlabel("x")
        plt.ylabel("PDF(x)")
        plt.show()
        plt.close()
    else:
        from bokeh.io import show
        from bokeh.plotting import figure
        p = figure(plot_width=400, plot_height=400, title='x PDF')
        p.quad(top=self.pdf, bottom=0, left=self.x - self.dx / 2.,
               right=self.x + self.dx / 2.)
        p.xaxis.axis_label = 'x'
        # Show
        show(p)
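# hist_plot only needs an object exposing x (bin centers), dx (bin width) and
# pdf (densities); this demo holder class is hypothetical and assumes
# hist_plot is defined in the same module.
import numpy as np

class _DemoPDF(object):
    hist_plot = hist_plot  # reuse the function above as a method
    def __init__(self):
        self.x = np.linspace(-3, 3, 61)
        self.dx = self.x[1] - self.x[0]
        self.pdf = np.exp(-self.x ** 2 / 2) / np.sqrt(2 * np.pi)

_DemoPDF().hist_plot()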
def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
    # collect embeddings for mfi:
    X = np.asarray([self.w2v_model[w] for w in self.mfi
                    if w in self.w2v_model], dtype='float32')
    # dimension reduction:
    tsne = TSNE(n_components=2)
    coor = tsne.fit_transform(X)  # unsparsify

    plt.clf()
    sns.set_style('dark')
    # sns.plt was removed from seaborn; go through matplotlib directly.
    plt.rcParams['axes.linewidth'] = 0.4
    fig, ax1 = plt.subplots()

    labels = self.mfi
    # first plot slices:
    x1, x2 = coor[:, 0], coor[:, 1]
    ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')
    # clustering on top (add some colouring):
    # (the affinity= keyword was renamed metric= in recent scikit-learn)
    clustering = AgglomerativeClustering(linkage='ward', metric='euclidean',
                                         n_clusters=nb_clusters)
    clustering.fit(coor)
    # add names:
    for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
        ax1.text(x, y, name, ha='center', va="center",
                 color=plt.cm.nipy_spectral(cluster_label / 10.),  # cm.spectral was removed
                 fontdict={'family': 'Arial', 'size': 8})
    # control aesthetics:
    ax1.set_xlabel('')
    ax1.set_ylabel('')
    ax1.set_xticklabels([])
    ax1.set_xticks([])
    ax1.set_yticklabels([])
    ax1.set_yticks([])
    plt.savefig(outputfile, bbox_inches='tight')
def features_pca_classified(fscaled, labels_true, labels_predict, axes=None,
                            algorithm="pca"):
    if algorithm == 'pca':
        pc = PCA(n_components=2)
        fscaled_trans = pc.fit(fscaled).transform(fscaled)
    elif algorithm == "tsne":
        fscaled_trans = TSNE(n_components=2).fit_transform(fscaled)
    else:
        raise AlgorithmUnrecognizedException("Not recognizing method of "
                                             "dimensionality reduction.")

    sns.set_style("whitegrid")
    plt.rc("font", size=24, family="serif", serif="Computer Sans")
    plt.rc("axes", titlesize=20, labelsize=20)
    plt.rc("text", usetex=True)
    plt.rc('xtick', labelsize=20)
    plt.rc('ytick', labelsize=20)

    # make a Figure object
    if axes is None:
        fig, axes = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

    ax1, ax2 = axes[0], axes[1]
    # first panel: true labels
    ax1 = plotting.scatter(fscaled_trans, labels_true, ax=ax1)
    # second panel: physical labels:
    ax2 = plotting.scatter(fscaled_trans, labels_predict, ax=ax2)
    plt.tight_layout()
    return ax1, ax2
def plot_swcrel(data, xlabel, ylabel):
    month_lab = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    sns.set_style("ticks")
    plt.rcParams.update({'mathtext.default': 'regular'})
    kws = dict(s=20, linewidth=.5, edgecolor="none", alpha=0.3)
    # size= was renamed height= in seaborn 0.9
    wue_plot = sns.FacetGrid(data, hue="Month", height=5)
    wue_plot.map(plt.scatter, xlabel, ylabel, **kws)
    ymax = np.ceil(data[ylabel].mean() + 3 * data[ylabel].std())
    xmax = np.max(data[xlabel])
    xmin = np.min(data[xlabel])
    x_ticks = np.arange(0, 0.4, 0.05)
    for wax in wue_plot.axes.ravel():
        wax.xaxis.set_ticks(x_ticks)
        wax.xaxis.set_ticklabels(['%1.2f' % x for x in x_ticks],
                                 rotation=45, ha="right", fontsize=10)
    wue_plot.set(xlim=(xmin, xmax), ylim=(0, ymax))
    leg = plt.legend(loc='right', labels=month_lab, ncol=1,
                     bbox_to_anchor=(1.3, 0.5), borderpad=2)
    leg.get_frame().set_edgecolor('black')
    wue_plot.fig.subplots_adjust(right=0.8, wspace=.08, hspace=0.15,
                                 top=0.9, bottom=0.25)
    return wue_plot
def main():
    runResults = []
    # Traverse files, extract matrix, architecture and params
    for f in [f for f in os.listdir(".") if os.path.isfile(f)]:
        if f.startswith("run_Spmv"):
            runResults.append(RunResult(f))

    df = pd.DataFrame([[r.prj, r.matrix, r.gflops_est] for r in runResults])
    grouped = df.groupby(0)
    groups = []
    names = []
    for name, group in grouped:
        group.set_index(1, inplace=True)
        # group.sort_index(inplace=True)
        groups.append(group[2])
        names.append(name)

    new_df = pd.concat(groups, axis=1)
    new_df.columns = names
    sns.set_style("white")
    sns.set_palette(sns.color_palette("cubehelix", 13))
    bar = new_df.plot(kind="bar")
    sns.despine()
    fig = bar.get_figure()
    fig.set_size_inches(15, 15)
    fig.tight_layout()
    fig.savefig("est_gflops.pdf")
def PlotFrequencyTuningCurves(self, stResponseProb, measure, unit=[], filePath=[]):
    """
    Plots measure for multiple frequencies, with a trace for each tone intensity.

    :param stResponseProb: DataFrames results of Bayesian response analysis
        for multiple tone stimulus intensities
    :type stResponseProb: pandas DataFrame
    :param measure: Bayesian response analysis measure ['resProb', 'vocalResMag',
        'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate',
        'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
    :type measure: int [0-9]
    :param unit: Unique identifier for cell
    :type unit: str
    :param filePath: Path to directory where results will be saved
    :type filePath: str
    :returns: Handle to plot
    """
    measureName = ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize',
                   'effectSize_MLE', 'spontRate', 'spontRateSTD',
                   'responseLatency', 'responseLatencySTD', 'responseDuration']
    tuningData = stResponseProb
    # sns.set_palette(sns.color_palette("bright", 8))
    attn = stResponseProb.keys()[0]
    firstFreq = stResponseProb[attn].index.tolist()[1]
    sns.set_style("white")
    sns.set_style("ticks")
    # Note: the three-axis .loc indexing assumes the long-deprecated pandas Panel
    ax = stResponseProb.loc[:, firstFreq:, measure].fillna(0).plot(figsize=(6, 4))
    sns.despine()
    plt.grid(False)
    plt.title(unit, fontsize=14)
    plt.xlabel('Frequency (kHz)', fontsize=12)
    plt.ylabel(measureName[measure], fontsize=12)
    plt.tick_params(axis='both', which='major', labelsize=14)
    if len(filePath) > 0:
        plt.savefig(self.dirPath + filePath + 'freqTuning_' +
                    measureName[measure] + '_' + unit + '.pdf')
        plt.close()
    else:
        plt.show()
    return ax
def PlotBBNResponseCurve(self, bbnResponseProb, measure, unit=[], filePath=[],
                         attn=False):
    """
    Plots measure for multiple frequencies and intensities in a contour plot.

    :param bbnResponseProb: DataFrames results of Bayesian response analysis
        for multiple tone stimulus intensities
    :type bbnResponseProb: pandas DataFrame
    :param measure: Bayesian response analysis measure ['resProb', 'vocalResMag',
        'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate',
        'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
    :type measure: integer [0-9]
    :param unit: Unique identifier for cell
    :type unit: str
    :param filePath: Path to directory where results will be saved
    :type filePath: str
    :returns: Handle to plot
    """
    measureName = ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize',
                   'effectSize_MLE', 'spontRate', 'spontRateSTD',
                   'responseLatency', 'responseLatencySTD', 'responseDuration']
    tuningData = bbnResponseProb
    sns.set_palette(sns.color_palette("bright", 8))
    sns.set_context(rc={"figure.figsize": (5, 3)})
    sns.set_style("white")
    sns.set_style("ticks")
    if attn:
        ax = bbnResponseProb.loc[::-1, measure].fillna(0).plot(figsize=(6, 4))
    else:
        ax = bbnResponseProb.loc[:, measure].fillna(0).plot(figsize=(6, 4))
    sns.despine()
    plt.grid(False)
    plt.title(unit, fontsize=14)
    plt.xlabel('SPL (dB)', fontsize=12)
    plt.ylabel(measureName[measure], fontsize=12)
    plt.ylim(0.5, 1.0)
    # plt.gca().invert_xaxis()
    if len(filePath) > 0:
        plt.savefig(self.dirPath + filePath + 'bbn_' + measureName[measure] +
                    '_' + unit + '.pdf')
        plt.close()
    else:
        plt.show()
    return ax
def plot_proximity_heatmap(product_space_orig, proximity_matr):
    """
    Given the proximity matrix and the product space matrix, produces the
    heatmap (simply based on sorting)
    @param proximity_matr:
    @param product_space_orig:
    @return:
    """
    x = sorted(product_space_orig)
    y = sorted(product_space_orig)
    intensity = [0] * len(product_space_orig)
    for i, product in enumerate(x):
        intensity[i] = [0] * len(product_space_orig)
        for j, product2 in enumerate(y):
            # proximity is stored one-way, so look it up in both directions
            if product in proximity_matr and product2 in proximity_matr[product]:
                intensity[i][j] = proximity_matr[product][product2]
            elif product2 in proximity_matr and product in proximity_matr[product2]:
                intensity[i][j] = proximity_matr[product2][product]
            elif product == product2:
                intensity[i][j] = 1
    intensity = np.array(intensity)
    f, ax = plt.subplots(figsize=(5, 4))
    sns.set_style("ticks", {'axes.edgecolor': '.0', 'axes.facecolor': 'black'})
    fd = sns.heatmap(intensity, xticklabels=False, yticklabels=False,
                     cmap="RdYlBu_r", square=True)
    f.text(0.865, 0.5, r"Proximity $\phi$", ha='right', va='center',
           rotation='vertical', fontsize=13)
    f.tight_layout()
    plt.savefig('data/proximityheat.pdf')
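# Usage sketch: proximity_matr is a one-way nested dict
# {product: {other_product: proximity}}, as inferred from the lookups above;
# the products and values are illustrative.
import os

os.makedirs('data', exist_ok=True)  # the function saves to data/proximityheat.pdf
_products = ['apples', 'cars', 'copper']
_prox = {'apples': {'cars': 0.1, 'copper': 0.3},
         'cars': {'copper': 0.2}}
plot_proximity_heatmap(_products, _prox)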
def multi_plot_multi_model_metrics(self):
    index = list(range(len(self.model_list)))
    bw = 0.35
    score_list = ['Accuracy', 'Precision', 'Recall', 'F1', 'ROCAUC']
    plt.figure(figsize=(18, 5))
    for j, scoring in enumerate(score_list):
        ax = plt.subplot(151 + j)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        sns.set_style("whitegrid")
        plt.bar(index, self.score_dict[scoring], bw, align='center',
                # color=colors[(i*2)],
                alpha=0.6, label=self.index_func)
        plt.title(scoring, fontsize=15, fontweight='bold')
        plt.xticks(index, self.index_func, rotation='vertical')
        plt.ylim(0.0, 1.1)
        if j == 0:
            plt.ylabel('Score', fontsize=20, fontweight='bold')
        # if j == 4:
        #     plt.legend()
        plt.grid(False)
def compare_spectra():
    import mywfc3.stgrism as st
    import unicorn

    ### Fancy colors
    import seaborn as sns
    import matplotlib.pyplot as plt
    cmap = sns.cubehelix_palette(as_cmap=True, light=0.95, start=0.5, hue=0.4,
                                 rot=-0.7, reverse=True)
    cmap.name = 'sns_rot'
    plt.register_cmap(cmap=cmap)
    sns.set_style("ticks", {"ytick.major.size": 3, "xtick.major.size": 3})
    plt.set_cmap('sns_rot')
    # plt.gray()

    fig = st.compare_methods(x0=787, y0=712, v=np.array([-1.5, 4]) * 0.6,
                             NX=180, NY=40, direct_off=100, final=True,
                             mask_lim=0.02)
    # fig.tight_layout()
    unicorn.plotting.savefig(fig, '/tmp/compare_model_star.pdf', dpi=300)

    fig = st.compare_methods(x0=485, y0=332, v=np.array([-1.5, 4]) * 0.2,
                             NX=180, NY=40, direct_off=100, final=True,
                             mask_lim=0.1)
    unicorn.plotting.savefig(fig, '/tmp/compare_model_galaxy.pdf', dpi=300)

    fig = st.compare_methods(x0=286, y0=408, v=np.array([-1.5, 4]) * 0.08,
                             NX=180, NY=40, direct_off=100, final=True,
                             mask_lim=0.1)
    unicorn.plotting.savefig(fig, '/tmp/compare_model_galaxy2.pdf', dpi=300)

    fig = st.compare_methods(x0=922, y0=564, v=np.array([-1.5, 4]) * 0.2,
                             NX=180, NY=40, direct_off=100, final=True,
                             mask_lim=0.15)
    unicorn.plotting.savefig(fig, '/tmp/compare_model_galaxy3.pdf', dpi=300)
def printKeyTypeDNAwithLabels(keyGroups, keydataDF, labelsDF, outputFile="",
                              printIt=False):
    sns.set_style("white")
    labelDict = {'fontsize': 16, 'weight': 'roman'}
    fig, ax = plt.subplots(figsize=(18, 10))
    for g in keyGroups.groups:
        colours = [c for c in keyGroups.get_group(g)['key colour']]
        x = [i for i in keyGroups.get_group(g)['keycode'].index]
        y = [k for k in keyGroups.get_group(g)['keycode']]
        # ax.scatter(x, y, s=100, marker='|', c=colours, linewidths=1, alpha=0.8, label=g)
        ax.scatter(x, y, s=30, marker='o', c=colours, linewidths=0, alpha=0.5, label=g)
    colours = sns.color_palette("GnBu_d", len(labelsDF))
    for n, (d, l) in enumerate(zip(labelsDF.index, labelsDF['label'])):
        ax.plot([d, d], [0, 225], color=colours[n], linewidth=3, alpha=0.5, label=l)
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.7,
                     box.width, box.height * 0.3])
    ax.set_xlim(keydataDF.index[0], keydataDF.index[-1])
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.4))
    ax.set_ylabel("keycode", fontdict=labelDict)
    ax.set_xlabel("clock", fontdict=labelDict)
    plt.show()
    if printIt:
        fig.savefig(outputFile, format='png', dpi=256)
    plt.close(fig)
    plt.clf()
    return
def stripplot_to_pdf(data, save_path, x=None, y=None, hue=None,
                     style='whitegrid', fontsize=2, rows=1, cols=1,
                     figsize=(4, 4), **kwargs):
    """
    Data plotted as stripplot using seaborn and saved in a pdf given in save_path

    Parameters
    ----------
    data : pd.DataFrame or path to csv file
        single or list of data to plot into pdf.

    save_path : str
        Path to save the pdf plot.
    """
    if isinstance(data, str):  # basestring is Python 2 only
        data = pd.read_csv(data)
    if isinstance(data, (list, tuple)):
        cols = len(data)
    if not isinstance(data, (list, tuple)):
        data = [data, ]
    # sns.set() resets the style, so apply it before set_style, not after
    sns.set(font_scale=fontsize)
    sns.set_style(style)
    with PdfPages(save_path) as pdf:
        # squeeze=False keeps axes a 2-D array even for a single subplot,
        # so the reshape(-1) below always works
        fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=figsize,
                                 squeeze=False, sharey=True)
        axes = axes.reshape(-1)
        for ax, d in zip(axes, data):
            sns.stripplot(x=x, y=y, hue=hue, data=d, ax=ax, **kwargs)
        pdf.savefig(fig)
        plt.close()
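# Usage sketch with a synthetic DataFrame; column names are illustrative.
import numpy as np
import pandas as pd

_demo = pd.DataFrame({'group': np.repeat(['a', 'b'], 50),
                      'value': np.random.randn(100)})
stripplot_to_pdf(_demo, 'stripplot_demo.pdf', x='group', y='value')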
def plotrfACC():
    # data = json.loads(open('rf_accs.json').read())
    data = json.loads(open('rf_accs_top3.json').read())
    data = json.loads(open('rf_accs_nowindow.json').read())
    nLetter = 3  # 14
    data["texts/ADHD_various_half/"] = [data["texts/ADHD_various_half/"][i]
                                        for i in [1, 2, 3]]
    sns.set_style("dark")
    # f, (ax1, ax2) = plt.subplots(1, 2)
    f, ax1 = plt.subplots()
    bar1 = ax1.bar(range(nLetter), data["texts/ADHD_various_half/"])
    ax1.set_title('RF accs for half SAX')
    plt.sca(ax1)
    plt.xticks(np.arange(nLetter) + .4, range(3, nLetter + 3))
    plt.xlabel('# of bins (letters)/word')
    ax1.set_ylim([0.6, 0.9])
    # bar2 = ax2.bar(range(nLetter), data["texts/ADHD_various_full/"])
    # ax2.set_title('RF accs for full SAX')
    # plt.sca(ax2)
    # plt.xticks(np.arange(nLetter) + .4, range(2, nLetter + 2))
    # plt.xlabel('# of bins (letters)/word')
    # ax2.set_ylim([0.6, 0.9])
    plt.show()
def single_plot_multi_model_metrics(self):
    default_index = list(range(len(self.model_list)))
    bw = 0.15
    score_list = ['Accuracy', 'Precision', 'Recall', 'F1', 'ROCAUC']
    plt.figure(figsize=(18, 5))
    for j, scoring in enumerate(score_list):
        ax = plt.subplot(111)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        sns.set_style("whitegrid")
        index = [i + bw * j for i in default_index]
        plt.bar(index, self.score_dict[scoring], bw, align='center',
                color=self.colors[(3 + j)], alpha=0.6, label=scoring)
        plt.title('Scores for Different Models', fontsize=15, fontweight='bold')
        tick_location = [i for i in default_index]
        plt.xticks(tick_location, self.index_func, rotation=60)
        plt.ylim(0.0, 1.1)
        if j == 0:
            plt.ylabel('Score', fontsize=20, fontweight='bold')
        if j == 4:
            plt.legend(loc='best')
        plt.grid(False)
def build_wordmap(w2v_mat):
    pca = PCA(n_components=2)
    pca.fit(w2v_mat.T)
    w2v_pca = pca.transform(w2v_mat.T)

    km = KMeans(n_clusters=6)
    labels = km.fit_predict(w2v_mat.T)  # was w2vt_mat: an undefined name
    # Cast to int so the hex formatting below does not choke on floats
    colors = (255 * ScalarMappable(cmap='Paired')
              .to_rgba(np.unique(labels))[:, :3]).astype(int)
    hex_colors = ['#%02x%02x%02x' % (r, g, b) for r, g, b in colors]

    sns.set_style('dark')
    fig, ax = plt.subplots(1, 1, figsize=(1.5, 1.5))
    ax.axis('off')
    # ax = fig.add_subplot(111)
    for i in range(w2v_pca.shape[0]):  # was w2vt_pca: an undefined name
        plt.text(w2v_pca[i, 0], w2v_pca[i, 1], str(vocab[i]),
                 fontdict={'color': hex_colors[labels[i]], 'size': 12})
    return ax
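# Usage sketch: build_wordmap reads a module-level `vocab` aligned with the
# columns of w2v_mat; both are synthetic here (16-dim embeddings of 30 words).
import numpy as np

vocab = ['word{}'.format(i) for i in range(30)]
_demo_mat = np.random.randn(16, 30)
ax = build_wordmap(_demo_mat)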
def draw_chart(chart_name, measure, axis, val_ordinate, train_ordinate,
               test_ordinate, dst_folder):
    plt.style.use('seaborn')
    sns.set(font_scale=1.2)
    sns.set_style({'font.family': 'serif'})
    fig, ax = plt.subplots(figsize=(8, 8))
    ttl = ax.title
    ttl.set_position([.5, 1.05])
    plt.tick_params(axis='both', which='major', labelsize=8)
    plt.tick_params(axis='both', which='minor', labelsize=8)
    ax.set_title(' '.join(chart_name.replace(
        '_test_es_50_lr_1e-05_l2_0_0_mc_3_hsize_250', '').split('_')[1:]))
    plt.yticks(np.arange(0, 1.1, 0.1))
    ax.set_ylim(0, 1)
    plt.xticks(np.arange(0, 11, 1))
    ax.set_xlim(0, 10)
    plt.grid(True)
    plt.xlabel('epochs')
    plt.ylabel(measure)
    if val_ordinate is not None:  # plotting t, a separately
        ax.plot(axis, val_ordinate, color=sns.xkcd_rgb["pale red"],
                marker='.', label='validation')
    if train_ordinate is not None:  # plotting t, b separately
        ax.plot(axis, train_ordinate, color=sns.xkcd_rgb["medium green"],
                marker='.', label='train')
    if test_ordinate is not None:  # plotting t, c separately
        ax.plot(axis, test_ordinate, color=sns.xkcd_rgb["denim blue"],
                marker='.', label='test')
    ax.legend()
    plt.savefig(os.path.join(dst_folder, '{}_{}.pdf'.format(
        '_'.join(chart_name.split('_')[1:]).replace(
            '_test_es_50_lr_1e-05_l2_0_0_mc_3_hsize_250', ''), measure)),
        dpi=300, bbox_inches='tight')
def query_speed_fig(fake_data=False, fname='query_speed', with_matmuls=True,
                    camera_ready=False):
    # experiment params: fixed N = 100k, D = 256, Q = 1024;
    # layout: rows = 8B, 16B, 32B; bar graph in each row
    # alternative: plot in each row vs batch size
    # algos: Bolt; PQ; OPQ; PairQ; Matmul, batch={1, 16, 64, 256}
    sb.set_context("talk")
    # if camera_ready:  # white style overwrites our fonts
    #     matplotlib.rcParams['font.family'] = CAMERA_READY_FONT
    set_palette(ncolors=8)
    fig, axes = plt.subplots(3, 1, figsize=(6, 8), dpi=300)

    if fake_data:  # for debugging
        ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ',
                 'Matmul 1', 'Matmul 16', 'Matmul 64', 'Matmul 256']
        algo2offset = {'Bolt': 100, 'PQ': 50, 'OPQ': 30, 'PairQ': 25,
                       'Matmul 1': 1, 'Matmul 16': 16,
                       'Matmul 64': 64, 'Matmul 256': 256}
        for i, nbytes in enumerate([8, 16, 32]):
            bytes_str = '{}B'.format(nbytes)
            dicts = []
            for algo in ALGOS:
                dps = np.random.randn(10) + 256 / nbytes
                dps += algo2offset[algo] / nbytes
                dicts += [{'algo': algo, 'nbytes': bytes_str, 'y': y} for y in dps]
            df = pd.DataFrame.from_records(dicts)
    else:
        if with_matmuls:
            ALGOS = ['Bolt', 'Binary Embedding', 'PQ', 'OPQ',
                     'Matmul 1', 'Matmul 256', 'Matmul 1024']
        else:
            ALGOS = ['Bolt', 'Binary Embedding', 'PQ', 'OPQ']
        df = results.query_speed_results()
        df['y'] = df['y'] / 1e9  # convert to billions
    print("df cols: ", df.columns)  # Python 3 print
    df.rename(columns={'algo': ' '}, inplace=True)  # hide from legend

    for i, nbytes in enumerate([8, 16, 32]):
        data = df[df['nbytes'] == nbytes]
        ax = sb.barplot(x='nbytes', y='y', hue=' ', hue_order=ALGOS, ci=95,
                        data=data, ax=axes[i], capsize=.0004)

    # ------------------------ clean up / format axes

    for ax in axes[:-1]:  # remove x labels except for bottom axis
        plt.setp(ax.get_xticklabels(), visible=False)
        ax.get_xaxis().set_visible(False)

    end = .5 * (len(ALGOS) / float((len(ALGOS) + 2)))
    start = -end
    tick_positions = np.linspace(start + .02, end - .05, len(ALGOS))
    if camera_ready:
        tick_positions[0] += .02
        tick_positions[2] += .02
        tick_positions[3] += .01

    for ax in axes:
        ax.set_xlim([start - .02, end + .02])
        if camera_ready:
            ax.set_ylabel('Billion Distances/s', y=.49,  # .5 = centered?
                          family=CAMERA_READY_FONT)
        else:
            ax.set_ylabel('Billions of Distances/s')
        ax.legend_.remove()
        if not fake_data:
            ax.set_ylim(0, 2.5)

    # add byte counts on the right
    fmt_str = "{}B Encodings"
    sb.set_style("white")  # adds border (spines) we have to remove
    for i, ax in enumerate(axes):
        ax2 = ax.twinx()
        sb.despine(ax=ax2, top=True, left=True, bottom=True, right=True)
        ax2.get_xaxis().set_visible(False)
        # (hiding ax2.get_yaxis() entirely would also remove the ylabel)
        plt.setp(ax2.get_xticklabels(), visible=False)
        plt.setp(ax2.get_yticklabels(), visible=False)
        ax2.yaxis.set_label_position('right')
        if camera_ready:
            ax2.set_ylabel(fmt_str.format((2 ** i) * 8), labelpad=10,
                           fontsize=14, family=CAMERA_READY_FONT)
        else:
            ax2.set_ylabel(fmt_str.format((2 ** i) * 8), labelpad=10, fontsize=15)

    # ------------------------ have bottom / top axes print title, x info

    if camera_ready:
        axes[0].set_title('Distance Computations per Second', y=1.02,
                          family=CAMERA_READY_FONT, fontsize=15)
    else:
        axes[0].set_title('Distance Computations per Second', y=1.02)

    for ax in axes:
        axes[-1].set_xticks(tick_positions)
        ax.set_xlim(-.4, .4)  # no idea why this makes the bars fit right...

    xlabels = ["\n".join(name.split(' ')) for name in ALGOS]
    if not camera_ready:
        for i, lbl in enumerate(xlabels):
            if '\n' in lbl:  # shift label up by adding another line
                xlabels[i] = xlabels[i] + '\n'
    axes[-1].set_xticklabels(xlabels, rotation=70)
    if camera_ready:
        axes[-1].tick_params(axis='x', which='major', pad=4)
    axes[-1].set_xlabel("", labelpad=-20)

    # ------------------------ show / save plot

    plt.tight_layout()
    if camera_ready:
        plt.subplots_adjust(hspace=.18)
    # MPL conversion to pdf is selectively braindead for just this plot; it
    # lays things out horribly in a way that doesn't match the results
    # of show() at all. Just export as high-density png as a workaround
    save_fig_png(fname)
from datetime import datetime

import matplotlib
import numpy as np
import pandas as pd
import seaborn
from matplotlib import pyplot as plt


# NOTE: the function header and the read_csv call below are reconstructed from
# the call sites further down; only the body from date_min onward was in the
# original excerpt. `magnitude` is defined elsewhere.
def load_data(path, row):
    series = pd.read_csv(path)  # expects 'Timestamp' and 'USD' columns
    date_min = series['Timestamp'][0] if len(series['Timestamp']) > 0 else 0
    date_max = series['Timestamp'][len(series) - 1] if len(series['Timestamp']) > 0 else 0
    data = {'Timestamp': [], 'Transactions': [], 'USD': []}
    for d in pd.date_range(start=date_min, end=date_max):
        t = d.strftime('%Y-%m-%d')
        for i in range(len(series)):
            if t == series['Timestamp'][i]:
                data['Timestamp'].append(t)
                data['Transactions'].append(row)
                data['USD'].append(np.pi * 8 * magnitude(series['USD'][i]))
    data['Timestamp'] = [datetime.strptime(d, "%Y-%m-%d") for d in data['Timestamp']]
    return data


matplotlib.use('Agg')
seaborn.set_style("whitegrid", {'axes.grid': False})
flatui = ["#3498db"]
seaborn.set_palette(flatui)

series1 = load_data('reentrancy_timelime.csv', 4)
series2 = load_data('parity_wallet_hacks_timelime.csv', 3)
series3 = load_data('integer_overflow_timelime.csv', 1)
series4 = load_data('unhandled_exception_timelime.csv', 0)
series5 = load_data('short_address_timelime.csv', 2)

fig, ax = plt.subplots()
ax.scatter('Timestamp', 'Transactions', data=series1, marker='.', s=5, linewidths=1)
ax.scatter('Timestamp', 'Transactions', data=series1, linewidths=1,
           s=series1['USD'], alpha=0.3)
ax.scatter('Timestamp', 'Transactions', data=series2, marker='.', s=5, linewidths=1)
def evolve_model(end_time, double_star, stars):
    time = 0 | units.yr
    dt = 0.05 * end_time / 1000.
    converter = nbody_system.nbody_to_si(double_star.mass,
                                         double_star.semimajor_axis)
    gravity = Hermite(converter)
    gravity.particles.add_particle(stars)
    to_stars = gravity.particles.new_channel_to(stars)
    from_stars = stars.new_channel_to(gravity.particles)

    period = get_period(double_star)
    print("Period =", period.as_string_in(units.yr))
    print("Mass loss timestep =", dt)
    print("Steps per period: = {:1.2f}".format(period / dt))

    a_an = [] | units.au
    e_an = []
    atemp = double_star.semimajor_axis
    etemp = double_star.eccentricity

    ###### COMMON ENVELOPE STUFF ###############
    final_a = 40 | units.RSun
    mu = double_star.mass * constants.G
    Eps0 = mu / (2 * double_star.semimajor_axis)
    Eps1 = mu / (2 * final_a)
    # Eps_ce should come from the alpha-lambda model, but we just fix the
    # final semimajor axis here for simplicity
    Eps_ce = Eps1 - Eps0
    print("Eps_ce/Eps0", Eps_ce / Eps0)
    Tce = 1000 | units.yr
    Kce = K_from_eps(Eps0, Eps_ce, Tce, mu)
    print("Kce", Kce)
    Avisc = -Kce * Tce
    print("Avisc", Avisc.as_string_in(units.RSun**2))
    Rvisc = Avisc.sqrt() / (4 * constants.pi)
    print("Rvisc", Rvisc.as_string_in(units.RSun))
    vorb = (mu / double_star.semimajor_axis).sqrt()
    ###### END COMMON ENVELOPE STUFF ###############

    collision = False
    a = [] | units.au
    e = []
    m = [] | units.MSun
    t = [] | units.yr
    while time < end_time:
        time += dt
        if not collision:
            gravity.evolve_model(time)
            to_stars.copy()
            kick_stars_comenv2(stars, dt, Kce, Avisc)
            from_stars.copy()  # (a duplicated copy() call was dropped here)
            orbital_elements = orbital_elements_from_binary(stars, G=constants.G)
            collision = check_collisions(stars)
        if atemp.number > 0:
            dadt = dadt_comenv_k0(atemp, etemp, Kce / Avisc)
            dedt = dedt_comenv_k0(atemp, etemp, Kce / Avisc)
            atemp = atemp + dadt * dt
            etemp = etemp + dedt * dt
        if collision and atemp.number < 0:
            break
        a_an.append(atemp)
        e_an.append(etemp)
        a.append(orbital_elements[2])
        e.append(orbital_elements[3])
        m.append(stars.mass.sum())
        t.append(time)
        print("time=", time.in_(units.yr), "a=", a[-1].in_(units.RSun),
              "e=", e[-1], "m=", stars.mass.in_(units.MSun), end="\r")
    gravity.stop()

    from matplotlib import pyplot
    import seaborn as sns
    sns.set(font_scale=1.33)
    sns.set_style("ticks")
    fig, axis = pyplot.subplots(nrows=2, sharex=True)
    axis[0].plot(t.value_in(units.yr), a.value_in(units.RSun), label="nbody k=0")
    axis[0].plot(t.value_in(units.yr), a_an.value_in(units.RSun), label="analytic")
    axis[0].set_ylabel(r"semimajor axis [$R_\odot$]")  # raw string for \odot
    axis[0].legend()
    axis[1].plot(t.value_in(units.yr), e)
    axis[1].plot(t.value_in(units.yr), e_an)
    axis[1].set_ylabel("eccentricity")
    axis[1].set_xlabel("time [yr]")
    axis[0].set_xlabel("time [yr]")
    pyplot.tight_layout()
    pyplot.subplots_adjust(hspace=0.0)
    pyplot.savefig("comenv2.png")
    pyplot.show()
def encoding_fig(fake_data=False, camera_ready=False):
    # NOTE: sb.tsplot has been deprecated/removed in recent seaborn; this
    # script assumes an older seaborn release.
    sb.set_style('darkgrid')
    sb.set_context("talk", rc={"figure.figsize": (7, 7)})
    fig, axes = plt.subplots(3, 2)

    # ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
    ALGOS = ['Bolt', 'PQ', 'OPQ']
    algo2offset = {'Bolt': 100, 'PQ': 50, 'OPQ': 30, 'PairQ': 25}
    lengths = [64, 128, 256, 512, 1024]

    # sb.set_palette("Set1", n_colors=len(ALGOS))
    set_palette(ncolors=len(ALGOS))

    if fake_data:
        data = np.random.randn(1, len(lengths), len(algo2offset))
        for i, algo in enumerate(ALGOS):
            data[:, :, i] += algo2offset[algo]
        data /= np.arange(len(lengths)).reshape((1, -1, 1))

        # ------------------------ data encoding
        # 8B encodings
        ax = axes[0, 0]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)
        ax.set_title('Data Encoding Speed', y=1.02)
        # 16B encodings
        data /= 2
        ax = axes[1, 0]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)
        # 32B encodings
        data /= 2
        ax = axes[2, 0]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)

        # ------------------------ query encoding
        data *= 8
        data += np.random.randn(*data.shape) * 5
        # 8B encodings
        ax = axes[0, 1]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)
        ax.set_title('Query Encoding Speed', y=1.03, fontsize=16)
        # 16B encodings
        data /= 2
        ax = axes[1, 1]
        sb.tsplot(data=data, condition=None, time=lengths, ax=ax)
        # 32B encodings
        data /= 2
        ax = axes[2, 1]
        sb.tsplot(data=data, condition=ALGOS, time=lengths, ax=ax)
    else:  # real data
        NBYTES_LIST = [8, 16, 32]
        df = results.encode_results()
        df_x = df[df['task'] == 'encode_x']
        df_q = df[df['task'] == 'encode_q']
        dfs = [df_x, df_q]
        ax_cols = [axes[:, 0], axes[:, 1]]
        for df, ax_col in zip(dfs, ax_cols):  # for each col in subplots
            for b, nbytes in enumerate(NBYTES_LIST):  # for each row in subplots
                ax = ax_col[b]
                plot_df = df.loc[df['nbytes'] == nbytes]
                plot_df = plot_df.loc[plot_df['algo'].isin(ALGOS)]
                sb.tsplot(value='y', condition='algo', unit='trial', time='D',
                          data=plot_df, ax=ax, ci=95, n_boot=500)

    # ------------------------ legend
    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=len(ALGOS), labelspacing=0)

    # ------------------------ postproc + save plot
    for ax in axes.ravel():
        ax.set_yscale("log")
        ax.legend_.remove()
        ax.set_ylim(5e3, 2e7)

    if camera_ready:
        axes[0, 0].set_title('Data Encoding Speed', x=.49, y=1.03, fontsize=18)
        axes[0, 1].set_title('Query Encoding Speed', x=.5, y=1.03, fontsize=18)
    else:
        axes[0, 0].set_title('Data Encoding Speed', y=1.03, fontsize=16)
        axes[0, 1].set_title('Query Encoding Speed', y=1.03, fontsize=16)

    # only bottom row gets xlabels
    for ax in axes[:-1, :].ravel():
        plt.setp(ax.get_xticklabels(), visible=False)
        ax.set_xlabel('', labelpad=-10)
    for ax in axes[-1, :].ravel():
        ax.set_xlabel('Vector Length', labelpad=7)
    for ax in axes[:, 0]:
        if camera_ready:
            ax.set_ylabel('Vectors Encoded / s', fontsize=13)
        else:
            ax.set_ylabel('Vectors Encoded / s')

    # show byte counts on the right
    fmt_str = "{}B Encodings"
    for i, ax in enumerate(axes[:, 1].ravel()):
        ax.yaxis.set_label_position('right')
        ax.set_ylabel(fmt_str.format((2 ** i) * 8), labelpad=10, fontsize=15)

    plt.tight_layout()
    plt.subplots_adjust(bottom=.15)
    if camera_ready:
        save_fig_png('encoding_speed')  # bypass mpl truetype pdf ineptitude
    else:
        save_fig('encoding_speed')
def distortion_fig(fake_data=False, l2=True, suptitle=None,
                   fname='l2_distortion', camera_ready=False):
    # experiment params:
    #   datasets = Sift1M, Convnet1M, LabelMe22k, MNIST
    #   bytes = [8, 16, 32]
    # layout: [ndatasets x nums_bytes] (ie, [4x3])
    # each subplot a barplot showing corr with err bars
    DATASETS = ['Sift1M', 'Convnet1M', 'LabelMe', 'MNIST']
    ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
    NBYTES_LIST = [8, 16, 32]
    figsize = (6, 8)
    sb.set_style('darkgrid')
    sb.set_context("talk", rc={'xtick.major.pad': 3})
    set_palette(ncolors=len(ALGOS))
    fig, axes = plt.subplots(4, 1, figsize=figsize, dpi=300)
    axes = axes.reshape((4, 1))

    if suptitle is None:
        suptitle = 'Quality of Approximate Distances'

    if fake_data:
        algo2offset = {'Bolt': .4, 'PQ': .3, 'OPQ': .45, 'PairQ': .5}
        nfake_corrs = 10
        dicts = []
        for dataset in DATASETS:
            for nbytes in NBYTES_LIST:
                for algo in ALGOS:
                    corrs = np.random.rand(nfake_corrs) / 2.
                    corrs += algo2offset[algo]
                    corrs *= .9 + .1 * nbytes / 32.
                    params = {'algo': algo, 'dataset': dataset,
                              'nbytes': '{}B'.format(nbytes)}
                    dicts += [dict(params, **{'corr': c}) for c in corrs]
        data = pd.DataFrame.from_records(dicts)

        # ------------------------ plot the data
        for d, dataset in enumerate(DATASETS):
            df = data.loc[data['dataset'] == dataset]
            df.rename(columns={'algo': ' '}, inplace=True)  # hide from legend
            ax = axes.ravel()[d]
            sb.barplot(x='nbytes', y='corr', hue=' ', data=df, ax=ax)
    else:
        ALGOS = ['Bolt No Quantize', 'PQ', 'OPQ']
        for d, dset in enumerate(DATASETS):
            if l2:
                path = os.path.join('../results/correlation_l2/', dset,
                                    'all_results.csv')
            else:
                path = os.path.join('../results/correlation_dotprods/', dset,
                                    'all_results.csv')
            df = pd.read_csv(path)
            print("path: ", path)  # Python 3 print
            pq4 = (df['_algo'] == 'PQ') & (df['_code_bits'] == 4)
            df.loc[pq4, '_algo'] = 'Bolt No Quantize'
            # rm results with bolt rotations
            bolt_rot = (df['_algo'] == 'Bolt') & (df['opq_iters'] > 0)
            df = df.loc[~bolt_rot]
            df.rename(columns={'_algo': ' '}, inplace=True)  # hide from legend
            all_nbytes = (df['_code_bits'] * df['_ncodebooks'] / 8).values
            df['nbytes'] = ["{}B".format(b) for b in all_nbytes.astype(int)]  # np.int was removed from numpy
            ax = axes.ravel()[d]
            sb.barplot(x='nbytes', y='corr', hue=' ', data=df, ax=ax,
                       capsize=.0025)
            ax.set_title(dset)

    # ------------------------ legend
    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    plt.figlegend(leg_lines, leg_labels, loc='lower center', ncol=2,
                  labelspacing=0)

    # ------------------------ axis cleanup / formatting
    for i, ax in enumerate(axes.ravel()):
        ax.set_ylim([.5, 1])
        ax.set_xlabel('', labelpad=-10)
        if l2:
            ax.set_ylabel('Correlation With\nTrue Distance')
        else:
            if camera_ready:
                ax.set_ylabel('Correlation With\nTrue Dot Product', fontsize=13)
            else:
                ax.set_ylabel('Correlation With\nTrue Dot Product')
        if ax.legend_:
            ax.legend_.remove()

    # ------------------------ show / save plot
    plt.tight_layout(h_pad=.8)
    plt.suptitle(suptitle, fontsize=16)
    plt.subplots_adjust(top=.90, bottom=.1)
    if camera_ready:
        save_fig_png(fname)  # bypass mpl truetype pdf ineptitude
    else:
        save_fig(fname)
def recall_r_fig(fake_data=False, suptitle=None, l2=True, fname='l2_recall',
                 camera_ready=False):
    # experiment params:
    #   datasets = Sift1M, Convnet1M, LabelMe22k, MNIST
    #   bytes = [8, 16, 32]
    #   R = 1, 10, 100, 1000
    DATASETS = ['Sift1M', 'Convnet', 'LabelMe', 'MNIST']
    ALGOS = ['Bolt', 'PQ', 'OPQ', 'PairQ']
    NBYTES_LIST = [8, 16, 32]
    # Rs = [1, 10, 100, 1000]
    Rs = [1, 5, 10, 50, 100, 500, 1000]

    sb.set_style('darkgrid')
    sb.set_context("talk")
    set_palette(ncolors=len(ALGOS))
    fig, axes = plt.subplots(4, 3, figsize=(6, 9))

    if suptitle is None:
        suptitle = 'Nearest Neighbor Recall'

    if fake_data:
        algo2offset = {'Bolt': -.1, 'PQ': -.2, 'OPQ': 0, 'PairQ': .1}
        data = np.random.rand(1, len(Rs), len(algo2offset))
        data = np.sort(data, axis=1)  # ensure fake recalls are monotonic
        for i, algo in enumerate(ALGOS):
            recall = data[:, :, i] + algo2offset[algo]
            data[:, :, i] = np.clip(recall, 0., 1.)
        line_styles_for_nbytes = {8: '-', 16: '-', 32: '-'}

        # plot the data
        for d, dataset in enumerate(DATASETS):
            axes_row = axes[d]
            for b, nbytes in enumerate(NBYTES_LIST):
                ax = axes_row[b]
                data_tmp = data * (.5 + nbytes / 64.)  # slightly less
                assert np.max(data_tmp) <= 1.
                for algo in ALGOS:
                    x = Rs
                    sb.tsplot(data=data_tmp, condition=ALGOS, time=x, ax=ax,
                              n_boot=100, ls=line_styles_for_nbytes[nbytes])
    else:  # real data
        DATASETS = ['Sift1M', 'Convnet1M', 'LabelMe', 'MNIST']
        ALGOS = ['Bolt', 'Bolt No Quantize', 'PQ', 'OPQ']
        for d, dset in enumerate(DATASETS):
            if l2:
                path = os.path.join('../results/recall_at_r/', dset, 'summary.csv')
            else:
                path = os.path.join('../results/recall_at_r_mips/', dset,
                                    'summary.csv')
            df = pd.read_csv(path)
            pq4 = (df['_algo'] == 'PQ') & (df['_code_bits'] == 4)
            df.loc[pq4, '_algo'] = 'Bolt No Quantize'
            # rm results with bolt rotations
            bolt_rot = (df['_algo'] == 'Bolt') & (df['opq_iters'] > 0)
            df = df.loc[~bolt_rot]
            df.rename(columns={'_algo': 'algo'}, inplace=True)
            all_nbytes = (df['_code_bits'] * df['_ncodebooks'] / 8).values
            df['nbytes'] = all_nbytes.astype(int)  # np.int was removed from numpy

            for b, nbytes in enumerate(NBYTES_LIST):
                ax = axes[d, b]
                data = df.loc[df['nbytes'] == nbytes]
                for algo in ALGOS:
                    df_row = data.loc[data['algo'] == algo]  # should be 1 row
                    if len(df_row) != 1:
                        print(df_row)
                        print("dset = ", dset)
                        print("algo = ", algo)
                    assert len(df_row) == 1
                    x = np.array(Rs)
                    y = [df_row['recall@{}'.format(r)].values[0] for r in x]
                    if camera_ready:
                        x = np.log10(x)
                    ax.plot(x, y, label=algo)
                ax.legend()

    # ------------------------ legend
    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    # for some reason, each algo appears 3x, so just take first
    leg_lines, leg_labels = leg_lines[:len(ALGOS)], leg_labels[:len(ALGOS)]
    plt.figlegend(leg_lines, leg_labels, loc='lower center', ncol=len(ALGOS),
                  labelspacing=0)

    # ------------------------ axis cleanup / formatting
    for i, ax_row in enumerate(axes):
        for j, ax in enumerate(ax_row):
            title = "{}, {}B".format(DATASETS[i], NBYTES_LIST[j])
            if camera_ready:
                ax.set_title(title, y=1.01, fontsize=15)
            else:
                ax.set_title(title, y=1.01)
            ax.set_ylim([0, 1])
            if not camera_ready:
                ax.set_xscale("log")
            # remove all legends except the very last one
            if (i != len(axes) or j != len(ax_row)) and ax.legend_:
                ax.legend_.remove()

    # remove x labels except for bottom axis
    for ax in axes[:-1, :].ravel():
        plt.setp(ax.get_xticklabels(), visible=False)

    if axes.shape[1] > 1:
        # hide y axis for axes not in left col
        for i, ax in enumerate(axes[:, 1:].ravel()):
            ax.get_yaxis().set_ticklabels([], labelpad=-10, fontsize=1)

    # ylabel left col
    for i, ax in enumerate(axes[:, 0].ravel()):
        ax.set_ylabel("Recall@R")

    # xlabel bottom rows
    if camera_ready:
        for i, ax in enumerate(axes.ravel()):
            ax.set_xticks([0, 1, 2, 3])
        for i, ax in enumerate(axes[-1, :].ravel()):
            ax.set_xticklabels(['0', '1', '2', '3'])
    else:
        for i, ax in enumerate(axes[-1, :].ravel()):
            # no idea why we need the dummy tick at the beginning
            ax.set_xticklabels(['', '0', '1', '2', ''])
        axes[-1, -1].set_xticklabels(['', '0', '1', '2', '3'])
    axes[-1, 1].set_xlabel("Log10(R)")

    # ------------------------ show / save plot
    plt.tight_layout(w_pad=.02)
    plt.suptitle(suptitle, fontsize=16)
    plt.subplots_adjust(top=.91, bottom=.11)
    if camera_ready:
        save_fig_png(fname)  # mpl saving as pdf stupid; just bypass it
    else:
        save_fig(fname)
def matmul_fig(fake_data=False, fname='matmul', camera_ready=False):
    # two line graphs
    # lines in both top and bottom = bolt {8,16,32}B, matmul
    # just use square mats of power-of-two lengths cuz best case for matmuls
    # in top one, one mat already encoded and Bolt just has to do queries
    # in bottom one, Bolt has to encode one of the mats as data before queries
    sb.set_style('darkgrid')
    sb.set_context("talk")
    pal = set_palette(ncolors=8)
    fig, axes = plt.subplots(2, 1, figsize=(6, 6))

    if fake_data:  # for debugging / prototyping fig
        SIZES = np.array([64, 128, 256, 512, 1024, 2048, 4096], dtype=np.float32)
        matmul_times = (SIZES ** 2.5).reshape((-1, 1))  # strassen-ish scaling
        bolt_times = ((SIZES ** 3) / 100 + 400).reshape((-1, 1))
        # pretend we had 5 trials; each trial gets a col, so rows are lengths
        matmul_times = np.tile(matmul_times, (1, 5))
        bolt_times = np.tile(bolt_times, (1, 5))
        matmul_times += np.random.randn(*matmul_times.shape) * SIZES.T.reshape((-1, 1)) / 10.
        bolt_times += np.random.randn(*bolt_times.shape) * SIZES.T.reshape((-1, 1)) / 10.
        matmul_times /= 1e9
        bolt8_times = bolt_times / 2e9
        bolt16_times = bolt_times / 1e9
        bolt32_times = bolt_times / .5e9

        dicts = []
        ALGOS = ['Bolt 8B', 'Bolt 16B', 'Bolt 32B', 'Floats (BLAS)']
        algo_times = [bolt8_times, bolt16_times, bolt32_times, matmul_times]
        for all_times, algo in zip(algo_times, ALGOS):
            for sz, times_for_sz in zip(SIZES, all_times):
                dicts += [{'algo': algo, 'trial': i, 'size': sz, 'y': t}
                          for i, t in enumerate(times_for_sz)]
        df = pd.DataFrame.from_records(dicts)
        df_enc = df
        df_no_enc = df
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_no_enc, ax=axes[0], n_boot=100)
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_enc, ax=axes[1], n_boot=100)
    else:
        ALGOS = ['Bolt 8B', 'Bolt 8B + Encode', 'Bolt 32B',
                 'Bolt 32B + Encode', 'Floats']

        def clean_df(df):
            df = df.loc[df['algo'].isin(ALGOS)]
            non_encode_algos = ['Bolt 8B', 'Bolt 16B', 'Bolt 32B']
            rm_idxs = (df['algo'].isin(non_encode_algos)) * (df['enc'] == 1)
            df = df.loc[~rm_idxs]
            # single .loc assignment instead of chained indexing
            # (avoids the SettingWithCopy pitfall)
            df.loc[df['algo'] == 'Floats', 'algo'] = 'Floats (BLAS)'
            return df

        df = results.matmul_results(which='square')
        df = clean_df(df)
        colors = {
            'Bolt 8B': pal[0], 'Bolt 8B + Encode': pal[0],
            # 'Bolt 16B': pal[2], 'Bolt 16B + Encode': pal[2],
            'Bolt 32B': pal[1], 'Bolt 32B + Encode': pal[1],
            'Floats (BLAS)': 'k',
        }
        df_no_enc = df.loc[df['enc'] != 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_no_enc, ax=axes[0], n_boot=100, color=colors,
                  linestyle='solid')
        df_enc = df.loc[df['enc'] == 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_enc, ax=axes[0], n_boot=100, color=colors,
                  linestyle='dotted', lw=4)

        df = results.matmul_results(which='tall')
        df = clean_df(df)
        df_no_enc = df.loc[df['enc'] != 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_no_enc, ax=axes[1], n_boot=100, color=colors,
                  linestyle='solid')
        df_enc = df.loc[df['enc'] == 1]
        sb.tsplot(time='size', value='y', condition='algo', unit='trial',
                  data=df_enc, ax=axes[1], n_boot=100, color=colors,
                  linestyle='dotted', lw=4)

    # ------------------------ legend
    ax = axes.ravel()[-1]
    leg_lines, leg_labels = ax.get_legend_handles_labels()
    # for some reason, each algo appears 3x, so just take first
    leg_lines, leg_labels = leg_lines[:len(ALGOS)], leg_labels[:len(ALGOS)]
    plt.figlegend(leg_lines, leg_labels, loc='lower center',
                  ncol=len(ALGOS) // 2, labelspacing=0)  # integer division for ncol

    # ------------------------ axis cleanup / formatting
    axes[0].set_title('Square Matrix Multiply Time', y=1.03, fontsize=16)
    axes[1].set_title('Tall Matrix Multiply Time', y=1.03, fontsize=16)
    for ax in axes.ravel():
        ax.legend_.remove()
        # basex/basey were renamed base in matplotlib 3.3
        ax.set_xscale('log', base=2)
        ax.set_yscale('log', base=10)
        if not camera_ready:
            ax.set_ylabel('Wall Time (s)')
    if camera_ready:
        axes[0].set_ylabel('Wall Time (s)')
        axes[1].set_ylabel('Wall Time (s)', labelpad=10)
    axes[0].set_xlabel('Matrix Side Length')
    axes[1].set_xlabel('Matrix Side Length')

    # ------------------------ show / save plot
    plt.tight_layout(h_pad=1.2)
    plt.subplots_adjust(bottom=.23)
    if camera_ready:
        save_fig_png('matmul_speed')  # bypass mpl truetype pdf ineptitude
    else:
        save_fig('matmul_speed')
import numpy as np
from scipy.io import mmread
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import glob
import re
import os
import sys
from sklearn.metrics.pairwise import cosine_similarity

sns.set(style='darkgrid')
sns.set_style(style='whitegrid')

# Make modules one directory up importable
current_dir = os.path.dirname(os.path.abspath("__file__"))
sys.path.append(str(current_dir) + '/../')

from setting_param import ratio_test
from setting_param import ratio_valid
from setting_param import all_node_num
from setting_param import MakeSample_repeat3_attribute_prediction_new_InputDir

EXIST_TABLE = np.load(MakeSample_repeat3_attribute_prediction_new_InputDir + '/exist_table.npy')

from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_Baseline_InputDir as Baseline_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_LSTM_InputDir as LSTM_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_STGGNN_InputDir as STGGNN_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_EGCNh_InputDir as EGCNh_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_STGCN_InputDir as STGCN_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_EGCNo_InputDir as EGCNo_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_GCN_InputDir as GCN_InputDir
from setting_param import Evaluation_repeat3_attribute_prediction_new_utilize_new_attribute_link_DynGEM_InputDir as DynGEM_InputDir
""" Code for generating plots. """ import pandas import seaborn as sns import matplotlib import msprime import os import matplotlib.patches as mpatches from matplotlib import pyplot as plt import numpy as np # Force matplotlib to not use any Xwindows backend. matplotlib.use('Agg') sns.set_style("darkgrid") def plot_stairway_Ne_estimate(infile, outfile): """ figure of N(t) for single run of stairwayplot """ nt = pandas.read_csv(infile, sep="\t", skiprows=5) nt = nt[nt['year'] > 10] f, ax = plt.subplots(figsize=(7, 7)) ax.set(xscale="log", yscale="log") ax.plot(nt['year'], nt['Ne_median'], c="red") ax.plot(nt['year'], nt['Ne_2.5%'], c='grey') ax.plot(nt['year'], nt['Ne_97.5%'], c='grey') f.savefig(outfile, bbox_inches='tight')
import multiprocessing
from gensim.models import Word2Vec
from time import time
import logging

# Setting up the loggings to monitor gensim
logging.basicConfig(filename="Logger(1).log",
                    format="%(levelname)s - %(asctime)s: %(message)s",
                    datefmt='%H:%M:%S', level=logging.INFO)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import pandas as pd

# Read sentences from file
logging.info("Start load sentences from file")
fname = "simpsons_trainingset.txt"
sentences = []
with open(fname, 'r', encoding='utf-8') as f:
    while True:
        sent = f.readline()
        if len(sent) == 0:
            break
        sent = sent[:-1].split()
        sentences.append(sent)
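# A minimal continuation sketch: train Word2Vec on the sentences loaded above.
# Hyperparameters are illustrative; the gensim 4.x API is assumed
# (vector_size replaced the older size argument).
cores = multiprocessing.cpu_count()
t0 = time()
w2v_model = Word2Vec(sentences=sentences, vector_size=100, window=5,
                     min_count=5, workers=max(1, cores - 1))
logging.info("Word2Vec trained in %.1f s", time() - t0)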
def process_data(file: str = './params.json') -> Tuple[pd.DataFrame, List[Any], Dict[str, pd.DataFrame]]:
    """Runs imaging analysis based on inputs from a parameter file"""
    sns.set_style('darkgrid')
    ############## PART 1 Preprocess data ##########################
    ################ Loading params ####################
    print(f"LOADING PARAMETERS FROM {file}")
    params = json.load(open(file, 'r'))
    mode = params.get("mode")
    dpaths = params.get("dpaths")
    offset_events = params.get("offset_events")
    signal_channel = params.get("signal_channel")
    reference_channel = params.get("reference_channel")
    deltaf_options = params.get("deltaf_options")
    z_score_before_alignment = params.get("z_score_before_alignment")
    analysis_blocks = params.get("analysis_blocks")
    path_to_ttl_event_params = params.get("path_to_ttl_event_params")
    path_to_social_excel = params.get("path_to_social_excel")
    trunc_start = params.get("trunc_start", 0)
    trunc_end = params.get("trunc_end", 10)

    ####################### PREPROCESSING DATA ###############################
    print(f'\n\n\n\nRUNNING IN MODE: {mode} \n\n\n')
    for dpath_ind, dpath in enumerate(dpaths):
        # Reads data from Tdt folder
        PrintNoNewLine('\nCannot find processed pkl object, reading TDT folder instead...')
        block = ReadNeoTdt(path=dpath, return_block=True)
        seglist = block.segments
        print('Done!')

        # Truncates first/last seconds of recording
        PrintNoNewLine('Truncating signals and events...')
        seglist = TruncateSegments(seglist, start=trunc_start, end=trunc_end, clip_same=True)
        print('Done!')

        # Iterates through each segment in seglist. Right now, there is only one segment
        for segment in seglist:
            segment_name = segment.name
            # Extracts the sampling rate from the signal channel
            try:
                sampling_rate = [x for x in segment.analogsignals if x.name == signal_channel][0].sampling_rate
            except IndexError:
                raise ValueError('Could not find your channels. Make sure you have the right names!')
            # Appends an analog signal object that is delta F/F. The name of the channel is
            # specified by deltaf_ch_name above. It is calculated using the function
            # NormalizeSignal in signal_processing.py. As of right now it:
            # 1) Lowpass filters signal and reference (default cutoff = 40 Hz, order = 5)
            # 2) Calculates deltaf/f for signal and reference (default is f - median(f) / median(f))
            # 3) Detrends deltaf/f using a savgol filter (default window_length = 3001, poly order = 1)
            # 4) Subtracts reference from signal
            # NormalizeSignal has a ton of options; you can pass in parameters using
            # the deltaf_options dictionary above. For example, if you want it to be mean
            # centered and not run the savgol_filter, set deltaf_options = {'mode': 'mean', 'detrend': False}
            PrintNoNewLine('\nCalculating delta_f/f...')
            all_signals = ProcessSignalData(seg=segment, sig_ch=signal_channel,
                                            ref_ch=reference_channel, name='DeltaF_F',
                                            fs=sampling_rate, highcut=40.0, **deltaf_options)
            # Appends an Event object that has all event timestamps and the proper label
            # (determined by the evtframe loaded earlier). Uses a tolerance (in seconds)
            # to determine if events co-occur. For example, if tolerance is 1 second
            # and ch1 fires an event, ch2 fires an event 0.5 seconds later, and ch3 fires
            # an event 3 seconds later, the output array will be [1, 1, 0] and will
            # match the label in evtframe (e.g. 'omission')
            print('Done!')
            if mode == 'TTL':
                # Loading event labeling/combo parameters
                path_to_event_params = path_to_ttl_event_params[dpath_ind]
            elif mode == 'manual':
                # Generates a json for reading excel file events
                path_to_event_params = 'imaging_analysis/manual_event_params.json'
                GenerateManualEventParamsJson(path_to_social_excel[dpath_ind],
                                              event_col='Bout type', name=path_to_event_params)
            # This loads our event params json
            start, end, epochs, evtframe, typeframe = LoadEventParams(dpath=path_to_event_params,
                                                                      mode=mode)
            # Appends processed event_param.json info to segment object
            AppendDataframesToSegment(segment, [evtframe, typeframe],
                                      ['eventframe', 'resultsframe'])
            # Processing events
            PrintNoNewLine('\nProcessing event times and labels...')
            if mode == 'manual':
                manualframe = path_to_social_excel[dpath_ind]
            else:
                manualframe = None
            ProcessEvents(seg=segment, tolerance=.1, evtframe=evtframe, name='Events',
                          mode=mode, manualframe=manualframe, event_col='Bout type',
                          start_col='Bout start', end_col='Bout end',
                          offset_events=offset_events[dpath_ind])
            print('Done!')
            # Takes processed events and segments them by trial number. Trial start
            # is determined by events in the list 'start' from LoadEventParams. This
            # can be set in the event_params.json. Additionally, the result of the
            # trial is set by matching the epoch type to the typeframe dataframe
            # (also from LoadEventParams). Examples of epochs are 'correct', 'omission', etc.
            # The result of this process is a dataframe with each event and their
            # timestamp in chronological order, with the trial number and trial outcome
            # appended to each event/timestamp.
            PrintNoNewLine('\nProcessing trials...')
            trials = ProcessTrials(seg=segment, name='Events', startoftrial=start,
                                   epochs=epochs, typedf=typeframe, appendmultiple=False)
            print('Done!')
            # With processed trials, we comb through each epoch ('correct', 'omission',
            # etc.) and find start/end times for each trial. Start time is determined
            # by the earliest 'start' event in a trial. Stop time is determined by
            # 1) the earliest 'end' event in a trial, 2) the 'last' event in a trial,
            # or 3) the 'next' event in the following trial.
            PrintNoNewLine('\nCalculating epoch times and durations...')
            GroupTrialsByEpoch(seg=segment, startoftrial=start, endoftrial=end,
                               endeventmissing='last')
            print('Done!')
            segment.processed = True

        ################### ALIGN DATA ##########################################
        # for segment in seglist:
        for block in analysis_blocks:
            # Extract analysis block params
            epoch_name = block['epoch_name']
            event = block['event']
            prewindow = block['prewindow']
            postwindow = block['postwindow']
            downsample = block['downsample']
            z_score_window = block['z_score_window']
            quantification = block['quantification']
            baseline_window = block['baseline_window']
            response_window = block['response_window']
            save_file_as = block['save_file_as']
            heatmap_range = block['plot_paramaters']['heatmap_range']  # note: key is spelled 'paramaters' in the params file
            smoothing_window = block['plot_paramaters']['smoothing_window']

            lookup = {}
            for channel in ['Filtered_signal', 'Filtered_reference', 'Detrended',
                            'Detrended_reference', 'DeltaF_F_or_Z_score']:
                print('\nAnalyzing "{}" trials centered around "{}". Channel: "{}" \n'.format(
                    epoch_name, event, channel))
Channel: "{}" \n'.format(epoch_name, event, channel))) dict_name = "{}_{}".format(epoch_name, channel) lookup[channel] = dict_name PrintNoNewLine('Centering trials and analyzing...') AlignEventsAndSignals(seg=segment, epoch_name=epoch_name, analog_ch_name=channel, event_ch_name='Events', event=event, event_type='label', prewindow=prewindow, postwindow=postwindow, window_type='event', clip=False, name=dict_name, to_csv=False, dpath=dpath) print('Done!') ######################## PROCESS SIGNALS (IF NECESSARY); PLOT; STATS ###### # Load data signal = segment.analyzed[lookup['Filtered_signal']]['all_traces'] reference = segment.analyzed[lookup['Filtered_reference']]['all_traces'] # Down sample data if downsample > 0: signal = Downsample(signal, downsample, index_col='index') reference = Downsample(reference, downsample, index_col='index') # # Scale signal if it is too weak (want std to be at least 1) # if (np.abs(signal.mean().std()) < 1.) or (np.abs(reference.mean().std()) < 1.): # scale_factor = 10**(np.ceil(np.log10(1/(signal.mean().std())))) # signal = signal * scale_factor # reference = reference * scale_factor # Get plotting read figure = plt.figure(figsize=(12, 12)) figure.subplots_adjust(hspace=1.3) ax1 = plt.subplot2grid((6, 2), (0, 0), rowspan=2) ax2 = plt.subplot2grid((6, 2), (2, 0), rowspan=2) ax3 = plt.subplot2grid((6, 2), (4, 0), rowspan=2) ax4 = plt.subplot2grid((6, 2), (0, 1), rowspan=3) ax5 = plt.subplot2grid((6, 2), (3, 1), rowspan=3) # fig, axs = plt.subplots(2, 2, sharex=False, sharey=False) # fig.set_size_inches(12, 12) ############################### PLOT AVERAGE EVOKED RESPONSE ###################### PrintNoNewLine('Calculating average filtered responses for {} trials...'.format(epoch_name)) signal_mean = signal.mean(axis=1) reference_mean = reference.mean(axis=1) signal_sem = signal.sem(axis=1) reference_sem = reference.sem(axis=1) signal_dc = signal_mean.mean() reference_dc = reference_mean.mean() signal_avg_response = signal_mean - signal_dc reference_avg_response = reference_mean - reference_dc if smoothing_window is not None: signal_avg_response = SmoothSignalWithPeriod(x=signal_avg_response, sampling_rate=float(sampling_rate)/downsample, ms_bin=smoothing_window, window='flat') reference_avg_response = SmoothSignalWithPeriod(x=reference_avg_response, sampling_rate=float(sampling_rate)/downsample, ms_bin=smoothing_window, window='flat') signal_sem = SmoothSignalWithPeriod(x=signal_sem, sampling_rate=float(sampling_rate)/downsample, ms_bin=smoothing_window, window='flat') reference_sem = SmoothSignalWithPeriod(x=reference_sem, sampling_rate=float(sampling_rate)/downsample, ms_bin=smoothing_window, window='flat') # # Scale signal if it is too weak (want std to be at least 1) # if (np.abs(signal_avg_response.std()) < 1.) 
            #     scale_factor = 10**(np.ceil(np.log10(1/(signal_avg_response).std())))
            #     signal_avg_response = signal_avg_response * scale_factor
            #     signal_se = signal_se * scale_factor
            #     reference_avg_response = reference_avg_response * scale_factor
            #     reference_se = reference_se * scale_factor

            # Plotting signal
            # current axis
            # curr_ax = axs[0, 0]
            curr_ax = ax1
            curr_ax.plot(signal_avg_response.index, signal_avg_response.values,
                         color='b', linewidth=2)
            curr_ax.fill_between(signal_avg_response.index,
                                 (signal_avg_response - signal_sem).values,
                                 (signal_avg_response + signal_sem).values,
                                 color='b', alpha=0.05)
            # Plotting reference
            curr_ax.plot(reference_avg_response.index, reference_avg_response.values,
                         color='g', linewidth=2)
            curr_ax.fill_between(reference_avg_response.index,
                                 (reference_avg_response - reference_sem).values,
                                 (reference_avg_response + reference_sem).values,
                                 color='g', alpha=0.05)
            # Plot event onset
            curr_ax.axvline(0, color='black', linestyle='--')
            curr_ax.set_ylabel('Voltage (V)')
            curr_ax.set_xlabel('Time (s)')
            curr_ax.legend(['465 nm', '405 nm', event])
            curr_ax.set_title('Average Lowpass Signal $\pm$ SEM: {} Trials'.format(signal.shape[1]))
            print('Done!')

            ############################# Calculate detrended signal #################################
            if z_score_before_alignment:
                detrended_signal = segment.analyzed[lookup['Detrended']]['all_traces']
                # Adding detrended reference
                detrended_ref = segment.analyzed[lookup['Detrended_reference']]['all_traces']
                detrended_ref_mean = detrended_ref.mean(axis=1)
                detrended_ref_sem = detrended_ref.sem(axis=1)
                if smoothing_window is not None:
                    detrended_ref_mean = SmoothSignalWithPeriod(
                        x=detrended_ref_mean, sampling_rate=float(sampling_rate)/downsample,
                        ms_bin=smoothing_window, window='flat')
                    detrended_ref_sem = SmoothSignalWithPeriod(
                        x=detrended_ref_sem, sampling_rate=float(sampling_rate)/downsample,
                        ms_bin=smoothing_window, window='flat')
            else:
                # Detrending
                PrintNoNewLine('Detrending signal...')
                fits = np.array([np.polyfit(reference.values[:, i], signal.values[:, i], 1)
                                 for i in range(signal.shape[1])])
                Y_fit_all = np.array([np.polyval(fits[i], reference.values[:, i])
                                      for i in np.arange(reference.values.shape[1])]).T
                Y_df_all = signal.values - Y_fit_all
                detrended_signal = pd.DataFrame(Y_df_all, index=signal.index)

            ################# PLOT DETRENDED SIGNAL ###################################
            detrended_signal_mean = detrended_signal.mean(axis=1)
            detrended_signal_sem = detrended_signal.sem(axis=1)
            if smoothing_window is not None:
                detrended_signal_mean = SmoothSignalWithPeriod(
                    x=detrended_signal_mean, sampling_rate=float(sampling_rate)/downsample,
                    ms_bin=smoothing_window, window='flat')
                detrended_signal_sem = SmoothSignalWithPeriod(
                    x=detrended_signal_sem, sampling_rate=float(sampling_rate)/downsample,
                    ms_bin=smoothing_window, window='flat')
            # Plotting signal
            # current axis
            curr_ax = ax2
            # # curr_ax = axs[1, 0]
            # curr_ax = plt.axes()
            if z_score_before_alignment:
                pass
            else:
                zscore_start = detrended_signal[z_score_window[0]:z_score_window[1]].index[0]
                zscore_end = detrended_signal[z_score_window[0]:z_score_window[1]].index[-1]
                zscore_height = detrended_signal[z_score_window[0]:z_score_window[1]].mean(axis=1).min()
                if zscore_height < 0:
                    zscore_height = zscore_height * 1.3
                else:
                    zscore_height = zscore_height * 0.7
                curr_ax.plot([zscore_start, zscore_end], [zscore_height, zscore_height],
                             color='.1', linewidth=3)
            # Plot detrended signal
            curr_ax.plot(detrended_signal_mean.index, detrended_signal_mean.values,
                         color='b', linewidth=2)
            curr_ax.fill_between(detrended_signal_mean.index,
                                 (detrended_signal_mean - detrended_signal_sem).values,
                                 (detrended_signal_mean + detrended_signal_sem).values,
                                 color='b', alpha=0.05)
            # Plot detrended reference if necessary
            if z_score_before_alignment:
                curr_ax.plot(detrended_ref_mean.index, detrended_ref_mean.values,
                             color='g', linewidth=2)
                curr_ax.fill_between(detrended_ref_mean.index,
                                     (detrended_ref_mean - detrended_ref_sem).values,
                                     (detrended_ref_mean + detrended_ref_sem).values,
                                     color='g', alpha=0.05)
            # Plot event onset
            if z_score_before_alignment:
                curr_ax.legend(['465 nm', '405 nm'])
            else:
                curr_ax.legend(['z-score window'])
            curr_ax.axvline(0, color='black', linestyle='--')
            curr_ax.set_ylabel('Voltage (V) or DeltaF/F %')
            curr_ax.set_xlabel('Time (s)')
            curr_ax.set_title('Average Detrended Signal $\pm$ SEM')
            print('Done!')

            # ########### Calculate z-scores ###############################################
            if z_score_before_alignment:
                zscores = segment.analyzed[lookup['DeltaF_F_or_Z_score']]['all_traces']
            else:
                PrintNoNewLine('Calculating Z-Scores for %s trials...' % event)
                # calculate z_scores
                zscores = ZScoreCalculator(detrended_signal,
                                           baseline_start=z_score_window[0],
                                           baseline_end=z_score_window[1])
                print('Done!')

            ############################ Make rasters #######################################
            PrintNoNewLine('Making heatmap for %s trials...' % event)
            # index that is closest to event onset
            # curr_ax = axs[0, 1]
            curr_ax = ax4
            # curr_ax = plt.axes()
            # Plot nearest point to time zero
            zero = np.concatenate([
                np.where(zscores.index == np.abs(zscores.index).min())[0],
                np.where(zscores.index == -1 * np.abs(zscores.index).min())[0]
            ]).min()
            for_hm = zscores.T.copy()
            # for_hm.index = for_hm.index + 1
            for_hm.columns = np.round(for_hm.columns, 1)
            try:
                sns.heatmap(for_hm.iloc[::-1], center=0, robust=True, ax=curr_ax, cmap='bwr',
                            xticklabels=int(for_hm.shape[1] * .15),
                            yticklabels=int(for_hm.shape[0] * .15),
                            vmin=heatmap_range[0], vmax=heatmap_range[1])
            except Exception:
                sns.heatmap(for_hm.iloc[::-1], center=0, robust=True, ax=curr_ax, cmap='bwr',
                            xticklabels=int(for_hm.shape[1] * .15),
                            vmin=heatmap_range[0], vmax=heatmap_range[1])
            curr_ax.axvline(zero, linestyle='--', color='black', linewidth=2)
            curr_ax.set_ylabel('Trial')
            curr_ax.set_xlabel('Time (s)')
            if z_score_before_alignment:
                curr_ax.set_title('Z-Score or DeltaF/F Heat Map')
            else:
                curr_ax.set_title('Z-Score Heat Map \n Baseline Window: {} to {} Seconds'.format(
                    z_score_window[0], z_score_window[1]))
            print('Done!')

            ########################## Plot Z-score waveform ##########################
            PrintNoNewLine('Plotting Z-Score waveforms...')
            zscores_mean = zscores.mean(axis=1)
            zscores_sem = zscores.sem(axis=1)
            if smoothing_window is not None:
                zscores_mean = SmoothSignalWithPeriod(
                    x=zscores_mean, sampling_rate=float(sampling_rate)/downsample,
                    ms_bin=smoothing_window, window='flat')
                zscores_sem = SmoothSignalWithPeriod(
                    x=zscores_sem, sampling_rate=float(sampling_rate)/downsample,
                    ms_bin=smoothing_window, window='flat')
            # Plotting signal
            # current axis
            # curr_ax = axs[1, 1]
            curr_ax = ax3
            # curr_ax = plt.axes()
            # Plot baseline and response windows
            baseline_start = zscores[baseline_window[0]:baseline_window[1]].index[0]
            baseline_end = zscores[baseline_window[0]:baseline_window[1]].index[-1]
            response_start = zscores[response_window[0]:response_window[1]].index[0]
            response_end = zscores[response_window[0]:response_window[1]].index[-1]
            baseline_height = zscores[baseline_window[0]:baseline_window[1]].mean(axis=1).min() - 0.5
            response_height = zscores[response_window[0]:response_window[1]].mean(axis=1).max() + .5
            curr_ax.plot([baseline_start, baseline_end], [baseline_height, baseline_height],
                         color='.6', linewidth=3)
            curr_ax.plot([response_start, response_end], [response_height, response_height],
                         color='r', linewidth=3)
            curr_ax.plot(zscores_mean.index, zscores_mean.values, color='b', linewidth=2)
            curr_ax.fill_between(zscores_mean.index,
                                 (zscores_mean - zscores_sem).values,
                                 (zscores_mean + zscores_sem).values,
                                 color='b', alpha=0.05)
            # Plot event onset
            curr_ax.axvline(0, color='black', linestyle='--')
            curr_ax.set_xlabel('Time (s)')
            curr_ax.legend(['baseline window', 'response window'])
            if z_score_before_alignment:
                curr_ax.set_title('465 nm Average Z-Score or DeltaF/F Signal $\pm$ SEM')
                curr_ax.set_ylabel('Z-Score or DeltaF/F %')
            else:
                curr_ax.set_title('465 nm Average Z-Score Signal $\pm$ SEM')
                curr_ax.set_ylabel('Z-Score')
            print('Done!')

            ##################### Quantification #################################
            PrintNoNewLine('Performing statistical testing on baseline vs response periods...')
            if quantification is not None:
                # Generating summary statistics
                if quantification == 'AUC':
                    base = np.trapz(zscores[baseline_window[0]:baseline_window[1]], axis=0)
                    resp = np.trapz(zscores[response_window[0]:response_window[1]], axis=0)
                    ylabel = 'AUC'
                elif quantification == 'mean':
                    base = np.mean(zscores[baseline_window[0]:baseline_window[1]], axis=0)
                    resp = np.mean(zscores[response_window[0]:response_window[1]], axis=0)
                    ylabel = 'Z-Score or DeltaF/F'
                elif quantification == 'median':
                    base = np.median(zscores[baseline_window[0]:baseline_window[1]], axis=0)
                    resp = np.median(zscores[response_window[0]:response_window[1]], axis=0)
                    ylabel = 'Z-Score or DeltaF/F'
                if isinstance(base, pd.core.series.Series):
                    base = base.values
                    resp = resp.values
                # NB: computed from the mean rather than the standard deviation;
                # a conventional SEM would be np.std(x) / np.sqrt(x.shape[0])
                base_sem = np.mean(base) / np.sqrt(base.shape[0])
                resp_sem = np.mean(resp) / np.sqrt(resp.shape[0])

                # Testing for normality (D'Agostino's K-Squared Test) (N > 8)
                if base.shape[0] > 8:
                    normal_alpha = 0.05
                    base_normal = stats.normaltest(base)
                    resp_normal = stats.normaltest(resp)
                else:
                    normal_alpha = 0.05
                    base_normal = [1, 1]
                    resp_normal = [1, 1]

                difference_alpha = 0.05
                if (base_normal[1] >= normal_alpha) or (resp_normal[1] >= normal_alpha):
                    test = 'Wilcoxon Signed-Rank Test'
                    stats_results = stats.wilcoxon(base, resp)
                else:
                    test = 'Paired Sample T-Test'
                    stats_results = stats.ttest_rel(base, resp)
                if stats_results[1] <= difference_alpha:
                    sig = '**'
                else:
                    sig = 'ns'

                # curr_ax = plt.axes()
                curr_ax = ax5
                ind = np.arange(2)
                labels = ['baseline', 'response']
                bar_kwargs = {'width': 0.7, 'color': ['.6', 'r'], 'linewidth': 2, 'zorder': 5}
                err_kwargs = {'zorder': 0, 'fmt': 'none', 'linewidth': 2, 'ecolor': 'k'}
                curr_ax.bar(ind, [base.mean(), resp.mean()], tick_label=labels, **bar_kwargs)
                curr_ax.errorbar(ind, [base.mean(), resp.mean()], yerr=[base_sem, resp_sem],
                                 capsize=5, **err_kwargs)
                x1, x2 = 0, 1
                y = np.max([base.mean(), resp.mean()]) + np.max([base_sem, resp_sem]) * 1.3
                h = y * 1.5
                col = 'k'
                curr_ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
                curr_ax.text((x1 + x2) * .5, y + h, sig, ha='center', va='bottom', color=col)
                curr_ax.set_ylabel(ylabel)
                curr_ax.set_title('Baseline vs. Response Changes in Z-Score or DeltaF/F Signal \n {} of {}s'.format(
                    test, quantification))
                print('Done!')

            ################# Save Stuff ##################################
            PrintNoNewLine('Saving everything...')
            save_path = os.path.join(dpath, segment_name, save_file_as)
            figure.savefig(save_path + '.png', format='png')
            figure.savefig(save_path + '.pdf', format='pdf')
            plt.close()
            print('Done!')

            # Trial z-scores
            # Fix columns
            zscores.columns = np.arange(1, zscores.shape[1] + 1)
            zscores.columns.name = 'trial'
            # Fix rows
            zscores.index.name = 'time'
            zscores.to_csv(save_path + '_zscores_or_deltaf_aligned.csv')
            Downsample(zscores, downsample, index_col='time').to_csv(
                save_path + '_zscores_or_deltaf_aligned_downsampled.csv')

            if quantification is not None:
                # Trial point estimates
                point_estimates = pd.DataFrame({'baseline': base, 'response': resp},
                                               index=np.arange(1, base.shape[0] + 1))
                point_estimates.index.name = 'trial'
                point_estimates.to_csv(save_path + '_point_estimates.csv')

            # Save metadata
            metadata = {
                'baseline_window': baseline_window,
                'response_window': response_window,
                'quantification': quantification,
                'original_sampling_rate': float(sampling_rate),
                'downsampled_sampling_rate': float(sampling_rate) / downsample
            }
            with open(save_path + '_metadata.json', 'w') as fp:
                json.dump(metadata, fp)

            # Save smoothed data
            smoothed_zscore = pd.concat([zscores_mean, zscores_sem], axis=1)
            smoothed_zscore.columns = ['mean', 'sem']
            smoothed_zscore.to_csv(save_path + '_smoothed_zscores_or_deltaf.csv')
            Downsample(smoothed_zscore, downsample, index_col='time').to_csv(
                save_path + '_smoothed_zscores_or_deltaf_downsampled.csv')

        print('Finished processing datapath: %s' % dpath)

    return trials, seglist, all_signals
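# Hypothetical invocation of process_data; this assumes a params.json alongside
# the script, as the default argument suggests:
if __name__ == '__main__':
    trials, seglist, all_signals = process_data('./params.json')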
# 1. What is the change in price of the stock over time?
# 2. What is the daily return of the stock on average?
# 3. What is the moving average of the various stocks?
# 4.1. What is the correlation between different stocks' closing prices?
# 4.2. What is the correlation between different stocks' daily returns?
# 5. How much value is at risk if we invest in a particular stock?
# 6. How can we attempt to predict future stock behavior?

# %%
# __future__ imports must come before any other statement in a module
from __future__ import division

import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# %%
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# %%
from pandas_datareader import DataReader

# %%
from datetime import datetime

# %%
# This is a tech list of the big companies: Apple, Google, Microsoft, and Amazon
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']

# %%
end = datetime.now()
start = datetime(end.year - 1, end.month, end.day)
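# %%
# A plausible next step for this analysis: pull each ticker's history with
# DataReader. The 'yahoo' data_source is an assumption here (the original does
# not show which source it uses, and source availability changes over time):
stock_data = {stock: DataReader(stock, 'yahoo', start, end) for stock in tech_list}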
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 23 17:06:01 2019

@author: qianqianwang
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
from scipy.stats import norm, skew
import time

# read data
df_train = pd.read_csv('df37_window1_train.csv')
df_test = pd.read_csv('df37_window1_test.csv')

from collections import Counter


# Outlier detection
def detect_outliers(df, n, features):
    """
    Takes a dataframe df of features and returns a list of the indices
    corresponding to the observations containing more than n outliers
    according to the Tukey (IQR) method.
    """
    # The body below is a minimal reconstruction of the standard Tukey/IQR
    # approach the docstring describes (the original body was cut off in the
    # source); the 1.5 * IQR fence is the conventional choice.
    outlier_indices = []
    for col in features:
        Q1 = np.percentile(df[col], 25)
        Q3 = np.percentile(df[col], 75)
        step = 1.5 * (Q3 - Q1)
        outlier_indices.extend(df[(df[col] < Q1 - step) | (df[col] > Q3 + step)].index)
    counts = Counter(outlier_indices)
    return [idx for idx, cnt in counts.items() if cnt > n]
            if np.any(np.isnan(check_nan)):
                continue
            c, p = stats.pearsonr(var, data[:, mm, nn])
            corr.data[mm, nn] = c
            # convert trend to per decade
            sig = (p < (1 - ci))
            corr.mask[mm, nn] = ~sig
    return corr


corr_DJF = corr_season(t2m_ave, sst_xr_DJF)
corr_SON = corr_season(t2m_ave, sst_xr_SON)
corr_JJA = corr_season(t2m_ave, sst_xr_JJA)
corr_MAM = corr_season(t2m_ave, sst_xr_MAM)

# plot, mind the projection which matters
sns.set_style('white', {'font.family': 'Arial'})
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
lonlbl = [u'160°E', u'10°W', u'40°W', u'70°W', u'100°W']
latlbl = [u'10°S', u'5°S', u'0°', u'5°N', u'10°N']

fig = plt.figure(figsize=(12, 9), dpi=300)
ax0 = fig.add_subplot(221, projection=ccrs.PlateCarree(central_longitude=180))
plt.subplots_adjust(wspace=0.5, hspace=0.2)  # adjust the spacing of subplots
cs1 = ax0.contour(lon_sst, lat_sst, sst_xr_DJF.mean(axis=0),
                  np.linspace(-0.02, 0.03, 6), colors='k',
                  transform=ccrs.PlateCarree())
cs2 = ax0.contourf(lon_sst, lat_sst, corr_DJF.data,
                   np.linspace(-0.5, 0.5, 11), cmap=plt.cm.RdBu_r,
                   extend='both', transform=ccrs.PlateCarree())
ax0.contourf(lon_sst, lat_sst, corr_DJF.mask.astype('int'), [-0.5, 0.5],
             hatches=['.', 'none'], colors='none', zorder=10,
             transform=ccrs.PlateCarree())
ax0.clabel(cs1, inline=1, fontsize=8)
ax0.set_extent([120, 280, -60, 60], crs=ccrs.PlateCarree())
                            dtype=float, usecols=[1], skiprows=3)
P_k11_evo_opt = np.loadtxt('Knigge11/knigge11_evo_track_opt.dat',
                           dtype=float, usecols=[2], skiprows=3)
zeta_k11_evo_opt = np.loadtxt('Knigge11/knigge11_evo_track_opt.dat',
                              dtype=float, usecols=[7], skiprows=3)

# produce plot
seaborn.set(style='ticks')
seaborn.set_style({"xtick.direction": "in", "ytick.direction": "in"})
# plt.rcParams['xtick.major.pad'] = '15'
# plt.rcParams['ytick.major.pad'] = '10'
# plt.axis([1, 1.7001, 0, 0.15001])
# plt.axis([0.02, 0.2, 0.08, 0.3])
# plt.axis([1.0, 3.001, 0.02, 0.301])
# plt.axis([1.1, 1.6, 0.03, 0.11])
# plt.axis([0.01, 0.2, 0.08, 0.260])
# plt.tick_params(top='on', right='on')
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_xscale('log')
                              False, save_file=scratch_dir / "CHECKM8_RAND_APPROX.png")
data.append({
    "Strategy": str(scheduler_lp_rand.solve_strategy.value),
    "Name": "CHECKM8_RAND_APPROX",
    "CPU": scheduler_lp_rand.schedule_aux_data.cpu,
    "Activation RAM": scheduler_lp_rand.schedule_aux_data.activation_ram,
})

# Plot solution memory usage vs cpu scatter plot
sns.set()
sns.set_style("white")
plt.figure(figsize=(4, 4))
plt.xlabel("Activation memory usage (GB)")
plt.ylabel("GPU time (ms)")
color, marker, markersize = SolveStrategy.get_plot_params(scheduler_result_all.solve_strategy)
plt.axhline(y=scheduler_result_all.schedule_aux_data.cpu / 1000, color=color,
            linestyle="--", label="Checkpoint all (ideal)")
if args.model_name in LINEAR_MODELS:
    color, marker, markersize = SolveStrategy.get_plot_params(scheduler_result_sqrtn.solve_strategy)
import os
import seaborn as sns
import pickle as pkl

pal = sns.color_palette('Blues')
sns.set_context("paper", font_scale=1.5)
sns.set_style("ticks")
import numpy as np
import matplotlib.pyplot as plt
import pdb
from scipy import stats
import matplotlib.cm as cm

# In[173]:
dic = pkl.load(open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_zR.p', 'rb'))
# MSG_TRMM_temp_pcp_300px2004-2013_new.p', 'rb'))
dic2 = pkl.load(open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_size_zR.p', 'rb'))

# In[174]:
_p = np.array(dic['pmax'])   # 98th perc per MCS
_t = np.array(dic['tmin'])   # mean T
_clat = np.array(dic['clat'])
_area = np.array(dic['area']) * 25
_isfin = np.array(dic['isfin'])
_po30 = np.array(dic['po30'])
_perc = np.array(dic['pperc'])
_pp = np.array(dic['p'])
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from pydotplus import graph_from_dot_data
'''
Graphviz is not a python tool. The python packages at pypi provide a convenient
way of using Graphviz in python code. You still have to install the Graphviz
executables, which are not pythonic, thus not shipped with these packages.
You can install those e.g. with a general-purpose package manager such as homebrew:
    brew install graphviz
Otherwise you may see:
    pydot.InvocationException: GraphViz's executables not found
'''
from sklearn.tree import export_graphviz

sns.set_style("dark")
colors = ["#800000", "#45ada8", "#2a363b", "#fecea8", "#99b898", "#e5fcc2"]
sns.set_palette(sns.color_palette(colors))

breast_data = pd.read_csv('./data/data.csv')
# breast_data = breast_data.drop(['ID','Unnamed: 32'],axis=1)

# drop diagnosis, create X and Y
y = breast_data['diagnosis']
x_ = breast_data.drop('diagnosis', axis=1)
x = x_.drop('id', axis=1)

# replace M and B with 1s and 0s
y = y.replace(['M', 'B'], [1, 0])
columns = x.columns
# %% Play with scanpy's PCA
sc.tl.pca(adata_pre, n_comps=50, zero_center=True, svd_solver='auto',
          random_state=0, return_info=False, use_highly_variable=None,
          dtype='float32', copy=False, chunked=False, chunk_size=None)

# %%
classvecser = adata_pre.obs['survivor']
classvec = pd.DataFrame(classvecser)
PCs = adata_pre.obsm['X_pca']
PCdf = pd.DataFrame(PCs)
classvec.reset_index(drop=True, inplace=True)
PCdf.reset_index(drop=True, inplace=True)
PC_df = pd.concat([classvec['survivor'], PCdf], axis=1)

# %%
sns.set_style('white')
from matplotlib.pyplot import plot, show, draw, figure, cm
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(6, 6))
ax = sns.scatterplot(PC_df[0], PC_df[1], hue=PC_df['survivor'])
ax.set(xlabel='PC1', ylabel='PC2')
ax1 = sns.scatterplot(PC_df[1], PC_df[2], hue=PC_df['survivor'])
ax1.set(xlabel='PC2', ylabel='PC3')
ax2 = sns.scatterplot(PC_df[2], PC_df[3], hue=PC_df['survivor'])
ax2.set(xlabel='PC3', ylabel='PC4')
ax3 = sns.scatterplot(PC_df[0], PC_df[2], hue=PC_df['survivor'])
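# %%
# Note: the scatterplot calls above all draw into the same implicit axes, so the
# four PC panels overplot one another. A hedged fix using an explicit 2x2 grid:
fig, axs = plt.subplots(2, 2, figsize=(10, 10))
pairs = [(0, 1), (1, 2), (2, 3), (0, 2)]  # the PC index pairs plotted above
for ax_i, (i, j) in zip(axs.ravel(), pairs):
    sns.scatterplot(x=PC_df[i], y=PC_df[j], hue=PC_df['survivor'], ax=ax_i)
    ax_i.set(xlabel='PC{}'.format(i + 1), ylabel='PC{}'.format(j + 1))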
import datetime  # used by current_year below; missing from the original imports

import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from scipy.stats import norm
from scipy import stats
from scipy.stats import skew
import matplotlib as mpl
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.serif'] = ['SimHei']
mpl.rcParams['font.size'] = 10
sns.set_style("darkgrid", {"font.sans-serif": ['simhei', 'Arial']})

NROWS = 200000


def cal_car_state(fix_record):
    # Scores a car's condition from its repair record; the category strings are
    # Chinese labels, translated in the inline comments below.
    state = 0
    if isinstance(fix_record, str) is True:
        a = fix_record.count('喷漆修复')   # paint repair
        b = fix_record.count('覆盖件更换')  # panel replacement
        c = fix_record.count('钣金修复')   # sheet-metal repair
        d = fix_record.count('有色差')     # color mismatch
        state = a * 0.2 + b * 0.3 + c * 0.4 + d * 0.1
        if (a + b + c + d) > 0:  # guard: avoid ZeroDivisionError when no category matches
            state = state / (a + b + c + d)
    return (1 - state) * 10


current_year = datetime.datetime.now().year
def plot_score_distribution(config):
    info('plot_score_distribution()')
    cwd_slash = gen_cwd_slash(config)

    thdf = pd.read_csv(cwd_slash('calibrated_threshold.csv'), index_col=0)
    thdf.index = thdf.index.astype(str)

    valid_agg = pd.read_csv(cwd_slash('valid_aggregated_prediction.csv'))
    valid_agg = valid_agg.melt(id_vars='source_img_id', var_name='class_id', value_name='p')
    valid_agg['logit'] = logit(valid_agg['p'])
    valid_grouped = valid_agg.groupby('class_id')

    test_agg = pd.read_csv(cwd_slash('test_aggregated_prediction.csv'))
    test_agg = test_agg.melt(id_vars='source_img_id', var_name='class_id', value_name='p')
    test_agg['logit'] = logit(test_agg['p'])
    test_grouped = test_agg.groupby('class_id')

    mean_std_df = pd.DataFrame({
        'valid_mean': valid_grouped['logit'].mean(),
        'valid_std': valid_grouped['logit'].std(),
        'test_mean': test_grouped['logit'].mean(),
        'test_std': test_grouped['logit'].std(),
    })
    thdf = thdf.join(mean_std_df, how='left')
    thdf['th_logit'] = logit(thdf['best_threshold'])
    thdf['z_score'] = (thdf['th_logit'] - thdf['valid_mean']) / thdf['valid_std']
    thdf['th_adjusted_logit'] = thdf['z_score'] * thdf['test_std'] + thdf['test_mean']
    thdf['th_adjusted'] = np.exp(thdf['th_adjusted_logit']) / (np.exp(thdf['th_adjusted_logit']) + 1)
    print(thdf)

    valid_agg['group'] = 'valid'
    test_agg['group'] = 'test'
    both_agg = pd.concat([valid_agg, test_agg])
    both_agg['class_id'] = [f"{int(xx):02d}-{class_labels[int(xx)]}" for xx in both_agg['class_id']]

    plt.figure(figsize=(30, 16))
    sns.set_style("whitegrid")
    sns.stripplot(x='class_id', y='logit', hue='group', data=both_agg,
                  jitter=0.3, alpha=0.3, size=1, dodge=True)
    # sns.violinplot(x='class_id', y='logit', hue='group', data=both_agg, inner=None, color='.8', cut=0, bw=0.001)
    for id_, row in thdf.iterrows():
        plt.plot([int(id_) - 0.4, int(id_)],
                 [logit(row['best_threshold']), logit(row['best_threshold'])])
        plt.plot([int(id_), int(id_) + 0.4],
                 [logit(row['best_threshold'] * 0.5), logit(row['best_threshold'] * 0.5)])
        plt.plot([int(id_), int(id_) + 0.4],
                 [logit(row['th_adjusted']), logit(row['th_adjusted'])], dashes=[1, 1])
    plt.axhline(0, dashes=[2, 1, 1, 1])

    save_path = cwd_slash('score_distribution.png')
    plt.xticks(rotation=30, verticalalignment='top', horizontalalignment='right')
    plt.savefig(save_path, dpi=300)
    debug(f"saved to {save_path}")
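# A hedged numeric check of the threshold-transfer logic above: the adjusted
# threshold is obtained by matching logit-space z-scores between the validation
# and test score distributions, and the final exp/(exp+1) step is just the
# logistic sigmoid (scipy's expit). The moment values below are illustrative:
from scipy.special import logit, expit

valid_mean, valid_std = -2.0, 1.0
test_mean, test_std = -1.5, 1.2
th = 0.10
z = (logit(th) - valid_mean) / valid_std
th_adjusted = expit(z * test_std + test_mean)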
Unstable particles are emitted from a source and decay at a distance x, a real
number that has an exponential probability distribution with [parameter] lambda.
Decay events can only be observed if they occur in a window extending from
x = 1 cm to x = 20 cm. N decays are observed at locations {1.5, 2, 3, 4, 5, 12} cm.
What is [the distribution of] lambda?
"""
import sys

import numpy as np
import pandas as pd
from scipy.integrate import quad
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style(style='dark')

# Routine for constructing posterior predictive as a mixture of pdfs:
from posterior_predictive_pmf import functional_posterior_predictive_pmf


def expon(X, lambd):
    """
    Note: could equivalently use scipy.stats.expon.
    X and/or lambd may be vectors.
    """
    return (1 / lambd) * np.exp(-X / lambd)


def expon_integral(lambd, xmin, xmax):
    """
    Probability mass of the exponential pdf between xmin and xmax, used to
    normalize the likelihood over the observable window.
    """
    # The docstring and body were cut off in the source; this is a minimal
    # sketch of the integral the name and the quad import suggest.
    val, _ = quad(lambda x: expon(x, lambd), xmin, xmax)
    return val
Maximilian N. Günther
MIT Kavli Institute for Astrophysics and Space Research,
Massachusetts Institute of Technology,
77 Massachusetts Avenue,
Cambridge, MA 02109, USA
Email: [email protected]
Web: www.mnguenther.com
"""
from __future__ import print_function, division, absolute_import

#::: plotting settings
import seaborn as sns
sns.set(context='paper', style='ticks', palette='deep', font='sans-serif',
        font_scale=1.5, color_codes=True)
sns.set_style({"xtick.direction": "in", "ytick.direction": "in"})
sns.set_context(rc={'lines.markeredgewidth': 1})

#::: modules
import numpy as np
import matplotlib.pyplot as plt
import os
from astropy.io import fits
from collections import OrderedDict

#::: exoworld modules
from exoworlds.lightcurves import expand_flags
def initUI(self):
    pg.setConfigOption('background', 'w')
    params = {'figure.figsize': [4, 4], 'figure.dpi': 300, 'savefig.dpi': 300}
    plt.rcParams.update(params)
    sns.set()
    sns.set_style("white")
    sns.set_palette("muted")
    sns.set_context("paper")

    self.fullSignal = []
    self.shiftFullSignal = []
    self.shiftFullSignalNormal = []
    self.fSig = ''

    contain = QSplitter(Qt.Horizontal)
    buttons = QtWidgets.QVBoxLayout()
    graphics = QSplitter(Qt.Vertical)
    imaFrac = QtWidgets.QHBoxLayout()
    frac = QtWidgets.QVBoxLayout()
    lagBox = QFormLayout()
    results = QFormLayout()

    self.btnLoadSig = QPushButton('Load Signal')
    self.btnLoadSig.clicked.connect(self.loadSignal)
    self.btnLoadSig.setStyleSheet("background-color:#fbe9e7; font-size: 18px")
    self.lblSignal = QLabel('')
    self.lblSignal.setStyleSheet("font-size: 18px")
    self.checkTotalSignal = QCheckBox('Signal')
    self.checkTotalSignal.setStyleSheet("font-size: 18px")

    self.cmbFractal = QComboBox()
    self.cmbFractal.setStyleSheet("background-color:#fbe9e7; font-size: 18px")
    self.cmbFractal.addItem("Triangle")   # Element 0
    self.cmbFractal.addItem("Square")     # Element 1
    self.cmbFractal.addItem("Pentagon")   # Element 2
    self.cmbFractal.addItem("Hexagon")    # Element 3
    # self.cmbFractal.addItem("Octagon")  # This would now be element 4

    self.btnDo = QPushButton("Do Fractal")
    self.btnDo.setDisabled(True)
    self.btnDo.setStyleSheet("font-size: 18px")
    self.btnDo.clicked.connect(self.showDialog)
    self.btnFracInter = QPushButton("Points-Inter")
    self.btnFracInter.setDisabled(True)
    self.btnFracInter.setStyleSheet("font-size: 18px")
    self.btnFracInter.clicked.connect(self.update)

    self.txtLag = QLineEdit('0')
    self.txtLag.setStyleSheet("font-size: 18px")
    self.txtLag.setEnabled(True)
    lblLag = QLabel("LAG")
    lblLag.setStyleSheet("font-size: 18px")
    lagBox.addRow(lblLag, self.txtLag)

    self.btnSub = QPushButton("Graph Poincare")
    self.btnSub.setDisabled(True)
    self.btnSub.setStyleSheet("font-size: 18px")
    self.btnSub.clicked.connect(self.poincSub)

    self.lblsd1 = QLabel("SD1: ")
    self.lblsd1.setEnabled(True)
    self.lblsd1.setStyleSheet("font-size: 18px")
    self.txtsd1 = QLineEdit('')
    self.txtsd1.setEnabled(True)
    self.txtsd1.setStyleSheet("font-size: 18px")
    self.lblsd2 = QLabel("SD2: ")
    self.lblsd2.setEnabled(True)
    self.lblsd2.setStyleSheet("font-size: 18px")
    self.txtsd2 = QLineEdit('')
    self.txtsd2.setEnabled(True)
    self.txtsd2.setStyleSheet("font-size: 18px")
    self.lblc1 = QLabel("C1: ")
    self.lblc1.setEnabled(True)
    self.lblc1.setStyleSheet("font-size: 18px")
    self.txtc1 = QLineEdit('')
    self.txtc1.setEnabled(True)
    self.txtc1.setStyleSheet("font-size: 18px")
    self.lblc2 = QLabel("C2: ")
    self.lblc2.setEnabled(True)
    self.lblc2.setStyleSheet("font-size: 18px")
    self.txtc2 = QLineEdit('')
    self.txtc2.setEnabled(True)
    self.txtc2.setStyleSheet("font-size: 18px")
    results.addRow(self.lblsd1, self.txtsd1)
    results.addRow(self.lblsd2, self.txtsd2)
    results.addRow(self.lblc1, self.txtc1)
    results.addRow(self.lblc2, self.txtc2)

    self.btnSave = QPushButton("Save Current Data")
    self.btnSave.setDisabled(True)
    self.btnSave.setStyleSheet("font-size: 18px")
    self.btnSave.clicked.connect(self.saveFile)

    self.viewBox = pg.GraphicsLayoutWidget()
    self.interFrac = self.viewBox.addViewBox(row=0, col=0, lockAspect=True)
    self.rafFrac = self.viewBox.addViewBox(row=0, col=1, lockAspect=True)
    self.bothFrac = self.viewBox.addViewBox(row=0, col=2, lockAspect=True)
    self.aleatFrac = self.viewBox.addViewBox(row=0, col=3, lockAspect=True)
    self.scaInter = pg.ScatterPlotItem()
    self.scaRaf = pg.ScatterPlotItem()
    self.scaBoth = pg.ScatterPlotItem()
    self.scaAleat = pg.ScatterPlotItem()
    # The assignments below re-create the layout with plots instead of view
    # boxes, superseding the viewBox/interFrac/scaInter assignments just above.
    self.viewBox = pg.GraphicsLayoutWidget()
    self.interFrac = self.viewBox.addPlot()  # ViewBox(row=0, col=0, lockAspect=True)
    self.interFrac.setYRange(-0.1, 1.1, padding=0)
    self.interFrac.setXRange(-0.1, 1.1, padding=0)
    self.poinc = self.viewBox.addPlot()  # ViewBox(row=0, col=1, lockAspect=True)
    self.scaInter = pg.ScatterPlotItem()
    self.scaPoinc = pg.ScatterPlotItem()
    self.roiInter = pg.PolyLineROI([[0.2, 0.5], [0.8, 0.5], [0.5, 0]],
                                   pen=(6, 9), closed=True)
    imaFrac.addWidget(self.viewBox)

    buttons.setSizeConstraint(0)
    buttons.addWidget(self.btnLoadSig)
    buttons.addWidget(self.lblSignal)
    buttons.addWidget(self.checkTotalSignal)
    nomFractal = QLabel("Fractal Type")
    nomFractal.setStyleSheet("font-size: 18px")
    buttons.addWidget(nomFractal)
    buttons.addWidget(self.cmbFractal)
    buttons.addWidget(self.btnDo)
    buttons.addWidget(self.btnFracInter)
    buttons.addLayout(lagBox)
    buttons.addWidget(self.btnSub)
    buttons.addLayout(results)
    buttons.addWidget(self.btnSave)

    frac.addLayout(imaFrac)
    self.plot1 = pg.PlotWidget()
    fra = QWidget()
    fra.setLayout(frac)
    graphics.addWidget(fra)
    graphics.addWidget(self.plot1)
    bot = QWidget()
    bot.setLayout(buttons)
    contain.addWidget(bot)
    contain.addWidget(graphics)
    self.addWidget(contain)
def plot_sensitivity(nc_dir, var_name, level='none', ylab='None'):
    """Given Wallerfing sensitivity JULES model output plots given variable sensitivity.

    :param nc_dir: Directory location for JULES output.
    :type nc_dir: str
    :param var_name: Name of variable to plot.
    :type var_name: str
    :param level: If variable is 4D specify 4th dimension.
    :type level: int
    :param ylab: Label for Y-axis.
    :type ylab: str
    :return: Figure object to save.
    :rtype: object
    """
    sns.set_context('poster', font_scale=1.2,
                    rc={'lines.linewidth': 1, 'lines.markersize': 10})
    fig, ax = plt.subplots(nrows=1, ncols=1)  # figsize=(15, 5))
    sns.set_style('whitegrid')
    palette = sns.color_palette("colorblind", 11)
    for nc_file in glob.glob(nc_dir + 'crp_g_*.3_hourly.nc'):
        dat = open_nc(nc_file)
        lats, lons, var, time = extract_vars_nc(dat, var_name, strt_yr=2012,
                                                end_yr=2012, strt_day=1, strt_hr=3)
        times = nc.num2date(time[:], time.units)
        plt_var = var[:]
        plt_var[plt_var > 1e18] = np.nan
        depths = [100, 250, 650, 2000]
        if var_name == 'smcl':
            ax.plot(times[:], plt_var[:, level, 0, 0] / depths[level])
        elif level != 'none':
            ax.plot(times[:], plt_var[:, level, 0, 0])
        else:
            ax.plot(times[:], plt_var[:, 0, 0])
    # plt.ylabel('Volumetric soil water content (m3 m-3)')
    plt.xlabel('Date')
    plt.gcf().autofmt_xdate()
    myFmt = mdates.DateFormatter('%B')
    ax.xaxis.set_major_formatter(myFmt)
    if ylab != 'None':
        plt.ylabel(ylab)
    # plt.legend(loc=2)
    # plt.show()
    return fig
__author__ = 'Renan Nominato'
__version__ = '0.0.1'
"""
The main purpose of this script is to verify strategies for setting an optimal
stoploss level. In this way, we tested several values of STD.
"""
# TODO include pivot and other types of stoploss estimation

import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import math

sns.set_style('dark')


# Generate STD and mean of a dataseries
def get_std_mean(dtf, window: int):
    # Create a window specified by the user
    dft_std = dtf['Close'].rolling(window).std()
    dft_mean = dtf['Close'].ewm(span=window).mean()
    # dft_mean = dtf['Close'].rolling(window).mean()
    return [dft_std.fillna(value=dft_std.mean()),
            dft_mean.fillna(value=dft_mean.mean())]


# df = pd.read_pickle('XRB-BTC_T.pkl')
df = pd.read_csv('BTCUSDT08_04_sec.csv')
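# A hedged usage sketch of get_std_mean: band the close price with +/- k rolling
# standard deviations, the kind of stoploss level the docstring describes. The
# window and k values are illustrative, and this assumes the csv has a 'Close'
# column as the function requires:
std, mean = get_std_mean(df, window=20)
k = 2.0
df['stop_long'] = mean - k * std    # stop level for long positions
df['stop_short'] = mean + k * std   # stop level for short positions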
def PeakFit_likelihood(Likelihood_cut: pd.DataFrame, mass_energy: pd.DataFrame, cutval,
                       plots=True, constant_mean=True, constant_width=True,
                       classifier_name='Likelihood', CB=True, Gauss=False,
                       bkg_comb=True, bkg_exp=False, bkg_cheb=False):
    print('Starting fit...')
    matplotlib.use('Agg')
    # Check if we have mass in MeV or GeV
    if np.mean(mass_energy) > 1000:
        normalization_mass = 1000
    else:
        normalization_mass = 1
    sns.set_style("whitegrid")  # White background on plot
    prediction = Likelihood_cut  # rename to prediction

    # Set range
    mZmin = 60.0
    mZmax = 130.0
    # Number of bins
    NbinsZmass = 100

    # Initiate the mass variable
    m_ee = ROOT.RooRealVar("m_ee", "Invariant mass (GeV/c^{2})", mZmin, mZmax)
    m_ee.setRange("MC_mZfit_range", mZmin, mZmax)

    # =============================================================================
    # fit signal
    # =============================================================================
    # Make a mask in the signal range. Prediction is 0 or 1, so above 0.5 is signal
    mask_mass = (mass_energy / normalization_mass > mZmin) & \
                (mass_energy / normalization_mass < mZmax) & (prediction > 0.5)
    Z_mass_signal = np.array(mass_energy[mask_mass] / normalization_mass)  # Make np.array

    # Initiate 1D histogram
    h_mZ_all = ROOT.TH1D("h_mZ_all", "Histogram of Z mass", NbinsZmass, mZmin, mZmax)
    for isample in range(Z_mass_signal.shape[0]):
        score = Z_mass_signal[isample]
        h_mZ_all.Fill(score)

    # Constructs histogram with m_ee as argument from the 1d histogram h_mZ_all
    mc_Zee_mZ = ROOT.RooDataHist("mc_Zee_mZ", "Dataset with Zee m_ee",
                                 RooArgList(m_ee), h_mZ_all)

    # Define variables for the fits.
    # BW: Breit-Wigner. CB: Crystal-Ball
    meanBW = ROOT.RooRealVar("meanBW", "meanBW", 91.1876, 60.0, 120.0)  # 91.1876
    meanBW.setConstant(True)  # this is a theoretical constant
    sigmaBW = ROOT.RooRealVar("sigmaBW", "sigmaBW", 2.4952, 2.0, 20.0)  # 2.4952
    sigmaBW.setConstant(True)  # this is a theoretical constant
    # if constant_mean:
    func_BW = ROOT.RooBreitWigner("func_BW", "Breit-Wigner", m_ee, meanBW,
                                  sigmaBW)  # Make the function from the constants

    # Crystal ball
    if CB:
        meanCB = RooRealVar("meanCB", "meanCB", -0.0716, -10.0, 10.0)
        # meanCB.setConstant(True)  # if commented out, it can float between the minimum and maximum
        sigmaCB = RooRealVar("sigmaCB", "sigmaCB", 0.193, 0, 15)
        # sigmaCB.setConstant(True)
        alphaCB = RooRealVar("alphaCB", "alphaCB", 1.58, 0.0, 10.0)
        # alphaCB.setConstant(True)
        nCB = RooRealVar("nCB", "nCB", 0.886, -10, 50.0)
        # nCB.setConstant(True)
        func_sig_CB = RooCBShape("func_CB", "Crystal Ball", m_ee, meanCB, sigmaCB,
                                 alphaCB, nCB)  # Define Crystal-Ball function
    # Gaussian
    elif Gauss:  # Use Gaussian if True in function call
        meanGA = RooRealVar("meanGA", "meanGA", 10.0, -10.0, 10.0)
        sigmaGA = RooRealVar("sigmaGA", "sigmaGA", 3.0, 0.01, 10.0)
        if constant_width:
            sigmaGA.setConstant(True)
        nGA = RooRealVar("nGA", "nGA", 1.5, 0.0, 20.0)
        func_GA = RooGaussian("func_GA", "Gaussian", m_ee, meanGA, sigmaGA)  # , nGA);

    if CB:
        # Convolute Breit-Wigner and Crystal-Ball
        print("Convoluting a Crystal-Ball and Breit-Wigner for signal")
        func_BWxCB_unextended = RooFFTConvPdf("func_BWxCB_unextended",
                                              "Breit-Wigner (X) Crystal Ball",
                                              m_ee, func_BW, func_sig_CB)
    elif Gauss:
        # Convolute Breit-Wigner and Gauss
        print("Convoluting a Gauss and Breit-Wigner for signal")
        func_BWxCB_unextended = RooFFTConvPdf("func_BWxCB_unextended",
                                              "Breit-Wigner (X) Gaussian",
                                              m_ee, func_BW, func_GA)
    else:
        # only Breit-Wigner fit on the signal
        print("Fitting only with Breit-Wigner for signal")
        func_BWxCB_unextended = func_BW
m_ee.setRange("MC_mZfit_range", 85, 97) # Set the fit range for the signal nsig = RooRealVar("ntotal", "ntotal", 1000, 0, 10e6) # Define the variable for the number of signal func_BWxCB = ROOT.RooExtendPdf("signal_func_Zee", "signal_func_Zee", func_BWxCB_unextended, nsig) # Adding the nsig term to the pdf func_BWxCB.fitTo(mc_Zee_mZ, RooFit.Range("MC_mZfit_range")) # Fit the signal if plots: # Plots the signal using the function "root_plot" defined above mc_Zee_signal = root_plot(m_ee=m_ee, distribution=mc_Zee_mZ, fit=func_BWxCB, mZmin=mZmin, mZmax=mZmax, title=f'signal for cut {cutval}') #cut {cutval} # ============================================================================= # background # ============================================================================= nbkg = RooRealVar("nbkg", "nbkg", 1000, 0, 10e6) # Define the variable for the number of background #if True: m_ee.setRange("MC_mZfit_range", mZmin, mZmax) # Set range for fit as defined in the beginning c_bkg_mZ = ROOT.TCanvas("c_bkg_mZ", "", 0, 0, 1000, 500) # Make the canvas for plotting Z_mass_background = np.array(mass_energy[mask_mass] / normalization_mass) # Mask for background h_mZWenu_all = ROOT.TH1D("h_mZ_all", "Histogram of Z mass", NbinsZmass, mZmin, mZmax) # Initiate 1D histogram for isample in range(Z_mass_background.shape[0]): score = Z_mass_background[isample] h_mZWenu_all.Fill(score) # Create the lin + exponential fit lam = RooRealVar("lambda", "Exponent", -0.04, -5.0, 0.0) func_expo = ROOT.RooExponential("func_expo", "Exponential PDF", m_ee, lam) #coef_pol1 = RooRealVar("coef_pol1", "Slope of background", 0.0, -10.0, 10.0); #func_pol1 = ROOT.RooPolynomial("func_pol1", "Linear PDF", m_ee, RooArgList(coef_pol1)); # Create Chebychev polymonial a0 = RooRealVar("a0", "a0", -0.4, -5.0, 5.0) a1 = RooRealVar("a1", "a1", -0.03, -5.0, 5.0) a2 = RooRealVar("a2", "a2", 0.02, -5.0, 5.0) a3 = RooRealVar("a3", "a3", 0.02, -5.0, 5.0) # Polynomials with different order func_Cpol1 = RooChebychev("func_Cpol1", "Chebychev polynomial of 1st order", m_ee, RooArgList(a0, a1)) func_Cpol2 = RooChebychev("func_Cpol2", "Chebychev polynomial of 2nd order", m_ee, RooArgList(a0, a1, a2)) func_Cpol3 = RooChebychev("func_Cpol3", "Chebychev polynomial of 3rd order", m_ee, RooArgList(a0, a1, a2, a3)) f_exp_mZ = RooRealVar("N_lin_mZ", "CLinear fraction", 0.50, 0, 1) m_ee.setRange("low", 60, 70) m_ee.setRange("high", 110, 130) # Adding exponential and Chebychev if comb: if bkg_comb: func_ExpLin_mZ_unextended = ROOT.RooAddPdf( "func_ExpLin_mZ_unextended", "Exponential and Linear PDF", RooArgList(func_Cpol3, func_expo), RooArgList(f_exp_mZ)) elif bkg_exp: func_ExpLin_mZ_unextended = func_expo elif bkg_cheb: func_ExpLin_mZ_unextended = func_Cpol3 else: print("No background fit called. 
Exiting") return None func_ExpLin_mZ = ROOT.RooExtendPdf("func_ExpLin_mZ", "func_ExpLin_mZ", func_ExpLin_mZ_unextended, nbkg) # Adding the nbkg term to the pdf # Constructs histogram with m_ee as argument from the 1d histogram h_mZ_all mc_Wenu_mZ = ROOT.RooDataHist("mc_Zee_mZ", "Dataset with Zee m_ee", RooArgList(m_ee), h_mZWenu_all) func_ExpLin_mZ.fitTo(mc_Wenu_mZ, RooFit.Range("MC_mZfit_range")) #ROOT.RooFit.Range("low,high")); # Fits background #Plotting background residue = root_plot(m_ee=m_ee, distribution=mc_Wenu_mZ, fit=func_ExpLin_mZ, mZmin=mZmin, mZmax=mZmax, title=f'Background for cut {cutval}') # # ============================================================================= # Combining signal and background # ============================================================================= m_ee.setRange("MC_mZfit_range", mZmin, mZmax) Z_mass = np.array(mass_energy[mask_mass] / normalization_mass) h_mZWenu = ROOT.TH1D("h_mZ_all", "Histogram of Z mass", NbinsZmass, mZmin, mZmax) for isample in range(Z_mass.shape[0]): score = Z_mass[isample] h_mZWenu.Fill(score) # Constructs histogram with m_ee as argument from the 1d hist ogram h_mZ_all mc_ZeeWenu_mZ = ROOT.RooDataHist("mc_Zee_mZ", "Dataset with Zee m_ee", RooArgList(m_ee), h_mZWenu) ## Fits the data and returns the fraction of background f_bkg_mZ = RooRealVar("f_bkg_mZ", "Signal fraction", nbkg.getVal() / nsig.getVal(), 0.0, 1) ## Combining the signal and background fits func_SigBkg_mZ_unextended = ROOT.RooAddPdf( "func_SigBkg_mZ", "Signal and Background PDF", RooArgList(func_ExpLin_mZ_unextended, func_BWxCB_unextended), RooArgList(f_bkg_mZ)) # func_SigBkg_mZ_unextended = func_BWxCB_unextended;#ROOT.RooAddPdf("func_SigBkg_mZ", "Signal and Background PDF", RooArgList(func_BWxCB_unextended, func_BWxCB_unextended), RooArgList(f_bkg_mZ)); ntotal = RooRealVar("ntotal", "ntotal", 10000, 0, 10e6) func_SigBkg_mZ = ROOT.RooExtendPdf("func_ExpLin_mZ", "func_ExpLin_mZ", func_SigBkg_mZ_unextended, ntotal) func_SigBkg_mZ.fitTo(mc_ZeeWenu_mZ) # Fits the full data set if plots: mc_ZeeWenu_mZ_resid = root_plot(m_ee=m_ee, distribution=mc_ZeeWenu_mZ, fit=func_SigBkg_mZ, mZmin=mZmin, mZmax=mZmax, title=f'Bkg+Sig for cut {cutval}') # Baseline ntotal = 41231 (Data) # fraction 0.9333 # Baseline ntotal = 74747 (MC) # fraction 0.4427 # Malte script len(Z_mass) bkg = len(Z_mass) * f_bkg_mZ.getVal() sig = len(Z_mass) * (1 - f_bkg_mZ.getVal()) # print(f_bkg_mZ.getVal()) #### DATA #### BL_sig = 71075 * ( 1 - 0.4049 ) # BL = baseline, the number is the fraction of bkg in baseline BL_bkg = 71075 * 0.4049 # BL = baseline bkg_ratio = bkg / BL_bkg sig_ratio = sig / BL_sig max_residue = max(abs(mc_ZeeWenu_mZ_resid.getYAxisMax()), abs(mc_ZeeWenu_mZ_resid.getYAxisMin())) # print(max_residue) # print(bkg_ratio) # print(sig_ratio) if (bkg_ratio < 1.009) & (sig_ratio < 1.009) & (abs( mc_ZeeWenu_mZ_resid.getYAxisMin()) < 4.5) & (abs( mc_ZeeWenu_mZ_resid.getYAxisMax()) < 4.5): # input('....') return BL_sig, BL_bkg, sig_ratio, bkg_ratio #max_residue, ntotal.getVal(), nsig.getVal(), nbkg.getVal()return sigmaCB if CB else sigmaGA #sig_ratio, sigma_sig, bkg_ratio, sigma_bkg else: return 0, 0, 0, 0
def plot_climatology(nc_file, var_name, level='none', ylab='None'):
    """Given Wallerfing climatological JULES model output plots given variable climatology.

    :param nc_file: File location for JULES output.
    :type nc_file: str
    :param var_name: Name of variable to plot.
    :type var_name: str
    :param level: If variable is 4D specify 4th dimension.
    :type level: int
    :param ylab: Label for Y-axis.
    :type ylab: str
    :return: Figure object to save.
    :rtype: object
    """
    sns.set_context('poster', font_scale=1.2,
                    rc={'lines.linewidth': 1, 'lines.markersize': 10})
    fig, ax = plt.subplots(nrows=1, ncols=1)  # figsize=(15, 5))
    sns.set_style('whitegrid')
    palette = sns.color_palette("colorblind", 11)
    dat = open_nc(nc_file)
    lats, lons, var, time = extract_vars_nc(dat, var_name)
    times = nc.num2date(time[:], time.units)
    idx = np.where([times[x].year == times[366 * 8].year for x in range(len(times))])[0]
    time_x = times[idx]
    plt_var = var[:]
    plt_var[plt_var > 1e18] = np.nan
    depths = [100, 250, 650, 2000]
    # depths = [150, 350, 650, 2000]
    labels = ['0 - 0.1m', '0.1 - 0.35m', '0.35 - 1m', '1 - 3m']
    # if level in [0, 1, 2, 3]:
    #     ax.plot(times[0:365], plt_var[0:365, level]/depths[level], label='wfdei', color=palette[0])
    # else:
    for yr in range(times[0].year, times[-1].year):  # xrange in the Python 2 original
        idx = np.where([times[x].year == yr for x in range(len(times))])[0]
        # print len(idx)
        if var_name == 'smcl':
            ax.plot(time_x[0:364 * 8], plt_var[idx[0]:idx[364 * 8], level, 0, 0] / depths[level])
        elif level != 'none':
            ax.plot(time_x[0:364 * 8], plt_var[idx[0]:idx[364 * 8], level, 0, 0])
        else:
            ax.plot(time_x[0:364 * 8], plt_var[idx[0]:idx[364 * 8], 0, 0])
    # plt.ylabel('Volumetric soil water content (m3 m-3)')
    plt.xlabel('Date')
    plt.gcf().autofmt_xdate()
    myFmt = mdates.DateFormatter('%B')
    ax.xaxis.set_major_formatter(myFmt)
    if ylab != 'None':
        plt.ylabel(ylab)
    # plt.legend(loc=2)
    # plt.show()
    return fig
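# Hypothetical call of plot_climatology; the path, variable name, and level are
# placeholders chosen to match the 'smcl' branch above:
fig = plot_climatology('output/jules_run.nc', 'smcl', level=0,
                       ylab='Volumetric soil water content (m3 m-3)')
fig.savefig('smcl_climatology.png', bbox_inches='tight')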
from scrawalPage import *
import matplotlib.pyplot as plt
import seaborn as sns

Roomlist = Roomlist_download()
print(Roomlist.head())

Roomlist_group = Roomlist.groupby('RoomName').count().reset_index() \
                         .sort_values(by=['RoomPrice'], ascending=False)
print(Roomlist_group)

sns.set_style('whitegrid')
sns.barplot(x='RoomName', y='RoomPrice', data=Roomlist_group[:10])
plt.show()
def getActivityTOD(folders, dbs, switch='fuel', sector_name='electric',
                   save_data='N', create_plots='N', conversion=277.777778,
                   run_name=''):
    # inputs:
    #   1) folders      - paths containing dbs (list, or single string if all in the same path)
    #   2) dbs          - names of databases (list)
    #   3) switch       - 'fuel' or 'tech', basis of categorization
    #   4) sector_name  - name of temoa sector to be analyzed
    #   5) save_data    - 'Y' or 'N', default is 'N'
    #   6) create_plots - 'Y' or 'N', default is 'N'
    #   7) conversion   - conversion to GWh, default is 277.778 (from PJ)
    #   8) run_name     - used for saving results in a dedicated folder
    # outputs:
    #   1) activity
    #   2) plots - optional
    #   3) data - optional
    # ==============================================================================
    print("Analyzing activity by time of day (TOD)")

    # Save original directory
    wrkdir = os.getcwd()

    # If only a single db and folder provided, change to a list
    if type(dbs) == str and type(folders) == str:
        dbs = [dbs]
        folders = [folders]
    # If a list of folders is provided with one database, only use first folder
    elif type(dbs) == str:
        dbs = [dbs]
        folders = [folders[0]]
    # If only a single folder provided, create a list of the same folder
    elif type(folders) == str:
        fldrs = []
        for db in dbs:
            fldrs.append(folders)
        folders = fldrs

    # Create dataframe to hold each activity_single series
    activity = pd.DataFrame(dtype='float64')

    # Iterate through each db
    for folder, db in zip(folders, dbs):
        activity_single = SingleDB(folder, db, switch=switch,
                                   sector_name=sector_name, conversion=conversion)
        activity = pd.concat([activity, activity_single])

    # Reset index (remove multi-level indexing, easier to use in Excel)
    activity = activity.reset_index()

    # Directory to hold results
    if save_data == 'Y' or create_plots == 'Y':
        tt.create_results_dir(wrkdir=wrkdir, run_name=run_name)

    # Save results to CSV
    if save_data == 'Y':
        # Create savename based on switch
        if switch == 'fuel':
            savename = 'activityTOD_by_fuel.csv'
        else:
            savename = 'activityTOD_by_tech.csv'
        activity.to_csv(savename)

    if create_plots == 'Y':
        df = activity.reset_index()
        import matplotlib.pyplot as plt
        import seaborn as sns

        for database in df.database.unique():
            # new figure
            plt.figure()
            # set aesthetics
            sns.set_style("white", {"font.family": "serif",
                                    "font.serif": ["Times", "Palatino", "serif"]})
            sns.set_context("talk")
            # select relevant database
            df2 = df[(df.database == database)]
            # plot
            sns.relplot(x='tod', y='value', hue='fuelOrTech', row='year',
                        col='season', data=df2, kind='line')
            # save
            if switch == 'fuel':
                savename = 'yearlyActivityTOD_byFuel' + tt.remove_ext(database) + '.pdf'
            else:
                savename = 'yearlyActivityTOD_byTech' + tt.remove_ext(database) + '.pdf'
            plt.savefig(savename, dpi=resolution)
            # close the figure
            plt.close()

    # Return to original directory
    os.chdir(wrkdir)

    return activity
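# Hypothetical invocation of getActivityTOD; the folder and database names are
# placeholders, and the keyword values simply exercise the options listed in the
# comment block above:
activity = getActivityTOD('temoa_runs', ['run_baseline.sqlite'],
                          switch='fuel', sector_name='electric',
                          save_data='Y', create_plots='Y', run_name='baseline')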