def histo_pdfs(x_test,y_test,x_train=None,y_train=None):
    """
    Plot the histograms of the training and test set superimposed with PDFs.

    For every column of ``x_test`` the samples of each class are drawn as
    normalised histograms and overlaid with a Gaussian-KDE estimate of the
    class PDF.  When ``y_train`` is given, the training histograms and
    KDE curves (dashed) are drawn on the same axes.

    :param x_test: feature table; its ``columns`` are iterated and rows are
        selected with ``reindex``.
    :param y_test: label table with ``NumType`` (numeric class id) and
        ``Type`` (class name) columns; its index selects rows of ``x_test``.
    :param x_train: optional training feature table (required when
        ``y_train`` is given).
    :param y_train: optional training label table with a ``NumType`` column.
    """
    from scipy.stats import gaussian_kde

    # ``if y_train:`` is ambiguous for a DataFrame, so test against None.
    has_train = y_train is not None

    # Materialise as lists: the class names are indexed below.
    num_t = [int(v) for v in np.unique(y_test.NumType.values.ravel())]
    str_t = [str(v) for v in np.unique(y_test.Type.values.ravel())]

    fig = plt.figure()
    fig.set_facecolor('white')

    # Separate palettes so histogram colours stay the same for every feature.
    hist_colors = ('k', 'w')
    pdf_colors = ('k', 'y')
    c_train = ('b', 'g')

    for feat in x_test.columns:
        hist, g = [], {}
        hist_train, g_train, lab_tr = [], {}, []

        mini = x_test[feat].min()
        maxi = x_test[feat].max()
        if has_train:
            mini = np.min([mini, x_train[feat].min()])
            # np.max(a, b) would treat ``b`` as the axis argument.
            maxi = np.max([maxi, x_train[feat].max()])
        print("%s %s" % (mini, maxi))

        bins_hist = np.linspace(mini, maxi, 25)
        bins = np.linspace(mini, maxi, 200)

        for i in num_t:
            index = y_test[y_test.NumType.values == i].index
            x_plot = x_test.reindex(columns=[feat], index=index).values
            hist.append(x_plot)
            g[i] = gaussian_kde(x_plot.ravel())(bins)
            if has_train:
                lab_tr.append('%s (train)' % str_t[i])
                index = y_train[y_train.NumType.values == i].index
                x_plot = x_train.reindex(columns=[feat], index=index).values
                hist_train.append(x_plot)
                g_train[i] = gaussian_kde(x_plot.ravel())(bins)

        # ``density`` replaces the ``normed`` keyword removed from Matplotlib.
        plt.hist(hist, bins=bins_hist, color=hist_colors, density=True,
                 histtype='stepfilled', alpha=.2, label=str_t)
        if has_train:
            plt.hist(hist_train, bins=bins_hist, color=c_train, density=True,
                     histtype='stepfilled', alpha=.2, label=lab_tr)

        for key in sorted(g):
            plt.plot(bins, g[key], color=pdf_colors[key], lw=2.)
            if has_train:
                plt.plot(bins, g_train[key], color=c_train[key], lw=1., ls='--')
        plt.legend(loc=2)
        plt.xlabel(feat)
def count_to_kde(D, KDE_SENSORS):
    """Replace per-sensor value counts by a fitted KDE, in place.

    ``D`` maps an identifier to ``{sensor: {value: count}}``.  For every
    sensor listed in ``KDE_SENSORS`` the counts are expanded into a sample
    of roughly 1000 points (each value repeated proportionally to its
    share), a Gaussian KDE is fitted, and the entry is replaced by
    ``(density, maxval)`` where ``maxval`` is the highest density found
    at any of the observed values.  Sensors without usable data (no counts,
    or a zero total) are set to ``None``.  Other sensors are left untouched.

    Returns the same (mutated) ``D``.
    """
    for ident in D:
        sensors = D[ident]
        for sensor in sensors:
            if sensor not in KDE_SENSORS:
                continue
            counts = sensors[sensor]
            totvals = float(sum(counts.values()))
            training = []
            if totvals:  # a zero total would otherwise divide by zero
                for key in counts:
                    coef = int(1000 * counts[key] / totvals)
                    training += [key] * coef
            if not training:
                # we don't have sufficient training data
                sensors[sensor] = None
                continue
            try:
                density = kde.gaussian_kde(training)
            except Exception:
                # Most likely a singular matrix (e.g. [n, n, ... ,n]).
                # Add a little 'noise' to break singularity.
                training += [0.01]
                density = kde.gaussian_kde(training)
            maxval = None
            for key in counts:
                dk = density(key)[0]
                if maxval is None or maxval < dk:
                    maxval = dk
            sensors[sensor] = (density, maxval)
    return D
def graph_FWHM_data_range(start_date=datetime.datetime(2015,3,6),
                          end_date=datetime.datetime(2015,4,15),tenmin=True,
                          path='/home/douglas/Dropbox (Thacher)/Observatory/Seeing/Data/',
                          write=True,outpath='./'):
    """
    Histogram the FWHM values of a date range and annotate basic statistics.

    The data are fetched with ``get_FWHM_data_range``; mode, median, mean
    and the narrowest interval containing 68.27% of the KDE probability
    mass are written on the figure (figure number 99).

    :param start_date, end_date, tenmin, path: forwarded unchanged to
        ``get_FWHM_data_range``.
    :param write: when true (default) the figure is saved as
        ``outpath + 'Seeing_Cumulative.png'``.
    :param outpath: prefix for the output file name.
    """
    plot_params()
    fwhm = get_FWHM_data_range(start_date=start_date, end_date=end_date,
                               path=path, tenmin=tenmin)

    # Basic stats
    med = np.median(fwhm)
    mean = np.mean(fwhm)

    # Mode from a kernel density estimate evaluated on a fixed 0..30 grid.
    # The same KDE is reused for the cumulative distribution below.
    vals = np.linspace(0, 30, 1000)
    pdf = gaussian_kde(fwhm)(vals)
    mode = vals[np.argmax(pdf)]

    plt.ion()
    plt.figure(99)
    plt.clf()
    plt.hist(fwhm, color='darkgoldenrod', bins=35)
    plt.xlabel('FWHM (arcsec)', fontsize=16)
    plt.ylabel('Frequency', fontsize=16)
    plt.annotate('mode $=$ %.2f" ' % mode, [0.87, 0.85], horizontalalignment='right',
                 xycoords='figure fraction', fontsize='large')
    plt.annotate('median $=$ %.2f" ' % med, [0.87, 0.8], horizontalalignment='right',
                 xycoords='figure fraction', fontsize='large')
    plt.annotate('mean $=$ %.2f" ' % mean, [0.87, 0.75], horizontalalignment='right',
                 xycoords='figure fraction', fontsize='large')

    # Inverse CDF: maps a cumulative probability to a FWHM value.
    dist_c = np.cumsum(pdf) / np.sum(pdf)
    func = interp1d(dist_c, vals, kind='linear')

    # Slide a 68.27% probability window and keep the narrowest one.
    disthi = np.linspace(.684, .999, 100)
    distlo = disthi - 0.6827
    interval = np.min(func(disthi) - func(distlo))

    plt.annotate(r'1 $\sigma$ int. $=$ %.2f" ' % interval, [0.87, 0.70],
                 horizontalalignment='right', xycoords='figure fraction',
                 fontsize='large')
    plt.rcdefaults()
    if write:
        plt.savefig(outpath + 'Seeing_Cumulative.png', dpi=300)
    return
def IndivisualDistributionsPlot():
    """Save one 2x2 figure per subject comparing training and baseline bands.

    For every group of ``LoadDatabase()`` (grouped by index) and each of
    four bands, the training and baseline values are drawn as normalised
    histograms, the baseline KDE is overlaid, and - when the subject is
    present in ``prep.Load_rest()`` - the mean rest values before and
    after are marked with dashed vertical lines.  Each figure is saved as
    ``<path><name>.png``.
    """
    path = '/Users/ryszardcetnarski/Desktop/Distributions/'
    plt.style.use('ggplot')
    db = LoadDatabase()
    rest = prep.Load_rest()
    kde_bandwith = 0.8
    bands = ['all_spectrum', 'alpha', 'beta1', 'beta2']

    for name, subject in db.groupby(db.index):
        fig = plt.figure()
        fig.suptitle(name, fontweight='bold')
        ax = []
        for idx, band in enumerate(bands):
            ax.append(fig.add_subplot(220 + idx + 1))
            training = ExtractBands(subject, 'training', band)
            baseline = ExtractBands(subject.dropna(subset=['baseline_bands']), 'baseline', band)

            training_distribution = gaussian_kde(training, kde_bandwith)
            baseline_distribution = gaussian_kde(baseline, kde_bandwith)

            # ``density`` replaces the ``normed`` keyword removed from Matplotlib.
            ax[idx].hist(training, alpha=0.2, density=True, color='blue')
            ax[idx].hist(baseline, alpha=0.2, density=True, color='yellow')

            if name in rest:
                ax[idx].axvline(rest[name]['Before'].loc[band].mean(), color='b',
                                linestyle='dashed', linewidth=2, label='rest przed')
                ax[idx].axvline(rest[name]['After'].loc[band].mean(), color='r',
                                linestyle='dashed', linewidth=2, label='rest po')
            else:
                # Subject has no rest recording: report it and carry on.
                print(name)

            # Vector for plotting the KDE slightly beyond the histogram range.
            xmin, xmax = ax[idx].get_xlim()
            x = np.linspace(xmin - 1, xmax + 1, 100)
            # NOTE(review): the training curve appears to be disabled in the
            # original source; confirm before re-enabling.
            # ax[idx].plot(x , training_distribution(x), color = 'blue', label ='dystrybucja trening')
            ax[idx].plot(x, baseline_distribution(x), color='yellow', label='dystrybucja baseline')
            ax[idx].set_title(band)
            if idx == 3:
                ax[idx].legend(loc='best')
        fig.savefig(path + name + '.png', dpi=400)
    plt.tight_layout()
def analyse(self):
    """Estimate the pdf of the frequencies on ``self.x`` and find its peaks.

    Depending on ``self.transpose`` ("Yes"/"No") the KDE is fitted on the
    transposed frequencies (computed by ``self.transmode()``) or on
    ``self.freq``; NaN entries are dropped first.  ``self.pdf`` ends up
    holding the KDE evaluated on ``self.x``, then ``self.peaks()`` runs.
    """
    if self.transpose == "Yes":
        self.freqtransmode = self.transmode()
        print("%s %s" % (self.file_name, "(transposed)"))
        valid = ~numpy.isnan(self.freqtransmode)
        self.pdf = gaussian_kde(self.freqtransmode[valid], self.bw_method)
    if self.transpose == "No":
        print("%s %s" % (self.file_name, "(not transposed)"))
        valid = ~numpy.isnan(self.freq)
        self.pdf = gaussian_kde(self.freq[valid], self.bw_method)
    estimator = self.pdf
    self.pdf = estimator(self.x)
    self.peaks()
def mutualInformation(X,Y):
    """Return ``S(X) + S(Y) - S(X, Y)`` from Gaussian-KDE density estimates.

    Each density (two marginals and the joint) is fitted with
    ``gaussian_kde`` and evaluated at the sample points themselves; the
    resulting values are passed to ``entropy``.
    """
    samples = (X, Y, [X, Y])
    # Fit all three estimators first, then evaluate each on its own sample.
    estimators = [gaussian_kde(sample) for sample in samples]
    h_x, h_y, h_xy = [entropy(est.evaluate(sample))
                      for est, sample in zip(estimators, samples)]
    return h_x + h_y - h_xy
def trainMLE(self,trainingDict):
    """Fit per-variable and joint Gaussian KDEs from training samples.

    ``trainingDict`` maps each variable name in ``self.varorder`` to its
    sequence of training values.  The rows are stacked in ``varorder``
    order; ``self.singlekdes[variable]`` receives a 1-D KDE for every
    entry of ``self.variables`` and ``self.jointkde`` a KDE over all rows.
    """
    trainingValues = np.array([trainingDict[v] for v in self.varorder])

    for variable in self.variables:
        # row of the current variable in the stacked training matrix
        varindex = self.varorder.index(variable)
        self.singlekdes[variable] = gaussian_kde(trainingValues[varindex])

    # now we need to know how to use the joint probability distribution!
    self.jointkde = gaussian_kde(trainingValues)
def calc_dens(self):
    """Write the KDE curves of ``np_peak`` and ``np_norm`` to ``self.outXls``.

    Both KDEs are fitted first; afterwards the corresponding array is
    clipped in place to ``self.x_lim2``.  The curves are evaluated on 100
    points in ``[0, x_lim2]`` and saved as a tab-separated table with the
    columns ``x``, ``y_peak`` and ``y_norm``.
    """
    peak_kde = gaussian_kde(self.np_peak)
    self.np_peak[self.np_peak > self.x_lim2] = self.x_lim2

    norm_bandwidth = 0.05 / self.np_norm.std(ddof=1)
    norm_kde = gaussian_kde(self.np_norm, bw_method=norm_bandwidth)
    self.np_norm[self.np_norm > self.x_lim2] = self.x_lim2

    grid = np.linspace(0, self.x_lim2, 100)
    table = pd.DataFrame({
        'x': grid,
        'y_peak': peak_kde(grid),
        'y_norm': norm_kde(grid),
    })
    table.to_csv(self.outXls, sep="\t")
def plot_histos_and_pdfs_kde(axHistx,axHisty,bins_x,bins_y,x_test,y_test,x_train=None,y_train=None):
    """
    Histograms and KDE PDFs

    Draws, for the first two columns of ``x_test``, per-class normalised
    histograms and Gaussian-KDE curves: feature 1 on ``axHistx`` and
    feature 2 (horizontally) on ``axHisty``.  When ``y_train`` is given,
    the training-set KDE curves are added as dashed lines.

    :param axHistx, axHisty: axes objects providing ``hist`` and ``plot``.
    :param bins_x, bins_y: bin edges, also used as KDE evaluation points.
    :param x_test: feature table with at least two columns.
    :param y_test: label table with ``Type`` and ``NumType`` columns;
        classes are assumed to be numbered ``0 .. n_classes-1``.
    :param x_train, y_train: optional training tables of the same layout.
    """
    # ``if y_train:`` is ambiguous for a DataFrame, so test against None.
    has_train = y_train is not None

    x_hist = []
    g_x, g_y = {}, {}
    g_x_train, g_y_train = {}, {}

    feat_1 = x_test.columns[0]
    feat_2 = x_test.columns[1]

    NB_class = len(np.unique(y_test.Type.values))
    if NB_class > 2:
        hist_colors = ('k', 'gray', 'w')
        pdf_colors = ('y', 'orange', 'r')
    else:
        # Slicing also covers the single-class case.
        hist_colors = ('k', 'w')[:NB_class]
        pdf_colors = ('y', 'r')[:NB_class]

    for i in range(NB_class):
        index = y_test[y_test.NumType.values == i].index
        x1 = x_test.reindex(columns=[feat_1], index=index).values
        x2 = x_test.reindex(columns=[feat_2], index=index).values
        x_hist.append(x1)
        g_x[i] = gaussian_kde(x1.ravel())(bins_x)
        g_y[i] = gaussian_kde(x2.ravel())(bins_y)
        # ``density`` replaces the ``normed`` keyword removed from Matplotlib.
        axHisty.hist(x2, bins=bins_y, color=hist_colors[i], density=True,
                     orientation='horizontal', histtype='stepfilled', alpha=.5)
        if has_train:
            index = y_train[y_train.NumType.values == i].index
            x1 = x_train.reindex(columns=[feat_1], index=index).values
            x2 = x_train.reindex(columns=[feat_2], index=index).values
            g_x_train[i] = gaussian_kde(x1.ravel())(bins_x)
            g_y_train[i] = gaussian_kde(x2.ravel())(bins_y)

    axHistx.hist(x_hist, bins=bins_x, color=hist_colors, density=True,
                 histtype='stepfilled', alpha=.5)

    for key in sorted(g_x):
        axHistx.plot(bins_x, g_x[key], color=pdf_colors[key], lw=2.)
        axHisty.plot(g_y[key], bins_y, color=pdf_colors[key], lw=2.)
        if has_train:
            axHistx.plot(bins_x, g_x_train[key], color=pdf_colors[key], lw=1., ls='--')
            axHisty.plot(g_y_train[key], bins_y, color=pdf_colors[key], lw=1., ls='--')
def compute_pdfs(self):
    """
    Compute the Probability Density Functions (PDFs) for all features and
    all event types.

    Fills ``self.types`` (unique values of ``self.y.Type``) and
    ``self.gaussians``: for every feature in ``self.opdict['feat_list']``
    a dict holding the 200-point evaluation grid under ``'vec'`` and, per
    event type with more than one sample, the KDE evaluated on that grid
    and normalised to sum to 1.  For the feature ``'NbPeaks'`` the raw
    values are stored instead of a KDE.
    """
    from scipy.stats import gaussian_kde

    self.types = np.unique(self.y.Type.values)
    by_type = {t: self.x[self.y.Type == t] for t in self.types}

    self.gaussians = {}
    for feat in self.opdict['feat_list']:
        column = self.x[feat]
        vec = np.linspace(column.min(), column.max(), 200)
        self.gaussians[feat] = {'vec': vec}
        for t in self.types:
            values = by_type[t][feat].values
            if len(values) <= 1:
                # a KDE needs at least two samples
                continue
            if feat != 'NbPeaks':
                pdf = gaussian_kde(values)(vec)
                self.gaussians[feat][t] = pdf / np.sum(pdf)
            else:
                self.gaussians[feat][t] = values
def make_plot(self):
    """Return a figure with one log10 density curve per sample.

    For every sample of ``self.exp.sample_set.all()`` the first data field
    is read, non-positive values are dropped, and a Gaussian KDE of the
    log10 values is plotted (line plus translucent fill).  Samples with
    fewer than two positive values are skipped because no KDE can be
    fitted.  Colours cycle through a two-entry palette.
    """
    c_map = ['#268bd2', '#cb4b16',]
    fig = plt.figure()
    fig.patch.set_alpha(0)
    ax = fig.add_subplot(111)
    ax.set_xlabel('log$_{10}$(FPKM)')
    ax.set_ylabel('Density')
    ax.title.set_fontsize(18)
    for i, sample in enumerate(self.exp.sample_set.all()):
        df = self.get_dataframe(sample)[self.data_fields[0]]
        df = df[df > 0]
        if len(df) < 2:
            continue
        df = df.map(math.log10)
        base = np.linspace(min(df), max(df), 200)
        kde_pdf = gaussian_kde(df).evaluate(base)
        # wrap around so more than two samples do not raise IndexError
        color = c_map[i % len(c_map)]
        ax.plot(base, kde_pdf, color=color, label=sample.sample_name, alpha=0.8)
        ax.fill_between(base, kde_pdf, color=color, alpha=0.4)
    ax.legend()
    rstyle(ax)
    return fig
def kde(freqs):
    """Estimate the pdf of the freqs using a Kernel Density Estimation.

    The density is fitted with the module-level ``bw_method`` and
    evaluated on the module-level grid ``x``.

    Args:
        freqs (numpy.ndarray) : A list of frequencies in Hz.

    Returns:
        pdf (numpy.ndarray) : the estimated density evaluated at ``x``.

    Example:
        >>> freqs = numpy.loadtxt(file_path)
        >>> pdf = kde(freqs)
        >>> plt.plot(pdf)
    """
    estimator = gaussian_kde(freqs, bw_method)
    return estimator.evaluate(x)
def FWHM_stats(data,all=True,clip=False):
    """
    Description:
    ------------
    Return basic FWHM stats as ``[mean, median, mode, std, clipped mean]``.

    The FWHM values come from ``FWHM_all(data)`` when ``all`` is true,
    otherwise from ``FWHM_ave`` (with ``clip`` forwarded when it is set).
    The mode is the maximum of a Gaussian KDE on a 0..30 grid, and the
    clipped mean uses a 3-sigma ``sigmaclip``.
    """
    if all:
        fwhm = FWHM_all(data)
    else:
        extra = {'clip': clip} if clip else {}
        fwhm = FWHM_ave(data, **extra)

    # Get mode using kernel density estimation (KDE)
    grid = np.linspace(0, 30, 1000)
    density = gaussian_kde(fwhm)(grid)
    mode = grid[np.argmax(density)]

    # Basic stats
    clipped = sigmaclip(fwhm, low=3, high=3)[0]
    return [np.mean(fwhm), np.median(fwhm), mode, np.std(fwhm), np.mean(clipped)]
def density_at_points(data):
    """Use KDE to calculate the probability density at each point in a dataset.

    Handy for colouring the points of a scatterplot by local density so
    that crowded regions stand out.

    Parameter:
        data: array of shape (n_data_points, n_dimensions)

    Returns:
        densities: array of shape (n_data_points)

    Example:
        import numpy
        import matplotlib.pyplot as plt

        mode1 = numpy.random.multivariate_normal(mean=[0, 0], cov=[[4, 1], [1, 7]], size=300)
        mode2 = numpy.random.multivariate_normal(mean=[8, 8], cov=[[2, 1], [1, 1]], size=300)
        data = numpy.concatenate([mode1, mode2], axis=0)

        density = density_at_points(data)
        plt.scatter(data[:,0], data[:,1], s=12, c=density, cmap='inferno')
    """
    # gaussian_kde expects one row per dimension, hence the transpose.
    points = numpy.asarray(data).T
    estimator = kde.gaussian_kde(points)
    return estimator(points)
def get_chart_image(self):
    """Render the configured charts and return the image data as bytes.

    For every attribute in ``self.chartdata`` a histogram, a scatter of the
    raw points along y=0 and/or a Gaussian-KDE curve is drawn, depending on
    the ``histogram``, ``points`` and ``kde`` flags.  A failure in one
    layer is reported on stdout and does not abort the others.  The figure
    is rendered at 80 dpi and closed afterwards.
    """
    import io

    fig = pylab.figure()
    try:
        for attribute in self.chartdata:
            if self.histogram:
                try:
                    # ``density`` replaces the removed ``normed`` keyword.
                    pylab.hist(self.chartdata[attribute], bins=100, density=self.normalized)
                except Exception:
                    print("Warning: problem rendering attribute histogram graph.")
                    print(self.chartdata[attribute])
            if self.points:
                try:
                    pylab.scatter(self.chartdata[attribute], zeros(len(self.chartdata[attribute])))
                except Exception:
                    print("Warning: problem rendering attribute distribution scattar graph.")
            if self.kde:
                x_axis = None
                try:
                    x_axis = linspace(self.minval, self.maxval, 1000)
                    approx_dist = gaussian_kde(self.chartdata[attribute])
                    pylab.plot(x_axis, approx_dist(x_axis))
                except Exception:
                    print("Warning: problem rendering attribute distribution kde graph.")
                    print("min: " + str(self.minval) + ", max: " + str(self.maxval))
                    # str() keeps the diagnostic itself from raising.
                    print("linspace:" + str(x_axis))
        # Image data is binary, so collect it in a bytes buffer.
        chart_image = io.BytesIO()
        fig.canvas.print_figure(chart_image, dpi=80)
        return chart_image.getvalue()
    finally:
        # Release the figure; pylab otherwise keeps every figure alive.
        pylab.close(fig)
def __init__(self,sim_phone,classifier_model,power_model,callback_list) :
    # Set up the classifier state: sensor distributions, initial sampling
    # intervals on ``sim_phone`` and one KDE per stored feature sample.
    #   sim_phone        -- object whose change_*_interval methods are called
    #   classifier_model -- path of a pickled feature list
    #   power_model      -- path of a Python file run with execfile()
    #   callback_list    -- iterable of activity ids (compared with 0, 3, 4)
    self.sim_phone=sim_phone
    self.classifier_output=[]
    self.callback_list = callback_list
    self.ewma_window = [0.2]*5
    # NOTE(review): wifi_distribution, gps_distribution and kernel_function
    # are indexed without being created here -- presumably class-level
    # containers; confirm.
    self.wifi_distribution[0]= Positive_Normal(138.4186,17.2395)
    self.wifi_distribution[1]= Positive_Normal(85.9490,14.2045)
    self.wifi_distribution[2]= Positive_Normal(76.9451,13.8514)
    self.wifi_distribution[3]= Positive_Normal(83.0392, 8.4357)
    self.wifi_distribution[4]= Positive_Normal(32.9173, 32.1233)
    self.gps_distribution[0] = Positive_Normal(0.05,0.2)
    self.gps_distribution[1] = Positive_Normal(1.4450, 0.5919)
    self.gps_distribution[2] = Positive_Normal(3.2262, 0.4802)
    self.gps_distribution[3] = Positive_Normal(3.3806, 1.1705)
    self.gps_distribution[4] = Positive_Normal(12.5267, 7.55964)
    for i in range(5):
        print self.wifi_distribution[i]
    # Count the callbacks with id 0, 3 or 4; the counter is no longer
    # consulted because the conditions below are commented out.
    hard_act_counter = 0
    for callback in self.callback_list:
        if callback == 0 or callback == 3 or callback == 4:
            hard_act_counter += 1
    # if hard_act_counter == 1:
    #     self.use_wifi = 1
    # if hard_act_counter >= 1:
    self.use_gps = 1
    ''' set initial sampling intervals in milliseconds '''
    # NOTE(review): execfile runs the power model file in this scope; the
    # self.power_* dicts read below are presumably defined by it -- verify.
    execfile(power_model)
    self.current_sampling_interval=max(self.power_accel.keys())
    sim_phone.change_accel_interval(max(self.power_accel.keys()))
    # NOTE(review): self.use_wifi is not assigned in this method.
    if self.use_wifi == 1:
        sim_phone.change_wifi_interval(60000)
    else:
        sim_phone.change_wifi_interval(10000000000)
    if self.use_gps == 1:
        sim_phone.change_gps_interval(60000)
    else:
        sim_phone.change_gps_interval(10000000000)
    sim_phone.change_gsm_interval(max(self.power_gsm.keys()))
    sim_phone.change_nwk_loc_interval(max(self.power_nwk_loc.keys()))
    # NOTE(review): the handle is never closed, and pickle.load must only be
    # used on trusted files.
    classifier_model_handle=open(classifier_model,"r");
    self.feature_list = pickle.load(classifier_model_handle);
    # Build one Gaussian KDE per entry of feature_list[i], for i in 0..4.
    for i in range(5):
        self.kernel_function[i] = []
        for j in range(len(self.feature_list[i])):
            kernel_pdf = gaussian_kde(self.feature_list[i][j])
            #kernel_pdf.covariance_factor = lambda : 0.
            #kernel_pdf._compute_covariance()
            self.kernel_function[i] += [kernel_pdf]
    # The raw samples are dropped once the KDEs exist.
    self.feature_list = []
def lericsonPlot(X, x, T):
    """Draw a 3-D stack of KDE curves of ``X[t]`` for selected time steps.

    Every 8th time step from ``t = 24`` on (``t % 8 == 0`` and
    ``t >= 20``) gets one density curve of the samples ``X[t]`` (KDE
    bandwidth factor 0.25) plus a marker at the grid point closest to
    ``x[t]``; the density at that point is also printed.

    :param X: per-time-step sample collections, indexable by ``t``.
    :param x: per-time-step reference values; its length sets the range.
    :param T: upper limit of the time axis.
    """
    __import__('mpl_toolkits.mplot3d')  # registers the '3d' projection
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(55, 45)
    bins = np.linspace(-12, 12, 100)
    ax.set_xlim3d(0, T)
    ax.set_ylim3d(bins[0], bins[-1])
    ax.set_zlim3d(0, 1)
    ax.set_xlabel('$t$')
    ax.set_ylabel('$x_T$')
    ax.set_zlabel('$p(x_T | y_{1:T})$')
    for t in range(0, len(x)):
        # value comparison; ``is not 0`` tested object identity
        if t % 8 != 0 or t < 20:
            continue
        # a scalar bw_method is used directly as the covariance factor
        density = gaussian_kde(X[t], bw_method=.25)
        xs = np.linspace(min(X[t]), max(X[t]), 200)
        pdf = density(xs)
        ax.plot([t] * len(xs), xs, pdf)
        index = min(range(len(xs)), key=lambda i: abs(xs[i] - x[t]))
        print(pdf[index])
        ax.scatter(t, x[t], pdf[index])
    plt.tight_layout()
    plt.show()
def scatter_kde(x, y, ax=None):
    """Scatter ``x`` against ``y`` on top of an image of their 2-D KDE.

    The KDE is evaluated on a 128 x 128 grid spanning the data extents
    and shown with ``imshow``.

    :param x, y: 1-D arrays of equal length (``min``/``max`` are called).
    :param ax: axes to draw on; defaults to the current axes.
    :returns: the axes used.
    """
    if ax is None:
        ax = pl.gca()

    # build kernel density estimator (KDE); vstack works where
    # ``np.array(zip(x, y))`` fails on Python 3 (zip is an iterator there)
    kde = gaussian_kde(np.vstack([x, y]))

    ax.scatter(x, y, alpha=0.5, color='white')

    # top right bottom left
    t = y.max()
    r = x.max()
    b = y.min()
    l = x.min()

    # Regular grid to evaluate kde upon
    n = 128
    x_flat = np.r_[l:r:n*1j]
    y_flat = np.r_[b:t:n*1j]
    g = np.array(np.meshgrid(x_flat, y_flat)).reshape(2, n*n)

    # evaluate the KDE at grid points
    z = kde(g).reshape(n, n)

    # np.ptp replaces the ndarray.ptp method removed in NumPy 2.0
    ax.imshow(z, aspect=np.ptp(x_flat)/np.ptp(y_flat), origin='lower',
              extent=(l, r, b, t))
    return ax
def kde_plot(x):
    """Plot the Gaussian-KDE density of ``x`` on a new figure.

    The density is evaluated on ``np.linspace``'s default number of points
    between ``x.min()`` and ``x.max()``.
    """
    # public import path; the scipy.stats.kde module is deprecated
    from scipy.stats import gaussian_kde

    kde = gaussian_kde(x)
    positions = np.linspace(x.min(), x.max())
    smoothed = kde(positions)
    plt.figure()
    plt.plot(positions, smoothed)
def build(self):
    """Fit the global Gaussian KDE on the points of the token iterator.

    ``self.allpoints`` receives element 1 of
    ``build_token_location_map_transpose(self.token_iterator)`` and
    ``self.global_model`` the KDE fitted on it; the fitting time is logged.
    """
    location_map = build_token_location_map_transpose(self.token_iterator)
    self.allpoints = location_map[1]
    l.debug('fitting kernel density estimate to %d points...' % len(self.token_iterator))
    started = time.time()
    self.global_model = kde.gaussian_kde(self.allpoints)
    elapsed = time.time() - started
    l.debug('...finished in %0.3f s.' % elapsed)
def determine_kde(data, size_kde=1000, ymin=None, ymax=None):
    '''
    Helper function responsible for performing a KDE

    :param data: 1-D sample to estimate the density of
    :param size_kde: number of evaluation points
    :param ymin: lower end of the evaluation grid (default: data minimum)
    :param ymax: upper end of the evaluation grid (default: data maximum)
    :returns: ``(kde_x, kde_y)`` - the density values and the grid they were
        evaluated on; the density is all zeros if the estimate fails
    '''
    # ``is None`` so that an explicit bound of 0 is honoured
    if ymin is None:
        ymin = np.min(data)
    if ymax is None:
        ymax = np.max(data)

    kde_y = np.linspace(ymin, ymax, size_kde)

    try:
        kde_x = kde.gaussian_kde(data).evaluate(kde_y)
    except Exception as e:
        # best effort: report the problem and fall back to a flat zero curve
        warning(e)
        kde_x = np.zeros(kde_y.shape)

    return kde_x, kde_y
def determine_kde(data, size_kde=1000, ymin=None, ymax=None):
    '''
    Helper function responsible for performing a KDE

    :param data: 1-D sample to estimate the density of
    :param size_kde: number of evaluation points
    :param ymin: lower end of the evaluation grid (default: data minimum)
    :param ymax: upper end of the evaluation grid (default: data maximum)
    :returns: ``(kde_x, kde_y)`` - the density values and the grid they were
        evaluated on, ordered from ``ymax`` down to ``ymin``; the density
        is all zeros if the estimate hits a linear-algebra error
    '''
    # ``is None`` so that an explicit bound of 0 is honoured
    if ymin is None:
        ymin = np.min(data)
    if ymax is None:
        ymax = np.max(data)

    # reversed grid: values run from ymax to ymin
    kde_y = np.linspace(ymin, ymax, size_kde)[::-1]

    try:
        kde_x = kde.gaussian_kde(data).evaluate(kde_y)
    except np.linalg.LinAlgError as e:
        warning(e)
        kde_x = np.zeros(kde_y.shape)

    return kde_x, kde_y
def score(aPDB,aFASTA,exe=None,logf=None):
    '''
    Gets alignment score irregardless of alignment method.

    Builds a list of ``alignmentScore`` objects, one per entry of the
    requested score types (``exe.scoresToDo`` when set and non-empty,
    otherwise ``SCORE_TYPES``), in that order.

    :param aPDB: PDB file handed to ``PDBnet.PDBstructure`` and
        ``homology.rrmsd``.
    :param aFASTA: FASTA file handed to the scoring routines.
    :param exe: optional object providing ``scoresToDo`` and ``scpdbs``.
    :param logf: optional progress logger, used only while generating a
        null distribution.
    :returns: list of ``alignmentScore`` instances.
    '''

    scores = []

    # Get PDB structure.
    p = PDBnet.PDBstructure(aPDB)

    # Get length of alignment.
    alignlen = len(p)

    # See what scores need to be done.
    if exe:
        scoresToDo = exe.scoresToDo
        if not scoresToDo:
            scoresToDo = SCORE_TYPES
    else:
        scoresToDo = SCORE_TYPES
    rrmsd,rpval,rmsd,tmsc,tpval,gdt = None,None,None,None,None,None

    # Get RRMSD and RMSD if length of alignment >= 100 residues.
    if 'RRMSD' in scoresToDo or 'RMSD' in scoresToDo:
        rrmsd, rmsd = homology.rrmsd(aPDB,aFASTA,True)
        if not exe or not exe.scpdbs or alignlen >= 100:
            # p-value from a fixed truncated normal (loc 0.177, scale 0.083)
            rpval = 1 - truncnorm.sf(rrmsd, 0, 1, loc=0.177, scale=0.083)#normpdf(rrmsd,0.177,0.083)
        elif exe and logf and 'RRMSD' in scoresToDo:
            # Perform alignments in order to generate null distribution.
            # NOTE(review): ``pdbli`` is not defined in this function --
            # presumably a module-level name; verify, otherwise this raises.
            logf.setTotalNum(logf.totalnum+2*(len(pdbli)+1))
            logf.writeTemporary(
                'Generating null distribution from SCOP for %s...' % (aPDB))
            scfolders = []  # not used again in this function
            run(exe.scpdbs,logf,ref=aPDB,exe=exe,quick=None)
            alignfldr = IO.getFileName(aPDB)
            # NOTE(review): unpickling is only safe for files this tool wrote.
            o = open('%s/ref.pickl' % (alignfldr))
            dic, _, _ = cPickle.load(o)
            vals = dic.values()
            o.close()
            # KDE of the null-distribution values, evaluated at rrmsd
            pdf = gaussian_kde(vals)
            rpval = pdf(rrmsd)

    # Get GDT and TMscore.
    if 'TMscore' in scoresToDo:
        tmsc = p.tmscore(aFASTA)
        tpval = 1 - math.exp(-math.exp((0.1512-tmsc)/0.0242))
    if 'GDT' in scoresToDo:
        gdt = p.gdt(aFASTA)

    # Add them to list in order as given.
    for it in scoresToDo:
        if it == 'RRMSD':
            scores.append(alignmentScore('RRMSD',rrmsd,rpval))
        elif it == 'RMSD':
            scores.append(alignmentScore('RMSD',rmsd))
        elif it == 'TMscore':
            scores.append(alignmentScore('TMscore',tmsc,tpval))
        elif it == 'GDT':
            scores.append(alignmentScore('GDT',gdt))

    # Return the scoring values.
    return scores
def pdf_estimation(x):
    """Fit one Gaussian KDE per entry of ``x``.

    Returns three parallel lists: the fitted KDE objects, an evaluation
    grid per entry (as many points as the entry has samples, spanning its
    min..max), and the entries themselves.
    """
    positions = range(len(x))
    density = [kde.gaussian_kde(x[i]) for i in positions]
    xgrid = [np.linspace(min(x[i]), max(x[i]), len(x[i])) for i in positions]
    xarr = [x[i] for i in positions]
    return density,xgrid,xarr
def histogram(self, index, **options):
    """Plot a normalised histogram of one data column and save it.

    :param index: column of ``self.datafile.data()`` / entry of
        ``self.datafile.parameters`` to plot.
    :param options: optional ``xmin`` / ``xmax`` (default: the parameter's
        own bounds), ``kde`` (draw a KDE curve when true) and
        ``kde_bandwidth`` (multiplier applied to the Silverman factor,
        default 1).

    The figure is written to ``self.pdffile``.
    """
    data = self.datafile.data()[:, index]

    # (name, min, max, nuisance, prior)
    parameter = self.datafile.parameters[index]

    plot.clf()
    plot.figure(figsize=(10, 10), dpi=80)

    # x axis
    plot.xlabel(parameter[0])
    xmin = options.get('xmin')
    if xmin is None:
        xmin = parameter[1]
    xmax = options.get('xmax')
    if xmax is None:
        xmax = parameter[2]
    plot.xlim(xmin, xmax)

    # y axis
    plot.ylabel('frequency')

    # plot (``density`` replaces the removed ``normed`` keyword)
    plot.hist(data, bins=100, density=True, alpha=.3)
    if options.get('kde'):
        kde = gaussian_kde(data)
        # start from Silverman's factor, then scale it by the user's value
        kde.set_bandwidth(bw_method='silverman')
        kde.set_bandwidth(bw_method=kde.factor * options.get('kde_bandwidth', 1.0))
        x = numpy.linspace(xmin, xmax, 1000)
        plot.plot(x, kde(x), 'r')
    plot.tight_layout()

    # save figure
    plot.savefig(self.pdffile)
def plotSnpDensity(snpDensityWindowSize, ax, chrSNPs, labels):
    """Add SNP-density curves to a twin y-axis of ``ax``.

    Adds a moving average over the SNP density to the plot, plus the
    matching kernel density estimate and an 8-bin histogram.

    :param snpDensityWindowSize: window size; half of it is used as step.
    :param ax: axes whose ``twinx()`` is drawn on.
    :param chrSNPs: table with a ``Position`` column; the last position
        must reach the window size, otherwise nothing is plotted.
    :param labels: dict receiving the line handles under ``'snp density'``
        and ``'snp kde'``.
    :returns: the twin axes.
    """
    ax2 = ax.twinx()
    ax2.set_ylabel(r"Snps per Window")

    # .iloc replaces the removed Series.irow
    if chrSNPs['Position'].iloc[-1] < snpDensityWindowSize:
        # Nothing below can be computed; previously this path ran into a
        # NameError on the undefined moving-average variables.
        print("not enough values to calculate moving average for SNPs")
        return ax2

    print("calculating SNP density")
    # ``//`` keeps the integer step the Python 2 ``/`` produced for int sizes
    (movingAveragePoolValues, positions) = movingAverageOverWindow(
        chrSNPs, snpDensityWindowSize, snpDensityWindowSize // 2)
    x = chrSNPs['Position']
    density = kde.gaussian_kde(x, bw_method='silverman')
    xgrid = numpy.linspace(x.min(), x.max(), len(x))

    if len(movingAveragePoolValues) == len(positions):
        labels['snp density'], = ax2.plot(positions, movingAveragePoolValues)
        labels['snp kde'], = ax2.plot(xgrid, density(xgrid)*1000000000, 'r-')
        # ``density`` replaces the removed ``normed`` keyword
        ax2.hist(x, bins=8, density=True)
    else:
        print("no snp density plotted")
    return ax2
def kerndens(vec,nbins=100):
    """Show a normalised histogram of ``vec`` with a Gaussian-KDE curve.

    :param vec: 1-D sample.
    :param nbins: number of histogram bins.
    """
    # ``density`` replaces the ``normed`` keyword removed from Matplotlib
    hist(vec, color='g', bins=nbins, density=True, align='mid')
    gkde = gaussian_kde(vec)
    # NOTE(review): the grid starts at 0 and spans the data *range*, not
    # min(vec)..max(vec) -- confirm this is intended for data not starting at 0.
    grid = arange(0, (1.01*(max(vec)-min(vec))), .1)
    plot(grid, gkde.evaluate(grid))
    show()
def test(x,y):
    """Save a 2x2 comparison of density visualisations of ``(x, y)``.

    Panels: scatterplot, hexbin, 2-D histogram and a Gaussian KDE evaluated
    on a regular ``nbins x nbins`` grid over the data extents.  The figure
    is written to ``fig2.png``.

    :param x, y: 1-D arrays of equal length (``min``/``max`` are called).
    """
    print(type(x))
    print(type(y))
    nbins = 20
    fig, axes = plt.subplots(ncols=2, nrows=2, sharex=True, sharey=True)

    axes[0, 0].set_title('Scatterplot')
    axes[0, 0].plot(x, y, 'ko')

    axes[0, 1].set_title('Hexbin plot')
    axes[0, 1].hexbin(x, y, gridsize=nbins)

    axes[1, 0].set_title('2D Histogram')
    axes[1, 0].hist2d(x, y, bins=nbins)

    # Evaluate a gaussian kde on a regular grid of nbins x nbins over data
    # extents.  The estimator is built from the arguments; the previous
    # code referenced an undefined ``data`` variable.
    k = kde.gaussian_kde(np.vstack([x, y]))
    xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
    zi = k(np.vstack([xi.flatten(), yi.flatten()]))

    axes[1, 1].set_title('Gaussian KDE')
    axes[1, 1].pcolormesh(xi, yi, zi.reshape(xi.shape))

    fig.tight_layout()
    plt.savefig('fig2.png')
def PlotKernel(band = 'alpha'):
    """Plot before/after values per row and the KDE of the pooled values.

    Reads the ``<band>_before`` and ``<band>_after`` columns of the
    module-level ``joined`` table.  Rows 1, 26 and 36 are dropped from the
    per-row scatter; pooled values of 26 or more are dropped before the
    KDE is fitted.  The KDE curve is scaled by 400 for display.

    :param band: column prefix, e.g. ``'alpha'``.
    """
    # .values replaces the removed DataFrame/Series.as_matrix()
    befores = joined[band+'_before'].values
    afters = joined[band+'_after'].values

    samp_split = np.vstack((befores, afters)).T
    samp_split = np.delete(samp_split, [1,26,36], 0)

    samp_joined = np.hstack((befores, afters))
    samp_joined = samp_joined[np.where(samp_joined < 26)]

    x = np.linspace(0,28,1000)
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # obtaining the pdf (my_pdf is a function!)
    my_pdf = gaussian_kde(samp_joined)

    for i in range(0, len(samp_split)):
        ax.scatter(samp_split[i,0], i, color = 'blue')
        ax.scatter(samp_split[i,1], i, color = 'red')

    # plotting the result: the curve does not depend on the row, so it is
    # drawn once
    ax.plot(x, my_pdf(x) *400,'r') # distribution function
def distparams(dist):
    """
    Description:
    ------------
    Return robust statistics of a distribution of data values, derived
    from a Gaussian KDE evaluated on 1000 points between
    ``0.5 * min(dist)`` and ``1.5 * max(dist)``.

    Returns:
    --------
    med      : value at cumulative probability 0.5
    mode     : location of the KDE maximum
    interval : width of the narrowest interval containing 68.27% of the
               probability mass
    lo, hi   : values at cumulative probabilities erfc(1/sqrt(2)) and
               erf(1/sqrt(2))

    Example:
    --------
    med,mode,interval,lo,hi = distparams(dist)
    """
    # public import path; the scipy.stats.kde module is deprecated
    from scipy.stats import gaussian_kde
    from scipy.interpolate import interp1d

    vals = np.linspace(np.min(dist)*0.5, np.max(dist)*1.5, 1000)
    pdf = gaussian_kde(dist)(vals)

    # Inverse CDF: maps a cumulative probability to a data value.
    dist_c = np.cumsum(pdf)/np.sum(pdf)
    func = interp1d(dist_c, vals, kind='linear')

    # builtin float replaces the np.float alias removed from NumPy
    lo = float(func(math.erfc(1./np.sqrt(2))))
    hi = float(func(math.erf(1./np.sqrt(2))))
    med = float(func(0.5))
    mode = vals[np.argmax(pdf)]

    # Slide a 68.27% probability window and keep the narrowest one.
    disthi = np.linspace(.684, .999, 100)
    distlo = disthi - 0.6827
    interval = np.min(func(disthi) - func(distlo))

    return med,mode,interval,lo,hi
this_df = df.filter(regex=mytime, axis=1) # compute beam radius this_df = this_df.iloc[idx_arrived, :] qx = this_df.iloc[:, 0] median_qx = np.median(qx) qy = this_df.iloc[:, 1] qz = this_df.iloc[:, 2] qr = np.sqrt(qy**2 + qz**2) nbins = 500 x = qy y = qz data = np.vstack([qy, qz]) k = kde.gaussian_kde(data) # xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j] xi, yi = np.mgrid[-3:3:nbins * 1j, -3:3:nbins * 1j] zi = k(np.vstack([xi.flatten(), yi.flatten()])) # scale between 0 and 1 zi = (zi - np.min(zi)) / (np.max(zi) - np.min(zi)) # print(zi) f = plt.figure(1, figsize=(7, 7)) # plt.title('KDE Gaussian on target for run \n {}'.format(type_file)) nullfmt = NullFormatter() # no labels # definitions for the axes left, width = 0.05, 0.65 bottom, height = 0.05, 0.65 bottom_h = left_h = left + width + 0.02
borderaxespad=0.) plt.figure(39) plt.plot(c31_mat, label="KF1", color='b') plt.plot(c32_mat, label="KF2", color='g') plt.ylabel(r'$\theta_3$', fontsize=22) plt.xlabel('runs', fontsize=14) plt.ylim((0.1, 0.4)) plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.) plt.figure() density1 = kde.gaussian_kde(gr_lik1, 0.4) dist_space = np.linspace(min(gr_lik1), max(gr_lik1), 1000) plt.hist(gr_lik1, bins=10, normed=True, histtype='stepfilled', alpha=0.2) prior = plt.plot(dist_space, density1(dist_space), label="KF1", color='r') density2 = kde.gaussian_kde(thinned21, 0.4) dist_space2 = np.linspace(min(thinned21), max(thinned21), 1000) post = plt.plot(dist_space2, density2(dist_space2), label="KF2", color='g') plt.hist(thinned21, bins=10, normed=True, histtype='stepfilled', alpha=0.2) plt.vlines(c_real[0], 0, 70, colors=u'b') plt.xlabel(r'$\theta_1$', fontsize=22) plt.ylabel(r'$p(\theta_1|data)$', fontsize=18) plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
def perform_kde(self, x_, y_):
    """Evaluate the 2-D Gaussian KDE of ``(x_, y_)`` on the stored grid.

    The grid points are the flattened ``self.xi`` / ``self.yi`` arrays;
    the result is a flat array with one density value per grid point.
    """
    estimator = kde.gaussian_kde([x_, y_])
    grid_points = np.vstack([self.xi.flatten(), self.yi.flatten()])
    return np.array(estimator(grid_points))
yhat_ts = predicted_yhat_ts[clf_param_key] # plt.figure() # plt.hist(yhat_tr[:, 1], 20, histtype='step', color='r', linewidth=2, label='train') # plt.hist(yhat_ts[:, 1], 20, histtype='step', color='b', linewidth=2, label='test') # plt.title('Patient {0}'.format(key)) # plt.xlim(0, 1) # plt.grid() # plt.legend() # # plt.show() from scipy.stats.kde import gaussian_kde from scipy.stats import entropy # Estimating the pdf and plotting pdf_tr = gaussian_kde(yhat_tr[:, 1]) pdf_ts = gaussian_kde(yhat_ts[:, 1]) x = np.linspace(0, 1, 100) en = entropy(pdf_tr(x), pdf_ts(x)) # print key-1 # print dkl.shape dkl[key - 1] = en # plt.figure() # plt.plot(x, pdf_tr(x), color='r', linewidth=2, label='train') # plt.plot(x, pdf_ts(x), color='b', linewidth=2, label='test') # # plt.hist(d1_np,normed=1,color="cyan",alpha=.8) # # plt.plot(x,norm.pdf(x,mu,stdv),label="parametric distribution",color="red") # plt.legend() # plt.grid()
import matplotlib.pyplot as plt
import pandas
import numpy as np
# gaussian_kde is imported from its public location; the scipy.stats.kde
# module is deprecated.
from scipy.stats import gaussian_kde


def _mean_normalise(column):
    """Centre a column on its mean and scale it by its value range."""
    return (column - column.mean()) / (column.max() - column.min())


# Columns 0 and 2 of the CSV are used as age and salary respectively.
adult_csv = pandas.read_csv('../adult.csv')
adult_age = _mean_normalise(adult_csv.iloc[:, 0])
adult_salary = _mean_normalise(adult_csv.iloc[:, 2])

# Evaluate the 2-D KDE on a regular nbins x nbins grid over the data extents.
nbins = 75
k = gaussian_kde([adult_salary, adult_age])
xi, yi = np.mgrid[adult_salary.min():adult_salary.max():nbins * 1j,
                  adult_age.min():adult_age.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

# Make the plot
plt.pcolormesh(xi, yi, zi.reshape(xi.shape))
plt.savefig('density_plot.png')
plt.show()
RY = Y / Y.mean(axis=0) plt.plot(years, RY[0]) name = np.array(name) for msoa in list(name[:5]): plt.plot(years, RY[np.nonzero(name == msoa)[0][0]], label=msoa) plt.legend() # Spaghetti plot for row in RY: plt.plot(years, row) # Kernel Density (univariate, aspatial) density = gaussian_kde(Y[:, 0]) minY0 = Y[:, 0].min() * .90 maxY0 = Y[:, 0].max() * 1.10 x = np.linspace(minY0, maxY0, 100) plt.plot(x, density(x)) d2017 = gaussian_kde(Y[:, -1]) minY0 = Y[:, -1].min() * .90 maxY0 = Y[:, -1].max() * 1.10 x = np.linspace(minY0, maxY0, 100) plt.plot(x, d2017(x)) minR0 = RY.min() maxR0 = RY.max() x = np.linspace(minR0, maxR0, 100) d2007 = gaussian_kde(RY[:, 0])
index_col=False) print(AH.shape) print(len(AH)) print(AH.dtypes) print(AH.describe(include='all')) # AH['SalePrice'].hist() print(style.available) # fig, ax = plt.subplots() # plt.hist(AH['SalePrice'], bins=60, log=True)#.log.hist(bins=60, density=1) from scipy.stats.kde import gaussian_kde from numpy import linspace, hstack from pylab import plot, show, hist my_density = gaussian_kde(AH['SalePrice']) x = linspace(min(AH['SalePrice']), max(AH['SalePrice']), 1000) # plot(x, my_density(x), 'g') # hist(AH['SalePrice'], normed=1, alpha=0.3) # AH.groupby('MS Zoning')['SalePrice'].plot.hist(density=1, alpha=0.6) # ax = AH.boxplot(column='SalePrice', by='MS Zoning') # ax.get_figure().suptitle('') print(AH["MS Zoning"].value_counts()) # plt.show() # print(AH.head()) town = pd.read_csv('./Shad_Python_01_2/town_1959_2/town_1959_2.csv', encoding='cp1251', index_col=False) print(town) print(town.describe()) #среднее print(town.describe().mean)
# simulation for i in range(0, seq_length): plt.clf() # get reading measurement = sensor_seq[i, :] # update mcl.update(measurement, speed) # get the location using kde, rather than simple mean # this create the kernel, given an array it will estimate the probability over that values kde = gaussian_kde(np.transpose(mcl.x_t[:, 0])) # these are the values over wich your kernel will be evaluated pdf = kde(range(mcl.map_length)) inds = np.argmax(pdf) est = np.mean(inds) print('Estimate') print(est) true_location = start_pos + i * speed # plot the results plt.scatter(mcl.x_t[:, 0], range(mcl.npart), s=1, c='k',
train_set = np.append(train_set, all_samples[3][0:40], axis=0) train_set = np.append(train_set, all_samples[4][0:40], axis=0) train_set = np.append(train_set, all_samples[5][0:40], axis=0) test_set = np.append(all_samples[1][40:80], all_samples[2][40:80], axis=0) test_set = np.append(test_set, all_samples[3][40:80], axis=0) test_set = np.append(test_set, all_samples[4][40:80], axis=0) test_set = np.append(test_set, all_samples[5][40:80], axis=0) # 40 for training and 40 for testing for each class, thus training data and testing data should have # size of (40 * 5 = 200, 3) shape, last column is the class column assert (train_set.shape == (200, 3)) assert (test_set.shape == (200, 3)) for bw in window_bw: class1_kde = kde.gaussian_kde(train_set[train_set[:, 2] == 1].T[0:2], bw_method=bw) class2_kde = kde.gaussian_kde(train_set[train_set[:, 2] == 2].T[0:2], bw_method=bw) classification_dict, error = empirical_error(test_set, [1, 2], bayes_classifier, [[class1_kde, class2_kde]]) labels_predicted = ['w{} (predicted)'.format(i) for i in [1, 2]] labels_predicted.insert(0, 'test dataset') train_conf_mat = prettytable.PrettyTable(labels_predicted) for i in [1, 2]: a, b = [classification_dict[i][j] for j in [1, 2]] # workaround to unpack (since Python does not support just '*a') train_conf_mat.add_row(['w{} (actual)'.format(i), a, b])
def plot_persistence_density(persistence=[], persistence_file="", nbins=300, bw_method=None,
                             max_intervals=1000, dimension=None, cmap=None, legend=False,
                             axes=None, fontsize=16, greyblock=False):
    """Plot a Gaussian kernel density estimate of a persistence diagram.

    The intervals come either from ``persistence`` or from a
    :doc:`persistence file <fileformats>`. Infinite death values are dropped
    before the density is estimated. This function does not handle degenerate
    data sets (the covariance inversion inside scipy can fail).

    Note that calling this function switches the global matplotlib ``text``
    and ``font`` rc settings to LaTeX rendering with a serif family.

    :param persistence: Persistence intervals values list. Can be grouped by
        dimension or not. The default list is never mutated here.
    :type persistence: an array of (dimension, array of (birth, death))
        or an array of (birth, death).
    :param persistence_file: A :doc:`persistence file <fileformats>` style
        name. If ``persistence`` is also non-empty, the intervals built from
        ``persistence`` replace the ones read from the file.
    :type persistence_file: string
    :param nbins: Evaluate a gaussian kde on a regular grid of nbins x nbins
        over data extents (default is 300)
    :type nbins: int.
    :param bw_method: The method used to calculate the estimator bandwidth,
        forwarded unchanged to ``scipy.stats.gaussian_kde``.
    :type bw_method: str, scalar or callable, optional.
    :param max_intervals: maximal number of points used in the density
        estimation. Selected intervals are those with the longest life time.
        Set it to 0 to see all. Default value is 1000.
    :type max_intervals: int.
    :param dimension: the dimension to be selected in the intervals
        (default is None to mix all dimensions).
    :type dimension: int.
    :param cmap: A matplotlib colormap (default is matplotlib.pyplot.cm.hot_r).
    :type cmap: cf. matplotlib colormap.
    :param legend: Display the color bar values (default is False).
    :type legend: boolean.
    :param axes: A matplotlib-like subplot axes. If None, the plot is drawn on
        a new set of axes.
    :type axes: `matplotlib.axes.Axes`
    :param fontsize: Fontsize to use in axis.
    :type fontsize: int
    :param greyblock: if we want to plot a grey patch on the lower half plane
        for nicer rendering. Default False.
    :type greyblock: boolean
    :returns: (`matplotlib.axes.Axes`): The axes on which the plot was drawn,
        or None when the file is missing or matplotlib/scipy cannot be
        imported.
    :raises ValueError: if neither ``persistence`` nor ``persistence_file``
        provides any interval.
    """
    try:
        import matplotlib.pyplot as plt
        import matplotlib.patches as mpatches
        # Import the public name; the ``scipy.stats.kde`` submodule is deprecated.
        from scipy.stats import gaussian_kde

        plt.rc('text', usetex=True)
        plt.rc('font', family='serif')

        persistence = _array_handler(persistence)
        persistence_dim = None

        if persistence_file != "":
            if dimension is None:
                # All dimension case
                dimension = -1
            if path.isfile(persistence_file):
                persistence_dim = read_persistence_intervals_in_dimension(
                    persistence_file=persistence_file, only_this_dim=dimension)
            else:
                print("file " + persistence_file + " not found.")
                return None

        if len(persistence) > 0:
            persistence_dim = np.array([
                (dim_interval[1][0], dim_interval[1][1])
                for dim_interval in persistence
                if (dim_interval[0] == dimension) or (dimension is None)
            ])

        if persistence_dim is None:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(
                "plot_persistence_density needs a non-empty persistence "
                "or a persistence_file")

        # Remove intervals whose death value is inf or NaN.
        persistence_dim = persistence_dim[np.isfinite(persistence_dim[:, 1])]
        if max_intervals > 0 and max_intervals < len(persistence_dim):
            # Sort by life time, then takes only the max_intervals elements
            persistence_dim = np.array(
                sorted(
                    persistence_dim,
                    key=lambda life_time: life_time[1] - life_time[0],
                    reverse=True,
                )[:max_intervals])

        birth = persistence_dim[:, 0]
        death = persistence_dim[:, 1]

        # default cmap value cannot be done at argument definition level as
        # matplotlib is not yet defined.
        if cmap is None:
            cmap = plt.cm.hot_r
        if axes is None:
            fig, axes = plt.subplots(1, 1)

        # line display of equation : birth = death
        x = np.linspace(death.min(), birth.max(), 1000)
        axes.plot(x, x, color="k", linewidth=1.0)

        # Evaluate a gaussian kde on a regular grid of nbins x nbins over
        # data extents
        k = gaussian_kde([birth, death], bw_method=bw_method)
        xi, yi = np.mgrid[
            birth.min():birth.max():nbins * 1j,
            death.min():death.max():nbins * 1j,
        ]
        zi = k(np.vstack([xi.flatten(), yi.flatten()]))

        # Make the plot
        img = axes.pcolormesh(xi, yi, zi.reshape(xi.shape), cmap=cmap)

        if greyblock:
            axes.add_patch(
                mpatches.Polygon(
                    [[birth.min(), birth.min()],
                     [death.max(), birth.min()],
                     [death.max(), death.max()]],
                    fill=True, color='lightgrey'))

        if legend:
            plt.colorbar(img, ax=axes)

        axes.set_xlabel("Birth", fontsize=fontsize)
        axes.set_ylabel("Death", fontsize=fontsize)
        axes.set_title("Persistence density", fontsize=fontsize)
        return axes

    except ImportError:
        print(
            "This function is not available, you may be missing matplotlib and/or scipy."
        )
def getPDF(values):
    """Fit a Gaussian kernel density estimate to ``values`` and return it.

    Nothing is drawn here: the caller receives the fitted ``gaussian_kde``
    object and can evaluate it at whatever points it wants to plot.
    """
    return gaussian_kde(values)
def MVKDE(S, J, proportion_matrix, filename=None, plot=False, bandwidth=0.25):
    """
    Generates a Multivariate Kernel Density Estimator and returns a matrix
    representing a probability distribution according to given age
    categories, and ability type categories.

    The estimator is fitted to a synthetic sample: 70000 draws are taken
    from each marginal of ``proportion_matrix`` with
    ``np.random.multinomial`` (so results depend on NumPy's global random
    state), age groups are coded 18, 19, ... and ability types 1, 2, ....

    Args:
        S (scalar): the number of age groups in the model
        J (scalar): the number of ability type groups in the model.
        proportion_matrix (Numpy array): SxJ shaped array that
            represents the proportions of the total going to each
            (s,j) combination
        filename (str): the file name to save image to; required when
            ``plot`` is True
        plot (bool): whether or not to save a plot of the probability
            distribution generated by the kde
        bandwidth (scalar): used in the smoothing of the kernel. Higher
            bandwidth creates a smoother kernel.

    Returns:
        estimator_scaled (Numpy array): SxJ shaped array that
            that represents the smoothed distribution of proportions
            going to each (s,j); its entries sum to one

    Raises:
        ValueError: if ``plot`` is True but no ``filename`` was given.
    """
    if plot and filename is None:
        # Fail before doing any work instead of inside plt.savefig(None).
        raise ValueError("filename must be provided when plot=True")

    proportion_matrix_income = np.sum(proportion_matrix, axis=0)
    proportion_matrix_age = np.sum(proportion_matrix, axis=1)
    # Keep this draw order (age first, then income) so seeded runs reproduce.
    age_probs = np.random.multinomial(70000, proportion_matrix_age)
    income_probs = np.random.multinomial(70000, proportion_matrix_income)

    # Expand the counts into one observation per draw: category value k is
    # repeated count[k] times (replaces the quadratic np.append loops).
    age_values = np.arange(18, 18 + len(age_probs), dtype=float)
    income_values = np.arange(1, 1 + len(income_probs), dtype=float)
    age_frequency = np.repeat(age_values, age_probs)
    income_frequency = np.repeat(income_values, income_probs)

    # gaussian_kde expects one row per dimension.
    samples = np.vstack((age_frequency, income_frequency))
    density = kde.gaussian_kde(samples, bw_method=bandwidth)

    age_min, income_min = samples.min(axis=1)
    age_max, income_max = samples.max(axis=1)
    # A complex step tells np.mgrid "this many points, endpoints included".
    agei, incomei = np.mgrid[
        age_min:age_max:complex(0, S),
        income_min:income_max:complex(0, J),
    ]
    coords = np.vstack([agei.ravel(), incomei.ravel()])
    estimator = density(coords).reshape(agei.shape)
    estimator_scaled = estimator / float(np.sum(estimator))

    if plot:
        fig = plt.figure()
        # fig.gca(projection=...) is no longer accepted by Matplotlib.
        ax = fig.add_subplot(111, projection="3d")
        ax.plot_surface(agei, incomei, estimator_scaled, rstride=5)
        ax.set_xlabel("Age")
        ax.set_ylabel("Ability Types")
        ax.set_zlabel("Received proportion of total bequests")
        plt.savefig(filename)
    return estimator_scaled
def plotContour(filename, source=False, particle='all'):
    """Show a 2-D histogram, a KDE heat map and KDE contours of hit positions.

    Reads the ``procdf`` table from the HDF5 file ``filename``, optionally
    restricts it by the ``energy`` column, and draws three side-by-side
    panels of the ``x``/``y`` columns. The KDE surface is rescaled so that
    its grid values sum to the number of selected rows. The figure is shown
    with ``plt.show()``; nothing is returned.

    Args:
        filename: path of the HDF5 file to read.
        source: when true, additionally read the ``sourcePV_df`` table and
            print how many ``x`` entries it has.
        particle: ``'all'`` (no cut), ``'alpha'`` (``energy > 5``) or
            ``'gamma'`` (``0.04 < energy < 0.08``).

    Raises:
        ValueError: if ``particle`` is not one of the three accepted values
            (previously the interpreter was terminated with ``exit()``).
    """
    # ``key`` is the read_hdf argument name; the former ``keys=`` spelling
    # did not select a table.
    df = pd.read_hdf(filename, key='procdf')

    if particle == 'all':
        selected = df
    elif particle == 'alpha':
        selected = df.loc[df.energy > 5]
    elif particle == 'gamma':
        selected = df.loc[(df.energy > .04) & (df.energy < 0.08)]
    else:
        raise ValueError(
            "specify particle type! expected 'all', 'alpha' or 'gamma', "
            "got {!r}".format(particle))

    x = np.array(selected['x'])
    y = np.array(selected['y'])

    fig, ax = plt.subplots(ncols=3)
    nbins = 50

    # Panel 0: normalised 2-D histogram (``density`` replaces ``normed``).
    ax[0].hist2d(x, y, bins=nbins, cmap='plasma', density=True)
    ax[0].set_xlim(-10, 10)
    ax[0].set_ylim(9, 19)

    # Evaluate a Gaussian KDE on an nbins x nbins grid over the data extent.
    xi, yi = np.mgrid[x.min():x.max():nbins * 1j,
                      y.min():y.max():nbins * 1j]
    positions = np.vstack([xi.flatten(), yi.flatten()])
    kernel = kde.gaussian_kde(np.vstack([x, y]))
    zi = np.reshape(kernel(positions).T, xi.shape)
    print(np.sum(zi))
    # Rescale so the grid values add up to the number of points.
    zi *= len(x) / np.sum(zi)

    # Panel 1: KDE heat map.
    ax[1].pcolormesh(xi, yi, zi, cmap='plasma')
    ax[1].set_xlim(-10, 10)
    ax[1].set_ylim(9, 19)

    # Panel 2: labelled KDE contours with a colour bar.
    CS = ax[2].contour(xi, yi, zi, cmap='plasma')
    ax[2].clabel(CS, fmt='%.2f', fontsize=20)
    plt.colorbar(CS, shrink=0.8, extend='both')
    ax[2].set_xlim(-10, 10)
    ax[2].set_ylim(9, 19)

    plt.show()

    if source:
        source_df = pd.read_hdf(filename, key='sourcePV_df')
        x_source = np.array(source_df['x'])
        print(len(x_source))
def plot_posterior_op(trace_values, ax):
    """Draw a posterior summary of ``trace_values`` on ``ax``.

    The panel shows either a KDE curve or a histogram, then overlays the HPD
    interval, an optional point estimate, an optional reference value and an
    optional ROPE.

    Besides its two arguments this function reads the following names from
    the enclosing scope: ``kde_plot``, ``kwargs``, ``point_estimate``,
    ``round_to``, ``alpha_level``, ``ref_val``, ``rope``, ``hpd``, ``stats``,
    ``kde`` and ``np``.

    :param trace_values: array of posterior samples (must support ``.mean()``
        and element-wise comparison).
    :param ax: matplotlib axes to draw on.
    :raises ValueError: if ``point_estimate`` is truthy but not one of
        ``'mode'``, ``'mean'``, ``'median'``.
    """

    def format_as_percent(x, round_to=0):
        # Render a fraction as a percentage string, e.g. 0.95 -> '95%'.
        value = np.round(100 * x, round_to)
        if round_to == 0:
            value = int(value)
        return '{}%'.format(value)

    def display_ref_val(ref_val):
        # Mark the reference value and annotate the posterior mass on each side.
        less_than_ref_probability = (trace_values < ref_val).mean()
        greater_than_ref_probability = (trace_values >= ref_val).mean()
        ref_in_posterior = (
            format_as_percent(less_than_ref_probability, 1)
            + ' <{:g}< '.format(ref_val)
            + format_as_percent(greater_than_ref_probability, 1))
        ax.axvline(ref_val, ymin=0.02, ymax=.75, color='g',
                   linewidth=4, alpha=0.65)
        ax.text(trace_values.mean(), plot_height * 0.6, ref_in_posterior,
                size=14, horizontalalignment='center')

    def display_rope(rope):
        # Draw the ROPE as a thick bar and label both of its bounds.
        ax.plot(rope, (plot_height * 0.02, plot_height * 0.02),
                linewidth=20, color='r', alpha=0.75)
        text_props = dict(size=16, horizontalalignment='center', color='r')
        ax.text(rope[0], plot_height * 0.14, rope[0], **text_props)
        ax.text(rope[1], plot_height * 0.14, rope[1], **text_props)

    def display_point_estimate():
        # Annotate the requested point estimate, if any.
        if not point_estimate:
            return
        if point_estimate not in ('mode', 'mean', 'median'):
            raise ValueError(
                "Point Estimate should be in ('mode','mean','median', None)"
            )
        if point_estimate == 'mean':
            point_value = trace_values.mean()
        elif point_estimate == 'mode':
            # stats.mode returns a length-1 array on older SciPy and a scalar
            # on newer releases; atleast_1d makes the indexing valid on both.
            mode_result = stats.mode(trace_values.round(round_to))[0]
            point_value = np.atleast_1d(mode_result)[0]
        else:  # 'median'
            point_value = np.median(trace_values)
        point_text = '{}={}'.format(point_estimate,
                                    point_value.round(round_to))
        ax.text(point_value, plot_height * 0.8, point_text,
                size=16, horizontalalignment='center')

    def display_hpd():
        # Draw the HPD interval as a bar and label its bounds and coverage.
        hpd_intervals = hpd(trace_values, alpha=alpha_level)
        ax.plot(hpd_intervals, (plot_height * 0.02, plot_height * 0.02),
                linewidth=4, color='k')
        text_props = dict(size=16, horizontalalignment='center')
        ax.text(hpd_intervals[0], plot_height * 0.07,
                hpd_intervals[0].round(round_to), **text_props)
        ax.text(hpd_intervals[1], plot_height * 0.07,
                hpd_intervals[1].round(round_to), **text_props)
        ax.text((hpd_intervals[0] + hpd_intervals[1]) / 2, plot_height * 0.2,
                format_as_percent(1 - alpha_level) + ' HPD', **text_props)

    def format_axes():
        # Strip everything but the bottom spine and its ticks.
        ax.yaxis.set_ticklabels([])
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['bottom'].set_visible(True)
        ax.yaxis.set_ticks_position('none')
        ax.xaxis.set_ticks_position('bottom')
        ax.tick_params(axis='x', direction='out', width=1, length=3,
                       color='0.5')
        ax.spines['bottom'].set_color('0.5')

    if kde_plot:
        density = kde.gaussian_kde(trace_values)
        l = np.min(trace_values)
        u = np.max(trace_values)
        x = np.linspace(0, 1, 100) * (u - l) + l
        ax.plot(x, density(x), **kwargs)
    else:
        # Fill in histogram defaults without overriding caller choices.
        kwargs.setdefault('bins', 30)
        kwargs.setdefault('edgecolor', 'w')
        kwargs.setdefault('align', 'right')
        ax.hist(trace_values, **kwargs)

    # The nested helpers read plot_height from this scope when called below.
    plot_height = ax.get_ylim()[1]

    format_axes()
    display_hpd()
    display_point_estimate()
    if ref_val is not None:
        display_ref_val(ref_val)
    if rope is not None:
        display_rope(rope)
#datplot = data_b4_filt2 #Take log of data so it's properly weighted on a linear scale datx = np.log10(np.asarray(datplot[xkey])) daty = np.log10(np.asarray(datplot[ykey])) #Keep only non-NaN values tstx = np.isfinite(datx) tsty = np.isfinite(daty) good = np.logical_and(tstx, tsty) #Numpy version datx2np = datx[good] daty2np = daty[good] datnp = np.vstack([datx2np, daty2np]) k = kde.gaussian_kde(datnp) nbins = 30. xi, yi = np.mgrid[datx2np.min():datx2np.max():nbins * 1j, daty2np.min():daty2np.max():nbins * 1j] #zi = np.log10(k(np.vstack([xi.flatten(), yi.flatten()]))) zi = k(np.vstack([xi.flatten(), yi.flatten()])) #Turn grid points back into linear scale xi2 = [10**x for x in xi] yi2 = [10**x for x in yi] #Set axes ranges xmin = np.min(datplot[xkey]) xmax = np.max(datplot[xkey]) ymin = np.min(datplot[ykey])
def histograms():
    """Save one KDE plot per gene comparing the VLMC and FB cell groups.

    Expression tables ``df_expr_ab`` / ``df_expr_bh`` are restricted to a
    fixed gene panel, log-transformed as ``log(x + 1)``, and filtered to rows
    whose metadata marks them as ``VLMC`` (ab) or as starting with ``FB``
    (bh). For each remaining gene, one density curve per metadata label is
    drawn and the figure is written to ``out_dir / "hist_<gene>.png"``.
    """
    up_early = ["Trpm3", "mt-Co1", "mt-Co3", "Nnat", "Ptgds", "Adam12", "Alcam"]
    up_late = ["Itih5", "Malat1", "Zbtb20", "Spp1", "Col15a1", "Ece1", "Cemip"]
    panel = up_early + up_late

    # Keep only the panel genes that are present (non-NaN) in each table.
    ab = df_expr_ab.reindex(panel, axis=1).dropna(axis=1)
    bh = df_expr_bh.reindex(panel, axis=1).dropna(axis=1)
    assert list(ab.columns) == list(bh.columns)

    # log1p trafo
    ab = ab.transform(lambda x: np.log(x + 1))
    bh = bh.transform(lambda x: np.log(x + 1))

    # Row selection by cell type.
    ab = ab[df_meta_ab.subclass_label == "VLMC"]
    bh = bh[df_meta_bh.celltype.str.startswith("FB")]
    assert list(ab.columns) == list(bh.columns)

    palette = {
        'FB1': 'purple',
        'FB2': 'violet',
        '374_VLMC': "C0",
        '375_VLMC': "C1",
        '376_VLMC': "C2",
    }

    for gene in sorted(ab.columns):
        with Plox() as px:
            groupings = [
                ab[gene].groupby(
                    df_meta_ab.reindex(ab.index).cell_type_alias_label),
                bh[gene].groupby(df_meta_bh.reindex(bh.index).celltype),
            ]
            labelled = (pair for grouped in groupings for pair in grouped)
            for label, values in labelled:
                if not any(values):
                    # Nothing but zeros for this label: no curve to draw.
                    continue
                estimate = gaussian_kde(values)
                grid = np.linspace(0, max(values) * 1.5, 100)
                px.a.plot(grid, estimate(grid),
                          label=f"{label} ({len(values)})",
                          color=palette[label])
            px.a.set_xlabel("log1p(count)")
            px.a.legend()
            px.a.set_yticks([])
            px.f.savefig(out_dir / f"hist_{gene}.png")
def plot_dist_byfrag(fragn, frags, category, fig_name):
    """
    Plot neighboring fragment distance distribution categorized by fragment
    number.

    Gaps between consecutive fragments are taken from the ``start-end`` part
    after the ``:`` of each fragment string; only gaps above 3000 are kept.
    Two PDFs are written: ``<fig_name>f2f_by_fnum.pdf`` (one KDE of
    log10(gap) per fragment-count bin) and ``<fig_name>f2f_all.pdf`` (one KDE
    over all gaps). Returns the flat list of kept gaps.
    """
    most = max(fragn)
    least = min(fragn)

    # Choose the fragment-count bins (label, counts covered) from the
    # observed range of fragn.
    if most < 5:
        bins = [('2-max', range(2, max(fragn) + 1))]
    elif most < 10:
        bins = [('5-max', range(5, most + 1))]
        if least < 5:
            bins.insert(0, ('2-4', range(2, 5)))
    else:
        bins = [('10-max', range(10, most + 1))]
        if least < 10:
            bins.insert(0, ('5-9', range(5, 10)))
        if least < 5:
            bins.insert(0, ('2-4', range(2, 5)))

    # gaps_by_count[i] collects the gaps filed under fragment count i.
    gaps_by_count = [[] for _ in range(max(fragn) + 1)]
    all_gaps = []
    for fragments in frags:
        spans = [item.split(':')[1].split('-') for item in fragments]
        raw_gaps = [
            int(following[0]) - int(current[1])
            for current, following in zip(spans, spans[1:])
        ]
        kept = [gap for gap in raw_gaps if gap > 3000]
        if kept:
            gaps_by_count[len(kept) + 1].extend(kept)
            all_gaps.extend(kept)

    gaps_per_bin = [
        [gap for count in counts for gap in gaps_by_count[count]]
        for _, counts in bins
    ]

    dist_space = linspace(3, 8, 100)
    for gaps in gaps_per_bin:
        curve = gaussian_kde(np.log10(gaps))(dist_space)
        plt.plot(dist_space, curve, linewidth=4)
    plt.legend([label for label, _ in bins], title="Fragment #")
    plt.title("F2F distance in " + str(len(fragn)) + " complexes (" + category + ")")
    plt.xlabel("Log10(Fragment-to-fragment distance)")
    plt.ylabel("Relative Density")
    plt.savefig(fig_name + 'f2f_by_fnum.pdf', dpi=300)
    plt.close()

    dist_space = linspace(3, 8, 100)
    plt.plot(dist_space, gaussian_kde(np.log10(all_gaps))(dist_space))
    plt.title("F2F distance in " + str(len(fragn)) + " complexes (" + category + ")")
    plt.xlabel("Log10(Fragment-to-fragment distance)")
    plt.ylabel("Relative Density")
    plt.savefig(fig_name + 'f2f_all.pdf', dpi=300)
    plt.close()

    return all_gaps
def plot_network_randomization(avg_metric_sub, avg_metric_dom, metric_sub, metric_dom, ylabel, xlim_hist,
                               alpha_level=0.05, dom_color=None, sub_color=None):
    """Visualize a network randomization test and compute a two-tailed p-value.

    The randomized metrics are reshaped to ``(6, 1000, -1)``, the six blocks
    are joined side by side, and the finite entries of each of the 1000 rows
    are averaged. The left panel pairs the randomized dom/sub means and marks
    the observed means; the right panel shows the distribution of randomized
    ``dom - sub`` differences with a KDE, the ``alpha_level`` tails and the
    observed difference. The figure is shown with ``plt.show()``.

    Parameters
    ----------
    avg_metric_sub : np.ndarray
        Metric of sub obtained from network randomizations.
    avg_metric_dom : np.ndarray
        Metric of dom obtained from network randomizations.
    metric_sub : np.ndarray
        Observed metric of sub.
    metric_dom : np.ndarray
        Observed metric of dom.
    ylabel : string
        Y axis label for the randomization plot.
    xlim_hist : (float, float)
        X axis limits for the histogram.
    alpha_level : float, optional
        Significance level used to shade the tails. Defaults to 0.05.
    dom_color : (float, float, float), optional
        RGB color for dom in the range of [0, 1].
    sub_color : (float, float, float), optional
        RGB color for sub in the range of [0, 1].

    Returns
    -------
    string
        A formatted p-value.
    """

    def finite_row_means(samples):
        # (6, 1000, k) -> (1000, 6 * k), then mean of the finite entries per row.
        blocks = np.array(samples).reshape(6, 1000, -1)
        rows = np.concatenate(list(blocks), axis=1)
        return np.array([row[np.isfinite(row)].mean() for row in rows])

    if dom_color is None:
        dom_color = tuple(v / 255 for v in (255, 109, 69))
    if sub_color is None:
        sub_color = tuple(v / 255 for v in (39, 170, 214))

    mean_values_sub = finite_row_means(avg_metric_sub)
    mean_values_dom = finite_row_means(avg_metric_dom)

    fig, axes = plt.subplots(1, 2, figsize=(12, 4),
                             gridspec_kw={'width_ratios': [0.5, 0.4]})
    paired_ax, diff_ax = axes[0], axes[1]

    # One faint segment per randomization, from (1, dom) to (4, sub).
    endpoints = np.transpose([
        np.repeat(1, mean_values_dom.size), mean_values_dom,
        np.repeat(4, mean_values_sub.size), mean_values_sub
    ]).reshape(-1, 2, 2)
    lc = LineCollection(endpoints, lw=0.5, alpha=0.1, color=(0.2, 0.2, 0.2),
                        zorder=0, capstyle='butt')
    paired_ax.add_collection(lc)

    # Jittered randomized means (dom drawn first, then sub).
    paired_ax.scatter(np.random.uniform(0.1, 0.9, mean_values_dom.size),
                      mean_values_dom, s=5, facecolor=(0.5, 0.5, 0.5),
                      edgecolor='k', lw=0.4)
    paired_ax.scatter(np.random.uniform(4.1, 4.9, mean_values_sub.size),
                      mean_values_sub, s=5, facecolor=(0.5, 0.5, 0.5),
                      edgecolor='k', lw=0.4)

    # Observed means.
    observed_pair = [metric_dom.mean(), metric_sub.mean()]
    paired_ax.plot([1, 4], observed_pair, '--', color='k',
                   solid_capstyle='butt')
    paired_ax.scatter([0.5, 4.5], observed_pair, s=20, marker='o',
                      facecolor=np.array([dom_color, sub_color]),
                      edgecolor='k')
    paired_ax.set_ylabel(ylabel, fontsize=14)
    paired_ax.set_xticks([0.5, 4.5])
    paired_ax.set_xticklabels([r'$Dom$', r'$Sub$'], fontsize=14)

    # KDE of the randomized differences on a grid padded by one data range.
    differences = mean_values_dom - mean_values_sub
    pdf = gaussian_kde(differences)
    padding = (differences.max() - differences.min())
    x = np.linspace(differences.min() - padding,
                    differences.max() + padding, 1000)
    cdf = np.cumsum(pdf(x)) * np.diff(x)[0]
    left = np.argwhere(cdf <= alpha_level / 2).ravel().max()
    right = np.argwhere(cdf >= 1 - alpha_level / 2).ravel().min()

    observed = metric_dom.mean() - metric_sub.mean()
    idx = np.argmin(np.abs(x - observed))
    # calculate p value ( * 2) because two-sided
    nearer_left = np.abs(observed - x[left]) <= np.abs(observed - x[right])
    p_value = 2 * cdf[idx + 1] if nearer_left else 2 * (1 - cdf[idx])

    diff_ax.hist(mean_values_dom - mean_values_sub, bins=30, density=True,
                 facecolor=(0, 0, 0, 0.1), edgecolor=(0, 0, 0, 0.4))
    # Shade both tails and the central region.
    diff_ax.fill_between(x[:left + 1], pdf(x[:left + 1]),
                         facecolor='#7CB939', alpha=0.75)
    diff_ax.fill_between(x[left:right + 1], pdf(x[left:right + 1]),
                         facecolor='k', alpha=0.25)
    diff_ax.fill_between(x[right:], pdf(x[right:]),
                         facecolor='#7CB939', alpha=0.75)
    for bound in (left, right):
        diff_ax.plot([x[bound]] * 2, [0, pdf(x[bound])], c='k', alpha=0.75,
                     lw=0.5, solid_capstyle='butt')
    diff_ax.plot(x, pdf(x), alpha=1, lw=0.5, c='k', solid_capstyle='butt')
    diff_ax.axvline(observed, linestyle='--', color='k',
                    solid_capstyle='butt', ymax=0.9)
    diff_ax.set_xlim(xlim_hist)
    diff_ax.set_xlabel('mean difference', fontsize=14)
    diff_ax.set_ylabel('density', fontsize=14)

    for ax in axes.ravel():
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')

    fig.tight_layout()
    plt.show()
    return 'p-value: {}'.format(p_value)
def _create_histogram_distribution(self, df, min_x=None, max_x=None, extend_x_proportion_percentage=20,
                                   postfix_label=None, obs_weights=None, denormalised=True):
    """Build a 10-bucket histogram of ``df`` plus KDE and normal PDF fits.

    Args:
        df: the observations (single column of values).
        min_x: optional lower bound for the PDF axis; when given, the smaller
            of it and the data minimum is multiplied by the extension factor.
        max_x: optional upper bound for the PDF axis, treated like ``min_x``.
        extend_x_proportion_percentage: percentage used to build the
            extension factor ``1 + pct / 100``.
        postfix_label: optional text appended to the column names as
            ``": <postfix_label>"``.
        obs_weights: optional observation weights, used for the histogram,
            the weighted KDE and the weighted mean/std.
        denormalised: if True the histogram holds raw counts and both PDFs
            are scaled by ``bin_width * N`` to match it; if False the
            histogram is a density and the PDFs are left unscaled.

    Returns:
        ``(df_hist, df_pdf)``: the histogram indexed by bin centre, and a
        frame indexed by 100 evenly spaced x values with ``KDE-PDF`` and
        ``Norm-PDF`` columns. Two empty DataFrames are returned when there
        is at most one observation.
    """
    # get min/max values for our histogram
    min_hist_x = df.min()
    max_hist_x = df.max()

    extend_x_proportion_percentage = 1.0 + (
        float(extend_x_proportion_percentage) / 100.0)

    # extend axes for PDF, so just outside histogram
    if min_x is not None:
        min_x = min(min_x, min_hist_x) * extend_x_proportion_percentage
    else:
        min_x = min_hist_x

    if max_x is not None:
        max_x = max(max_x, max_hist_x) * extend_x_proportion_percentage
    else:
        max_x = max_hist_x

    # Raw counts when denormalised, a proper density otherwise. (This name
    # used to be left unbound when denormalised was False.)
    density = not denormalised

    vals = df.T.values.astype(np.float64)

    # Create a histogram with 10 buckets
    hist, bins = np.histogram(vals, bins=10,
                              range=[float(min_hist_x), float(max_hist_x)],
                              density=density, weights=obs_weights)
    bin_cent = (bins[1:] + bins[:-1]) * 0.5

    number_of_elements = len(df.values)
    dist_space = np.linspace(min_x, max_x, 100)

    if postfix_label is None:
        postfix_label = ''
    else:
        postfix_label = ": " + postfix_label

    if number_of_elements <= 1:
        # A KDE cannot be fitted to a single observation.
        return pd.DataFrame(), pd.DataFrame()

    # Create a best fit PDF using Gaussian KDE model (forcibly cast to float64)
    if obs_weights is None:
        kde = gaussian_kde(vals)
    else:
        kde = gaussian_weighted_kde(
            vals, weights=obs_weights.values.astype(np.float64))

    # Sometimes need to transpose so the dimensions are consistent
    try:
        pdf_fit = kde(dist_space)
    except Exception:
        pdf_fit = kde(dist_space.T)

    # Calculated normal PDF
    if obs_weights is None:
        weighted_stats = DescrStatsW(df.values, ddof=0)
    else:
        weighted_stats = DescrStatsW(df.values,
                                     weights=obs_weights.T.values, ddof=0)

    mu = weighted_stats.mean
    std = weighted_stats.std
    normal_pdf_fit = norm.pdf(dist_space, mu, std)

    # Scale pdf_fit (and normal PDF) by total/bin size
    if denormalised:
        bin_width = abs(bins[1] - bins[0])
        N = np.sum(hist)
        pdf_fit = pdf_fit * (bin_width * N)
        normal_pdf_fit = normal_pdf_fit * (bin_width * N)

    df_hist = pd.DataFrame(index=bin_cent, data=hist,
                           columns=['Histogram' + postfix_label])
    df_pdf = pd.DataFrame(index=dist_space, data=pdf_fit,
                          columns=['KDE-PDF' + postfix_label])
    df_pdf['Norm-PDF' + postfix_label] = normal_pdf_fit

    return df_hist, df_pdf
# Class labels and basic dimensions of the data matrix X (defined earlier).
classNames = ['Non diabetes', 'Diabetes']
N, M = X.shape
C = len(classNames)
# Draw samples from mixture of gaussians (as in exercise 11.1.1), add outlier
#N = 1000; M = 1
#x = np.linspace(-10, 10, 50)
#X = np.empty((N,M))
#m = np.array([1, 3, 6]); s = np.array([1, .5, 2])
#c_sizes = np.random.multinomial(N, [1./3, 1./3, 1./3])
#for c_id, c_size in enumerate(c_sizes):
#    X[c_sizes.cumsum()[c_id]-c_sizes[c_id]:c_sizes.cumsum()[c_id],:] = np.random.normal(m[c_id], np.sqrt(s[c_id]), (c_size,M))
#X[-1,0]=-10 # added outlier
# Compute kernel density estimate.
# X is flattened, so all N*M entries are treated as one 1-D sample and the
# indices in idx refer to positions in that flattened array.
kde = gaussian_kde(X.ravel())
scores = kde.evaluate(X.ravel())
# idx orders the entries from lowest to highest density; scores is then
# sorted in place into the same ascending order.
idx = scores.argsort()
scores.sort()
print('The index of the lowest density object: {0}'.format(idx[0]))
# Plot the 20 lowest density values as the outlier scores.
figure()
bar(range(20), scores[:20])
title('Outlier score')
show()
print('Ran Exercise 11.3.1')
obs = obs.loc[filter_classes] nbins = 40 x, y = obs.values, mod.values xi, yi = np.mgrid[x.min():x.max():nbins * 1j, y.min():y.max():nbins * 1j] zi = np.zeros_like(xi) * np.nan for ibin in range(nbins): xmin = x.min() + ibin * (x.max() - x.min()) / nbins xmax = xmin + (x.max() - x.min()) / nbins in_bin = ((x >= xmin) & (x < xmax)) ybin = y[in_bin] xbin = x[in_bin] if len(ybin) > 20: k = kde.gaussian_kde((ybin)) zi[ibin] = k(np.vstack([yi[ibin].flatten()])) zi = zi / np.sum(zi, axis=1)[:, np.newaxis] zi_int = zi.cumsum(axis=1) # label=key+", "+\ # 'R = '+str(round(PR[0],3))+', '+\ # 'RMSE = '+str(round(RMSE,5))+', '+\ # 'BIAS = '+str(round(BIAS,5)),s=1.,color=colors[ikey]) axes[varkey].contour(xi, yi, zi_int.reshape(xi.shape), levels=[0.16, 0.5, 0.84], colors=['darkred', 'lightgreen', 'darkred'], linewidths=[1, 2, 1]) axes[varkey].contourf( xi,
def degree_distn(G_list, cost, group_list, title, figure_name, measure, option='hist'):
    '''
    Plot the degree distribution of each graph and save it to ``figure_name``.

    This can be used to plot either a histogram or a KDE function by
    changing the option from either 'hist' or 'kde'.

    G_list      : graphs to compare. When cost < 100 every edge weight is
                  overwritten with 1 IN PLACE before degrees are computed.
    cost        : graph cost; selects the hard-wired x range (cost > 15, 10
                  or 2) and the axis limits (100, 20, 10 or 2).
    group_list  : one group name per graph; used for legend labels and as
                  keys into the module-level ``color_dict``.
    title       : accepted for interface compatibility; not used here.
    figure_name : path the figure is saved to.
    measure     : legend title (upper-cased), shown when there is more than
                  one graph.
    option      : 'hist' or 'kde'; any other value saves an empty axes.

    Raises ValueError if a plot is requested for a cost with no hard-wired
    range (previously this surfaced as an unbound-variable error).
    '''

    def hardwired_range(step_by_cost):
        # The ranges are different for the different costs.
        # They're hardwired here; only the step differs between hist and kde.
        if cost > 15:
            return np.arange(0, 180, step_by_cost['high'])
        if cost == 10:
            return np.arange(0, 100, step_by_cost[10])
        if cost == 2:
            return np.arange(0, 50, step_by_cost[2])
        raise ValueError(
            'no hard-wired degree range for cost={!r}'.format(cost))

    # Create the figure
    fig, ax = plt.subplots(figsize=(6, 4))

    degrees_list = []
    for G in G_list:
        # Degree only has meaning if you don't have a full graph!
        # So while we'll *call* those values "degrees" it actually
        # represents strength...but only for the cost=100 graph
        if cost < 100:
            # Binarize the graph
            for u, v, d in G.edges(data=True):
                d['weight'] = 1

        # Get the degrees of the graph. dict(...) accepts both the plain
        # dict of older networkx and the degree view of newer releases.
        degrees = list(dict(G.degree(weight='weight')).values())
        degrees_list += [degrees]

    if option == 'hist':
        x = hardwired_range({'high': 10, 10: 10, 2: 5})
        color_list = [color_dict[group] for group in group_list]
        # Plot the histogram (``density`` replaces the removed ``normed``)
        ax.hist(degrees_list, bins=x, color=color_list, density=True,
                label=group_list)

    elif option == 'kde':
        x = hardwired_range({'high': 1, 10: 1, 2: 1})
        for degrees, group in zip(degrees_list, group_list):
            # Calculate and plot the kde function
            pdf = gaussian_kde(degrees)
            ax.plot(x, pdf(x), color=color_dict[group], label=group)

    # Set the appropriate x and y limits
    if cost == 100:
        ax.set_xlim((0, 180))
        ax.set_ylim((0, 0.02))
    if cost == 20:
        ax.set_xlim((0, 180))
        ax.set_ylim((0, 0.015))
    if cost == 10:
        ax.set_xlim((0, 100))
        ax.set_ylim((0, 0.025))
    if cost == 2:
        ax.set_xlim((0, 50))
        ax.set_ylim((0, 0.08))

    if len(G_list) > 1:
        ax.legend(loc='upper left', framealpha=0.0, title=measure.upper())

    fig.savefig(figure_name, bbox_inches=0, dpi=300)
    plt.close(fig)
def residual(pred, obs, x):
    """
    This function analyzes the residual between predicted values and observed
    values. Given the predicted and observed values, this function does the
    following:

    #. Smooth both samples with a Gaussian KDE and integrate the densities
       over ``x`` (trapezoidal rule) to obtain cumulative distribution
       functions (CDF)

    #. Compute the residual in the CDF between observed and predicted data

        .. math::
            r(x) = cdf_{observed}(x) - cdf_{predicted}(x)

    #. Invert both CDFs by interpolation, evaluated on ``len(x)`` evenly
       spaced probabilities from 0 to the smaller of the two CDF maxima, and
       compute the residual of the inverted CDFs as predicted minus observed

    The scaled residuals are divided by ``std(cdf_observed)`` for the CDF
    frame and by ``std(obs)`` for the inverted-CDF frame.

    :param numpy.ndarray pred: the predicted (ABMHAP) values used to make the CDF
    :param numpy.ndarray obs: the observed (CHAD) values used to make the CDF
    :param numpy.ndarray x: the x-values at which the densities are evaluated

    :return: the data for the cumulative distribution data (predicted,
        observed, residual, and scaled residual), the data for the inverted
        cumulative distribution data (predicted, observed, residual, and
        scaled residual); both have columns ``pred``, ``obs``, ``res``,
        ``res_scale``
    :rtype: pandas.core.frame.DataFrame, pandas.core.frame.DataFrame
    """
    # ``cumtrapz`` was renamed ``cumulative_trapezoid`` and later removed
    # from SciPy; prefer the new name and fall back for old installations.
    cumulative_trapezoid = getattr(integrate, "cumulative_trapezoid", None)
    if cumulative_trapezoid is None:
        cumulative_trapezoid = integrate.cumtrapz

    #
    # CDF
    #

    # smooth probability density functions
    f_obs = kde.gaussian_kde(obs)
    f_pred = kde.gaussian_kde(pred)

    # the density vectors
    d_obs = f_obs(x)
    d_pred = f_pred(x)

    # the cumulative distribution functions (initial=0 keeps len(x) entries)
    cdf_obs = cumulative_trapezoid(y=d_obs, x=x, initial=0)
    cdf_pred = cumulative_trapezoid(y=d_pred, x=x, initial=0)

    # the residual in the CDFs
    res = cdf_obs - cdf_pred
    res_scaled = res / np.std(cdf_obs)

    #
    # the inverted CDF
    #

    # create functions that represent the inverted cdf
    f_inv_obs = interpolate.interp1d(x=cdf_obs, y=x)
    f_inv_pred = interpolate.interp1d(x=cdf_pred, y=x)

    # the probability; capped so both interpolators stay inside their range
    p_max = min(cdf_obs.max(), cdf_pred.max())
    p = np.linspace(0, p_max, num=len(x))

    # the inverted of the CDF
    cdf_inv_obs = f_inv_obs(p)
    cdf_inv_pred = f_inv_pred(p)

    res_inv = (cdf_inv_obs - cdf_inv_pred) * (-1)
    res_inv_scaled = res_inv / np.std(obs)

    #
    # Output
    #

    # combine all of the information into a data frame
    y_data = {
        'pred': cdf_pred,
        'obs': cdf_obs,
        'res': res,
        'res_scale': res_scaled
    }
    y_inv_data = {
        'pred': cdf_inv_pred,
        'obs': cdf_inv_obs,
        'res': res_inv,
        'res_scale': res_inv_scaled
    }

    # the cumulative distribution data
    cdf = pd.DataFrame(y_data)

    # the inverted cumulative distribution data
    inv_cdf = pd.DataFrame(y_inv_data)

    return cdf, inv_cdf
# Grid resolution (points per axis) for the density heat map.
nbins = 20
# Two panels side by side: raw trajectory (left) and density heat map (right).
fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(12, 5))
#axes[0].set_title('Trajectory')
# Left panel spans the full frame, with ticks every 60 units.
axes[0].set(title='Trajectory',
            xlim=(0, args.frameWidth),
            xticks=list(range(0, args.frameWidth, 60)),
            ylim=(0, args.frameHeight),
            yticks=list(range(0, args.frameHeight, 60)))
axes[0].plot(x, y, 'k')
# Evaluate a Gaussian kernel density estimate on a regular grid of nbins x nbins
# spanning the extent of x and y.
# NOTE(review): assumes points holds the same (x, y) pairs plotted above - confirm.
k = kde.gaussian_kde(points.T)
xi, yi = np.mgrid[x.min():x.max():nbins * 1j, y.min():y.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
# Plot density with shading
#axes[1].set_title('Heatmap')
# No xlim/ylim is set on this panel, unlike the trajectory panel.
axes[1].set(title='Heatmap',
            xticks=list(range(0, args.frameWidth, 60)),
            yticks=list(range(0, args.frameHeight, 60)))
pc = axes[1].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.jet)
# NOTE(review): `source` is bound by an enclosing loop whose header is outside
# this view; this `if` belongs to that loop body and the statements after it
# run once the loop has finished - confirm indentation when splicing.
if source.data.where.startswith('gal'):
    # Test each class name separately: `'pwn' or 'unid' in classes` was
    # always true because the non-empty string 'pwn' is truthy.
    if 'pwn' in source.data.classes or 'unid' in source.data.classes:
        try:
            spatial_model = source.spatial_model()
            source_lon = spatial_model.lon_0.value
            source_lat = spatial_model.lat_0.value
        except Exception:
            # Best effort: skip sources without a usable position.
            pass
        else:
            # Append both coordinates together so the two lists cannot get
            # out of step when only one of the lookups fails.
            gammacat_pwn_glon.append(source_lon)
            gammacat_pwn_glat.append(source_lat)

gammacat_pwn_glat = np.array(gammacat_pwn_glat)
gammacat_pwn_glon = np.array(gammacat_pwn_glon)
# Wrap longitudes from [0, 360) to (-180, 180] element by element. The former
# concatenate([lon[lon > 180] - 360, lon[lon < 180]]) reordered the
# longitudes relative to the latitudes and dropped values equal to 180.
gammacat_pwn_glon = np.where(gammacat_pwn_glon > 180,
                             gammacat_pwn_glon - 360,
                             gammacat_pwn_glon)

# 2-D KDE of the selected gamma-cat positions, normalised to a peak of 1.
k = kde.gaussian_kde(np.array([gammacat_pwn_glon, gammacat_pwn_glat]))
nbins = 200
xi, yi = np.mgrid[gammacat_pwn_glon.min():gammacat_pwn_glon.max():nbins * 1j,
                  gammacat_pwn_glat.min():gammacat_pwn_glat.max():nbins * 1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))
zi /= zi.max()

# Same density estimate for the positions in `final` (left un-normalised).
glat = final.GLAT
glon = np.asarray(final.GLON)
glon = np.where(glon > 180, glon - 360, glon)
k1 = kde.gaussian_kde(np.array([glon, glat]))
nbins = 200
xi1, yi1 = np.mgrid[glon.min():glon.max():nbins * 1j,
                    glat.min():glat.max():nbins * 1j]
zi1 = k1(np.vstack([xi1.flatten(), yi1.flatten()]))
# Axis labels for the panel drawn just before this point (subject index ip).
plt.ylabel(ylabes[ip], fontsize=12)
plt.xlabel('Years', fontsize=12)
x0 = np.linspace(0.5, 6, 40)
# Exponential of the linear predictor built from the MAP estimates.
# NOTE(review): the predictor uses `xl`, not the `x0` grid defined above, and
# `+ + beta4MAP` parses as a unary plus, so it is numerically harmless - confirm
# both are intended.
y0 = np.exp(uMAP + betaMAP + beta1MAP[ip] * xl + beta2MAP[ip] * elec_Pca_char1[ip * 42:(ip * 42 + 40)] + \
            beta3MAP[ip] * elec_Pca_char2[ip * 42:(ip * 42 + 40)] + + beta4MAP[ip] * xl * xl)
# Posterior sample from the trace
# for ips in np.random.randint(burnin, 3000, ppcsamples):
#     param = trace[ips]
#     yl2 = np.exp(param['beta'][ip] + param['beta1'][ip] * (xl) + param['beta2'][ip]*elec_Pca_char1[ip*42:(ip*42+40)] + \
#         param['beta3'][ip]*elec_Pca_char2[ip*42:(ip*42+40)])
#     ax0.plot(xl, yl2, 'k', linewidth=2, alpha=.05)
# Second panel of this subject's row: KDE of column ip of kde_beta2 on [-8, 8].
ax1 = plt.subplot(gs[1 + ip * 3])
my_pdf1 = gaussian_kde(kde_beta2[:, ip])
x1 = np.linspace(-8, 8, 300)
ax1.plot(x1, my_pdf1(x1), 'k', lw=2.5, alpha=0.6)
plt.xlim((-8, 8))
plt.xlabel(r'$\beta2$', fontsize=15)
plt.ylabel('Posterior Density', fontsize=12)
# Third panel: KDE of column ip of kde_beta3 on [-6, 6].
ax2 = plt.subplot(gs[2 + ip * 3])
my_pdf2 = gaussian_kde(kde_beta3[:, ip])
x2 = np.linspace(-6, 6, 300)
ax2.plot(x2, my_pdf2(x2), 'k', lw=2.5, alpha=0.6)
plt.xlim((-6, 6))
plt.xlabel(r'$\beta3$', fontsize=15)
plt.ylabel('Posterior Density', fontsize=12)
# The title lands on the current axes (ax2), labelled with the 1-based subject number.
plt.title('Subject %s' % (ip + 1))
def get_normalized_principle_moment_ratios():
    """Compute NPR1/NPR2 for the coformer mol files and save a hexbin plot.

    Every ``*.mol`` file in ``../../data/coformer1`` is loaded with RDKit (in
    sorted filename order) and its two normalized principal moment ratios
    are computed.  Molecules for which either descriptor cannot be computed
    are counted as failures and printed.  Processing stops once 10000
    molecules have been handled successfully.  The (NPR1, NPR2) pairs are
    drawn as a hexbin plot that is written to
    ``../../data/figures/npr_mol1.png``.

    Side effects: prints progress counts, changes several global matplotlib
    rcParams, and leaves the created figure open.

    Returns:
        tuple: ``(npr1, npr2, fails)`` - two equally long lists of ratios and
        the number of molecules that failed.
    """
    mol_paths = sorted(glob.glob("../../data/coformer1/*.mol"))
    molecules = [Chem.MolFromMolFile(mol_path) for mol_path in mol_paths]
    print(len(molecules))

    npr1 = []
    npr2 = []
    fails = 0
    for mol in molecules:
        try:
            # Compute both ratios before storing either one, so a failure in
            # the second cannot leave npr1 longer than npr2.
            ratio1 = rdkit.Chem.Descriptors3D.NPR1(mol)
            ratio2 = rdkit.Chem.Descriptors3D.NPR2(mol)
        except Exception:
            fails += 1
            print(mol)
        else:
            npr1.append(ratio1)
            npr2.append(ratio2)
        if len(npr1) == 10000:
            print("-- Truncating at 10K")
            break
    print(len(npr1))
    print(len(npr2))

    # The gaussian-KDE grid that used to be evaluated here was never drawn
    # (the contour call was commented out) and it raised on very small or
    # degenerate inputs, so it has been removed.

    # Plot the NPRs on a 2D map.
    fig = plt.figure(figsize=(10, 8))
    fig.patch.set_facecolor('white')
    plt.rcParams['axes.facecolor'] = 'white'
    plt.grid(False)
    for side in ('top', 'bottom', 'left', 'right'):
        plt.rcParams['axes.spines.' + side] = True

    plt.hexbin(npr1, npr2)
    cbar = plt.colorbar()
    cbar.ax.tick_params(labelsize=15)
    # NOTE(review): the label text looks garbled and the colour bar shows the
    # default hexbin value (points per cell) - confirm the intended wording.
    cbar.set_label('H**O-LUMO degeneracy', fontsize=16)

    # White out the two lower corners so only the triangle with vertices
    # (0, 1), (0.5, 0.5) and (1, 1) stays visible, then draw its outline.
    plt.fill([0, 0, 0.5], [0.5, 1, 0.5], "white", zorder=2)
    plt.fill([1, 1, 0.5], [0.5, 1, 0.5], "white", zorder=3)
    plt.plot([0, 0.5], [1, 0.5], color="lightsteelblue", linewidth=3.5, zorder=4)
    plt.plot([0.5, 1], [0.5, 1], color="lightsteelblue", linewidth=3.5, zorder=5)
    plt.plot([0, 1], [1, 1], color="lightsteelblue", linewidth=3.5, zorder=0)

    plt.ylabel("NPR2", fontsize=16)
    plt.xlabel("NPR1", fontsize=16)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    matplotlib.rc('axes', edgecolor='black')
    plt.ylim(0.4, 1.05)
    plt.xlim(-0.05, 1.05)
    plt.savefig("../../data/figures/npr_mol1.png", dpi=600, bbox_inches='tight')

    return npr1, npr2, fails
# Fragment (whitespace-collapsed; it ends inside an unfinished go.Contour(...) call).
# When `contour` is truthy: `raw` is averaged per day of year, each row gets a
# "Cluster N (W days)" label built from aggregation.clusterPeriodNoOccur, and for
# every label group a 2-D gaussian KDE of the PV and Wind columns is evaluated on
# a 100 x 100 np.mgrid and passed to a plotly Contour trace.
# NOTE(review): `colors[i]` is not defined in the visible code (the seaborn palette
#   is stored in `c`) - confirm `colors` exists in the enclosing scope.
# NOTE(review): the middle colour stop formats c[i] components straight into
#   "rgba(...)"; presumably these are 0-1 floats - confirm plotly accepts them.
# NOTE(review): with np.mgrid the first axis of zi.reshape(xi.shape) follows x,
#   while plotly's Contour presumably expects z[row][col] = z[y][x] - check
#   whether a transpose is needed.
if contour: yeardays = raw.groupby(raw.index.dayofyear).mean() weights = aggregation.clusterPeriodNoOccur yeardays["cluster_str"] = [ f"Cluster {i+1} ({weights[i]} days)" for i in yeardays["cluster"].values ] nbins = 100 fig = go.Figure() c = sns.color_palette("cubehelix", n_colors=len(typPeriods_m.loc[:, 1, :])) for i, (cluster, df) in enumerate(yeardays.groupby("cluster_str")): x = df.PV.values y = df.Wind.values k = kde.gaussian_kde([x, y]) xi, yi = np.mgrid[x.min() : x.max() : nbins * 1j, y.min() : y.max() : nbins * 1j] zi = k(np.vstack([xi.flatten(), yi.flatten()])) xc = np.linspace(x.min(), x.max(), nbins) yc = np.linspace(y.min(), y.max(), nbins) colorscale = [ [0, "rgba(0,0,0,0)"], [0.5, f"rgba{c[i][0], c[i][1], c[i][2], 0}"], [1, colors[i]], ] fig.add_trace( go.Contour( x=xc, y=yc, z=zi.reshape(xi.shape),
# Fragment (whitespace-collapsed; it ends on a `for` header whose body is not visible).
# Part 1: turns every list in `new` into len(list) / numberOfSearchQueries and saves
#   a bar chart of those values as "pathLengthProbability_<i>.eps"; `i` comes from
#   code outside this view.
# Part 2: for each `i` in numnodes, collects the values of totalData[i][0], plots a
#   normal pdf with the sample mean and standard deviation over mu +/- 3 sigma and
#   saves it as "pathLengthpdf_<i>.eps".
# NOTE(review): the gaussian_kde result assigned to `kde` is not used anywhere in
#   the visible code; the curve that is drawn is stats.norm.pdf - confirm intent.
# NOTE(review): plt.figure() is called per iteration and no close call is visible,
#   so figures may accumulate - confirm.
# NOTE(review): the original loop nesting cannot be recovered from the collapsed
#   line (e.g. whether the plotting sits inside `for i in numnodes`) - confirm.
new_data = {k: len(v) / numberOfSearchQueries for k, v in new.items()} import matplotlib.pylab as plt plt.figure(figsize=(15, 5)) plt.bar(new_data.keys(), new_data.values(), width=.5, color='g') plt.xlabel("Path Length") plt.ylabel("Probability") plt.title("Path Length vs Probability for N=" + str(i)) plt.savefig("pathLengthProbability_" + str(i) + ".eps", format="eps") for i in numnodes: out = [] fig1 = plt.figure() for k, v in totalData[i][0].items(): out.append(v) kde = gaussian_kde(out) mu = np.mean(out) variance = np.var(out) sigma = math.sqrt(variance) dist_space = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100) plt.plot(dist_space, stats.norm.pdf(dist_space, mu, sigma)) plt.xlabel("Path Length") plt.ylabel("PDF") plt.title("Path Length PDF") plt.savefig("pathLengthpdf_" + str(i) + ".eps", format="eps") print("--------------plots after deletion---------------") mapAverageHopsMean = {} mapAverageHopsStd = {} mapAverageHops = {} for key, value in totalDataAfterDeletion.items():
def run(self, samples=None, progress=True):
    """
    Perform MCMC calibration and fit kernel density estimates to the traces.

    Args:
        samples(integer, tuple or list): Either the number of samples, or a
            sequence ``(num_samples, num_burn, num_thin)``. If samples is a
            single number, burn will be 20% of the samples and thin will
            be 8. If None, ``self.num_samples``, ``self.num_burn`` and
            ``self.num_thin`` are used.
        progress(boolean): If True, will display a progress bar.

    Returns(tuple):
        ``(cvars, k)``. Every entry of ``self.cvars`` gains the keys
        'ntraces', 'trace' (flattened) and 'pdf' (a gaussian_kde, or None
        if the fit failed). Entries that also appear in ``self.means`` gain
        'mtrace' and 'dtrace'; entries whose 'type' is 'S' gain 'jpdf', a
        joint KDE over the deviation and mean traces (or None). ``k`` is a
        joint gaussian_kde over all traces, columns ordered by sorted
        variable name, or None if that fit failed.

    Raises:
        ValueError: If samples is a tuple or list whose length is not 3.
    """
    if samples is None:
        num_samples = self.num_samples
        num_burn = self.num_burn
        num_thin = self.num_thin
    elif isinstance(samples, (tuple, list)):
        if len(samples) != 3:
            raise ValueError(
                "Error: samples should be a number or tuple of length 3."
            )
        num_samples, num_burn, num_thin = samples
    else:
        num_samples = samples
        num_burn = int(samples * 0.20)
        num_thin = 8

    # The sampler is stored on the class, not on the instance.
    Calibrate.mcmc = pymc.MCMC(self.mcmc_model)
    Calibrate.mcmc.sample(iter=num_samples,
                          burn=num_burn,
                          thin=num_thin,
                          tune_interval=10000,
                          tune_throughout=True,
                          progress_bar=progress)

    for name, cvar in self.cvars.items():
        trace = self.var[name].trace[:]
        cvar['ntraces'] = trace.shape[1] if len(trace.shape) == 2 else 1
        cvar['trace'] = trace.ravel()

    for name in self.means:
        self.cvars[name]['mtrace'] = self.means[name].trace[:]
        self.cvars[name]['dtrace'] = self.devs[name].trace[:]

    # collect all the independent variables and compute KDE
    col_count = max(cvar['ntraces'] for cvar in self.cvars.values())
    for cvar in self.cvars.values():
        if cvar['type'] == 'S':
            joint = np.column_stack((cvar['dtrace'], cvar['mtrace']))
            try:
                cvar['jpdf'] = gaussian_kde(joint.T)
            except Exception:
                # A failed fit is reported as None, not raised.
                cvar['jpdf'] = None

        # multidimensional traces get flattened and others
        # get repeated to match size.
        # NOTE(review): a variable whose ntraces is neither 1 nor col_count
        # is still repeated col_count times, so its column would not match
        # the others in the column_stack below - confirm this cannot occur.
        repeats = 1 if cvar['ntraces'] == col_count else col_count

        try:
            cvar['pdf'] = gaussian_kde(cvar['trace'].ravel())
        except Exception:
            cvar['pdf'] = None
        cvar['trace'] = cvar['trace'].ravel().repeat(repeats)

    data = np.column_stack(
        [self.cvars[name]['trace'] for name in sorted(self.cvars)])
    try:
        k = gaussian_kde(data.T)
    except Exception:
        k = None
    return (self.cvars, k)