import matplotlib.pyplot as plt  # numpy has no hist(); plotting needs pyplot

def plotMeans(numDice, numRolls, numBins, legend, color, style):
    means = []
    for i in range(numRolls // numDice):
        vals = 0
        for j in range(numDice):
            vals += 5 * random.random()
        means.append(vals / float(numDice))
    # numpy.hist does not exist; the histogram call belongs to matplotlib
    plt.hist(means, numBins, color=color, label=legend,
             weights=[1 / len(means)] * len(means), hatch=style)
    return getMeanAndStd(means)
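# A minimal usage sketch for plotMeans, assuming getMeanAndStd is a helper
# returning (mean, std) of a list; the dice counts and roll totals below are
# illustrative only.
if __name__ == '__main__':
    mean, std = plotMeans(1, 100000, 19, '1 die', 'b', '*')
    print('Mean of rolling 1 die =', round(mean, 4), 'Std =', round(std, 4))
    mean, std = plotMeans(50, 100000, 19, '50 dice', 'r', '//')
    print('Mean of rolling 50 dice =', round(mean, 4), 'Std =', round(std, 4))
    plt.title('Rolling Continuous Dice')
    plt.xlabel('Mean')
    plt.ylabel('Probability')
    plt.legend()
    plt.show()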
def JacobianStats(inputImage, range1):
    # input a Jacobian image generated by ANTSJacobian and output a histogram;
    # values above the maximum of range1 need to be included
    inputImage = imageDB.loadNifti(inputImage)
    # append the image maximum as the final edge so large values are counted
    range1 = np.append(range1, np.max(inputImage))
    counts, range1 = np.histogram(inputImage, range1)  # np.hist does not exist
    return counts
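# A toy check of the bin-edge trick used above: appending the data maximum as
# the final edge makes np.histogram count values beyond the last edge of
# range1 instead of dropping them. The arrays below are illustrative only.
vals = np.array([0.5, 1.2, 2.7, 9.9])
edges = np.append(np.array([0.0, 1.0, 2.0]), np.max(vals))
counts, _ = np.histogram(vals, edges)
print(counts)  # -> [1 1 2]: 2.7 and 9.9 both land in the appended overflow bin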
def segment(image, v_min=5, v_max=250):
    # get rid of white and black pixels in histogram
    bins, edges = np.histogram(image.flatten(), bins=v_max - v_min,
                               range=(v_min, v_max))
    edges = edges.astype(int)  # np.int was removed from NumPy; plain int works
    # identify grey peak
    largest_bin = np.argmax(bins)
    largest_edge = edges[largest_bin]
def PeakShape():
    w = int(args.w)
    KK = int(np.floor(int(args.d) / w))  # must be an int for slicing below
    mu = int(args.d)
    r = 2.
    p = r / (mu + r)
    n = int(1e6)  # numpy sizes must be integers
    X = np.random.negative_binomial(r, p, n)
    X2 = np.ceil(X * np.random.rand(n))
    I = np.nonzero(np.logical_and(X > 60, X < 170))[0]
    L1 = np.ceil((1 - X2[I]) / w)
    L2 = np.ceil((X[I] - X2[I]) / w)
    # np.histogram returns (counts, edges); keep the counts only
    H1, _ = np.histogram(L1, np.arange(-1000 // w, 0))
    H2, _ = np.histogram(L2, np.arange(0, 1000 // w))
    H1 = H1 / H1.sum()
    H2 = H2 / H2.sum()
    # [a b] concatenation is MATLAB syntax; use np.concatenate in numpy
    dat1 = np.concatenate([H1, np.zeros(1000 // w)])
    c1 = np.convolve(dat1, np.ones(KK))
    c1 = c1[:-KK + 1]
    dat2 = np.concatenate([np.zeros(1000 // w), H2])
    c2 = np.convolve(dat2, np.ones(KK))
    c2 = c2[KK - 1:]  # KK - 1, so c1 and c2 have equal length
    c = c1 + c2
    c = smooth(c, 5)
    c = c / max(c)
    return c

def Extrap(shape):
    # the caller passes the peak shape in, so take it as an argument
    shape2 = shape[shape > 0]
    shape2 = shape2 / max(shape2)
    lshape2 = len(shape2)
    w = int(args.w)
    # np.interp expects (x, xp, fp); 'jump' is assumed defined elsewhere
    shape2 = np.interp(np.arange(1, w * lshape2, w),
                       np.arange(1, jump * lshape2), shape2)
    lshape2 = len(shape2)

def InitData():
    pass

def PFR():
    pass

def FitShape():
    shape = PeakShape()
    Extrap(shape)  # was misspelled 'Extra'
    InitData()
def get_map_range(self, channel):
    # Check if channel is a string specifying the PFM channel to plot
    if isinstance(channel, str):
        if channel in self.channels:
            data = self.channels[channel]
        else:
            raise ValueError(
                "Channel to plot needs to be one of {}".format(
                    list(self.channels.keys())))
    # Check if it is the data array of the PFM channel to plot
    elif isinstance(channel, np.ndarray):
        # 'in dict.values()' is ambiguous for arrays; compare element-wise
        if any(np.array_equal(channel, v) for v in self.channels.values()):
            data = channel
        else:
            raise ValueError(
                "Channel data does not match loaded channels in class!")
    else:
        raise TypeError("channel must be a str or a numpy array")
    data_hist = np.histogram(data)  # np.hist does not exist
def normalized_hist(img_gray, num_bins):
    assert len(img_gray.shape) == 2, 'image dimension mismatch'
    assert img_gray.dtype == 'float', 'incorrect image type'
    hists = np.zeros(num_bins)
    bins = np.zeros(num_bins)
    bin_len = 255.0 / num_bins
    for i in range(num_bins):
        bins[i] = bin_len * (i + 1)
    bins = np.insert(bins, 0, 0)
    for i in range(img_gray.shape[0]):
        for j in range(img_gray.shape[1]):
            # clamp to num_bins - 1 so a value of 255 does not overflow hists
            k = min(int(img_gray[i, j] / bin_len), num_bins - 1)
            hists[k] += 1
    # normalize so the counts sum to 1
    hists = hists * 1.0 / np.sum(hists)
    return hists, bins
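# A small sanity check for normalized_hist against np.histogram on synthetic
# data; both should agree once the numpy counts are normalized. The test
# image here is illustrative only.
img = np.linspace(0, 255, 64).reshape(8, 8).astype(float)
h_manual, edges = normalized_hist(img, 10)
h_np, _ = np.histogram(img, bins=edges)
assert np.allclose(h_manual, h_np / h_np.sum())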
def plot_vel_dir(alist, abs_dir):
    velocity = get_params_as_array(alist, "Velocity")
    pos = get_params_as_array(alist, "pos")
    v = np.zeros(len(velocity))  # one radial component per particle
    for i, (vel, p) in enumerate(zip(velocity, pos)):
        r = (p[0]**2 + p[1]**2 + p[2]**2)**0.5
        # project the velocity onto the unit position vector
        v[i] = np.dot(vel, p) / r
    v = (v * yt.units.cm).convert_to_units('km').value  # now in km/s
    hist, bins = np.histogram(v, bins=25, density=False)
    centers = 0.5 * (bins[:-1] + bins[1:])
    plt.plot(centers, hist)
    plt.xlabel('vel projected towards cluster center')
    plt.close()
def boutlength_distribution(df, _bins=np.arange(0, 100, 10)):
    # np.arange(0, 10, 100) would yield a single edge and no bins; the stop
    # and step look transposed, so the default assumes durations up to ~100
    dist = np.histogram(df['bout_duration'], bins=_bins)[0]
    count = sum(dist)
    return dist / float(count)
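# Toy usage of boutlength_distribution, assuming a DataFrame with a
# 'bout_duration' column; the returned fractions sum to 1.
import pandas as pd
df_demo = pd.DataFrame({'bout_duration': [3, 12, 45, 45, 80]})
p = boutlength_distribution(df_demo)
print(p, p.sum())  # one fraction per 10-unit bin, summing to 1.0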
import matplotlib.pyplot as plt  # these are pyplot calls, not numpy

def makeHist(data, title, xlabel, ylabel, bins=20):
    plt.hist(data, bins=bins)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
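# A minimal usage sketch for makeHist; the Gaussian sample is illustrative.
import numpy as np
data = np.random.normal(0, 1, 1000)
makeHist(data, 'Gaussian sample', 'value', 'count', bins=30)
plt.show()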
bin_nums = 500
# hint: [height1, width1, peak position1, height2, width2, peak position2,
#        height3, width3, peak position3]
initial_guess = [51, 1, 11, 25, 1, 13, 15, 1, 17]

############################################
# read the input file
with open(imp_file, "r") as f:
    dataset = []
    for line in f:
        num = float(line)
        dataset.append(num)

# make a histogram with 500 bins (np.histogram returns a tuple of arrays)
h = np.histogram(dataset, bins=bin_nums, range=None, weights=None,
                 density=False)

# split the tuple into separate arrays
x = h[1]
xdata = x[:-1]
ydata = h[0]

# plot histogram
plt.plot(xdata, ydata, 'b-', label='data')

# define fitting function: 3 Gaussians in this case
def func(x, a1, b1, c1, a2, b2, c2, a3, b3, c3):
    return (a1 * np.exp(-b1 * (x - c1)**2)
            + a2 * np.exp(-b2 * (x - c2)**2)
            + a3 * np.exp(-b3 * (x - c3)**2))
    Function that returns the log of a normal distribution with mean ``mean``
    and variance ``sigma**2``, truncated to [a, b].
    """
    x_ = np.where((a < x) & (x < b), x, 1)
    k = math.log(norm.cdf((b - mean) / sigma) - norm.cdf((a - mean) / sigma))
    result1 = -math.log(sigma) - 0.5 * math.log(2. * math.pi) -\
        0.5 * ((x_ - mean) / sigma) ** 2. - k
    result = np.where((a < x) & (x < b), result1, float("-inf"))
    return result


if __name__ == '__main__':
    dgamma = Dlognorm_shifted(2.)
    # histogram plotting belongs to matplotlib; 'normed' is now 'density'
    plt.hist(dgamma(mean=2, sigma=0.5, size=10000), bins=50, density=True)
    plt.hist(get_theta_sample(dgamma, mean=2, sigma=0.5, size=10000) *
             180. / math.pi, bins=50, density=True)

    # Load data
    data_lists = list()
    files = ['source.txt', 'beta.txt', 'sigma_beta.txt']
    for file_ in files:
        with open(file_) as f:
            data_lists.append(f.read().splitlines())
    sources_list = data_lists[0]
    betas_list = data_lists[1]
    beta_sigmas_list = data_lists[2]
    # Convert to float numbers
    for i, value in enumerate(betas_list):
def generate_figures():
    # dtype=str (not 'S20') so comparisons against '1'/'0' work in Python 3
    header_data = np.loadtxt(dev_const.PUB_CONDEL_PREDICTION_RESULT,
                             dtype=str)[:, :5]
    scores_data = np.loadtxt(dev_const.PUB_CONDEL_PREDICTION_RESULT,
                             dtype=str)[:, 5:13].astype(float)
    min_value = np.amin(scores_data)
    max_value = np.amax(scores_data)
    predictor_names = ('CombiVEP', 'Phylop', 'SIFT', 'PP2', 'LRT', 'MT',
                       'GERP', 'Condel')
    predictor_colors = ('k', 'm', 'c', 'g', 'b', 'coral', 'darkred', 'r')

    # produce ROC data from CombiVEP, Phylop, SIFT, PP2, LRT, MT, GERP, Condel
    fp_rates, tp_rates = calculate_roc(scores_data[header_data[:, 4] == '1'],
                                       scores_data[header_data[:, 4] == '0'],
                                       np.linspace(min_value, max_value, 5001))
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in range(len(predictor_names)):
        ax.plot(fp_rates[:, i], tp_rates[:, i], predictor_colors[i],
                label=predictor_names[i])
    ax.set_ylabel('true positive rate')
    ax.set_xlabel('false positive rate')
    ax.legend(bbox_to_anchor=(0.9999, 0.0001), loc=4)
    fig.savefig(dev_const.PUB_ROC_FIG, bbox_inches='tight', pad_inches=0.05)

    # produce AUC data from ROC data
    fig = plt.figure()
    aucs = []
    ind = []
    ax = fig.add_subplot(111)
    for i in range(len(predictor_names)):
        aucs.append(auc(fp_rates[:, i], tp_rates[:, i]))
        ind.append(0.5 * (i + 1) - 0.4)
    ax.bar(ind, aucs, 0.3, color=predictor_colors)
    for i in range(len(aucs)):
        ax.text(ind[i], aucs[i] + 0.01, "%0.3f" % aucs[i])
    ax.set_ylim([0.7, 0.9])
    ax.set_xticks(np.array(ind) + 0.15)
    ax.set_xticklabels(predictor_names, rotation=30)
    fig.savefig(dev_const.PUB_AUC_FIG, bbox_inches='tight', pad_inches=0.05)

    # plot scores distribution
    fig = plt.figure()
    ax = fig.add_subplot(211)
    hist_range = (-0.005, 1.005)
    patho_hist, bins = np.histogram(scores_data[header_data[:, 4] == '1'][:, 0],
                                    bins=100, range=hist_range)
    neutr_hist, bins = np.histogram(scores_data[header_data[:, 4] == '0'][:, 0],
                                    bins=100, range=hist_range)
    center = (bins[:-1] + bins[1:]) / 2
    ax.plot(center, patho_hist, 'r--', label='pathogenic variants')
    ax.plot(center, neutr_hist, 'b--', label='neutral variants')
    ax.set_title('CombiVEP score distribution')
    ax.set_ylabel('samples')
    ax.set_xlabel('score')
    ax.legend(bbox_to_anchor=(0.999, 0.999), loc=1)
    ax = fig.add_subplot(212)
    patho_hist, bins = np.histogram(scores_data[header_data[:, 4] == '1'][:, 7],
                                    bins=100, range=hist_range)
    neutr_hist, bins = np.histogram(scores_data[header_data[:, 4] == '0'][:, 7],
                                    bins=100, range=hist_range)
    center = (bins[:-1] + bins[1:]) / 2
    ax.plot(center, patho_hist, 'r--', label='pathogenic variants')
    ax.plot(center, neutr_hist, 'b--', label='neutral variants')
    ax.set_title('Condel score distribution')
    ax.set_ylabel('samples')
    ax.set_xlabel('score')
    ax.legend(bbox_to_anchor=(0.999, 0.999), loc=1)
    fig.tight_layout()
    fig.savefig(dev_const.PUB_SCORES_DISTR_FIG, bbox_inches='tight',
                pad_inches=0.05)
    return (dev_const.PUB_ROC_FIG,
            dev_const.PUB_AUC_FIG,
            dev_const.PUB_SCORES_DISTR_FIG)
    return gray


## gray-value histograms (Question 2.a)
img_color = np.array(Image.open('./model/obj100__0.png'))
img_gray = rgb2gray(img_color.astype('double'))
plt.figure(1)
plt.subplot(1, 3, 1)
plt.imshow(img_color)

plt.subplot(1, 3, 2)
num_bins_gray = 40
hist_gray1, bin_gray1 = np.histogram(img_gray.reshape(img_gray.size),
                                     num_bins_gray, (0, 255))
plt.bar((bin_gray1[0:-1] + bin_gray1[1:]) / 2, hist_gray1)

plt.subplot(1, 3, 3)
hist_gray2, bin_gray2 = histogram_module.normalized_hist(
    img_gray, num_bins_gray)
plt.bar((bin_gray2[0:-1] + bin_gray2[1:]) / 2, hist_gray2)
plt.tight_layout()

## more histograms (Question 2.b)
# Compose and test RGB histograms (histogram_module.rgb_hist)
plt.figure(2)
plt.subplot(1, 2, 1)
plt.imshow(img_color)
def _hist_bins(self, lens) -> np.ndarray:
    # np.histogram returns (counts, edges); prepend a 0 edge to the edges
    return np.r_[0, np.histogram(lens, self.bins_number)[1]]
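# A standalone check of the same trick: np.r_ prepends 0 to the bin edges
# that np.histogram computes. The sample lengths are illustrative only.
lens = np.array([1.0, 2.0, 4.0, 8.0])
edges = np.r_[0, np.histogram(lens, 4)[1]]
print(edges)  # -> [0.   1.   2.75 4.5  6.25 8.  ]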
# (continuation of the per-row loop of a MATLAB plotmatrix port; get/set
# calls are translated to matplotlib Axes methods, indices to 0-based)
dy = np.diff(ax[i, 0].get_ylim())[0] * inset
for a in ax[i, :]:
    a.set_ylim(ylimmin[i, 0] - dy, ylimmax[i, 0] + dy)

dx = np.zeros(cols)
for j in range(cols):
    ax[0, j].set_xlim(xlimmin[0, j], xlimmax[0, j])
    dx[j] = np.diff(ax[0, j].get_xlim())[0] * inset
    for a in ax[:, j]:
        a.set_xlim(xlimmin[0, j] - dx[j], xlimmax[0, j] + dx[j])

for a in ax[:rows - 1, :].flat:
    a.set_xticklabels([])
for a in ax[:, 1:].flat:
    a.set_yticklabels([])
BigAx.set_xticks(ax[rows - 1, 0].get_xticks())
BigAx.set_yticks(ax[rows - 1, 0].get_yticks())
BigAx.plotmatrix_axes = ax  # stand-in for MATLAB's 'userdata'/'tag' metadata

if dohist:  # Put a histogram on the diagonal for plotmatrix(y) case
    for i in range(rows - 1, -1, -1):
        histax = fig.add_axes(ax[i, i].get_position())
        nn, xx = np.histogram(np.reshape(y[:, i, :], [m, k]).ravel())
        patches[i] = histax.bar(xx[:-1], nn, width=np.diff(xx), align='edge')
        if putlabels:
            xt = 0.5 * (np.max(y[:, i, :]) + np.min(y[:, i, :]))
            yt = 0.9 * np.max(nn)
            histax.text(xt, yt, varnames[i])
        histax.set_xticks([])
        histax.set_yticks([])
        histax.grid(False)
        histax.set_xlim(xlimmin[0, i] - dx[i], xlimmax[0, i] + dx[i])
        pax[i] = histax  # axes handles for the histograms

# A bug seems to occur when plotmatrix is run to produce a plot inside a GUI,
# whereby the default fig menu items and icons appear. Commenting out the code
# below fixed the issue.
# Tim Peterson - April 2016
# Make BigAx the current axes
filepath = path + file0
file1 = 'rhow_' + label + '_' + tlabel
fileout = path + file1
# print('opening: ', filepath)
data = vtktools.vtu(filepath)
print('fields: ', data.GetFieldNames())
print('extract V, R')
V = data.GetVectorField('Velocity_CG')
R = data.GetScalarField('Density_CG')
rho = []
w = []
for d in range(len(depths)):
    # a per-depth scalar is needed for rho*w below, so average the density
    # (np.hist does not exist, and a histogram could not be multiplied by w)
    rho.append(np.mean(R[coords[:, 2] == depths[d]]))
    w.append(np.mean(V[coords[:, 2] == depths[d], 2]))
rho = np.asarray(rho)
w = np.asarray(w)
# del data
print('max: ', (w * rho).max(), 'min: ', (w * rho).min())

# RHO*W
fig = plt.figure(figsize=(2, 5))
ax = fig.add_subplot(111)
rhow_z = rho * w
plt.plot([0, 0], [min(depths), max(depths)], color='k', linestyle='--',
         linewidth=1)
plt.plot(rhow_z, depths, color='0.75')
plt.xlabel('$<w*rho>$')
clf()
R.get_group(63).intens5.hist(bins=linspace(0, 200, 1000), log=True, histtype='step')
R.get_group(63).intens5.hist(bins=linspace(0, 100, 1000), log=True, histtype='step')
df_r.intens5.max()
df_r.intens5.min()
df_r.intens5.median()
R.get_group(63).intens5.hist(bins=linspace(0, 80, 100), log=True, histtype='step')
R.get_group(120).intens5.hist(bins=linspace(0, 80, 100), log=True, histtype='step')
runs
R.get_group(128).intens5.hist(bins=linspace(0, 80, 100), log=True, histtype='step')
R.get_group(66).intens5.hist(bins=linspace(0, 80, 100), log=True, histtype='step')
df_r / df_r.partiality
df_r.intens5 / df_r.partiality
np.sqrt(df_r.intens5 / df_r.partiality)
np.histogram(np.sqrt(df_r.intens5 / df_r.partiality))
(np.sqrt(df_r.intens5 / df_r.partiality)).hist(bins=100)
df
df.BnotA
np.all(df.BnotA)
np.all(df.AandB)
np.any(df.AandB)
# df.intens5 / df.partiality
df = df.query("HS_ratio < 0.9")
df
df.intens5 / df.partiality
df.intens5 / df.partialityB
def cdf_benchmarking_plot(timeseries, kernel_density=False):
    """ Plot probability distribution of model outputs """
    combined_df = pd.DataFrame()
    for ts in timeseries:
        df1 = ts.tp.to_dataframe(name=ts.plot_legend)
        df2 = df1.dropna().reset_index()
        df3 = df2.drop(["time", "lon", "lat"], axis=1)
        combined_df[ts.plot_legend] = df3[ts.plot_legend]

    time_ds = timeseries[0].time.to_dataframe(name='time')
    months_float = np.ceil((time_ds["time"] - np.floor(time_ds["time"])) * 12)

    df = combined_df.copy()
    df["time"] = months_float.values
    df["time"] = df["time"].astype(int)

    grouped_dfs = []
    for m in range(1, 13):
        month_df = df[df['time'] == m]
        month_df = month_df.drop(['time'], axis=1)
        grouped_dfs.append(month_df)

    # Plot
    _fig, axs = plt.subplots(3, 4, sharex=True, sharey=True)
    for i in range(12):
        x = i % 3
        y = i // 3
        for b in list(combined_df):
            data = grouped_dfs[i][b].values
            X_plot = np.linspace(0, data.max(), 1000)[:, np.newaxis]
            # data_sorted = np.sort(data)
            # p = 1. * np.arange(len(data)) / (len(data) - 1)
            if kernel_density is True:
                # the axes-level hist (np.hist does not exist) returns
                # (counts, bin edges, patches)
                n, _bins, _patches = axs[x, y].hist((grouped_dfs[i])[b],
                                                    cumulative=True,
                                                    density=True, label=b)
                X = n.reshape(-1, 1)
                kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(X)
                log_dens = kde.score_samples(X_plot)
                axs[x, y].plot(X_plot[:, 0], np.exp(log_dens), label=b)
            else:
                axs[x, y].hist((grouped_dfs[i])[b], cumulative=True,
                               density=True, label=b)
        axs[x, y].set_title(month_dict[i])
        axs[x, y].xaxis.set_tick_params(which="both", labelbottom=True)
        axs[x, y].yaxis.set_tick_params(which="both", labelbottom=True)
        axs[x, y].set_xlabel("Precipitation (mm/day)")
        axs[x, y].set_ylabel("CDF")
        axs[x, y].axvline(
            np.percentile((grouped_dfs[i])['ERA5'], 95),
            color="k",
            linestyle="dashed",
            linewidth=1,
            label="ERA5 95th percentile",
        )
    plt.legend(loc="upper right")
    plt.show()
random_state=0, stratify=None)

qtrans = QuantileTransformer2(numerical_features='auto', drop=True,
                              dtype=np.float32, n_quantiles=1000,
                              output_distribution='uniform',
                              ignore_implicit_zeros=False, subsample=100000,
                              random_state=42, copy=True)
X_trans = qtrans.fit_transform(X_train)
print(X_trans.info())

# show X_train data
X_trans.hist(bins=50, figsize=(10, 10))
# plt.show()
X_train.hist(bins=50, figsize=(10, 10))
# plt.show()
X_trans.to_excel("output.xlsx")
print(X_train.skew(numeric_only=True), '\n')
print(X_trans.skew(numeric_only=True))

inp = X_trans['MSSubClass_qnt'].to_numpy().flatten()  # avoid shadowing input()
a = 1
b = 8
output = beta.pdf(inp, a, b, loc=0, scale=1)
print(f'output type: {type(output)}')
print('skew=', skew(output, axis=0))
print(np.histogram(output))  # np.hist does not exist