def plot_heatmap(x, y_mat, alpha=1, title=None, sizes=None, share_axis=False):
    if sizes is None:
        sizes = 60
    if y_mat.ndim == 1:
        y_mat = np.expand_dims(y_mat, 1)
    pl.close()
    fig = pl.figure(4)
    fig.suptitle(title)
    for index, y in enumerate(y_mat.T):
        if index == 0:
            ax1 = pl.subplot(y_mat.shape[1], 1, index + 1)
        elif share_axis:
            pl.subplot(y_mat.shape[1], 1, index + 1, sharex=ax1, sharey=ax1)
        else:
            pl.subplot(y_mat.shape[1], 1, index + 1)
        red_values = normalize(y)
        # Keep only rows where the value and both coordinates are finite.
        I = np.isfinite(y) & np.isfinite(x[:, 0]) & np.isfinite(x[:, 1])
        colors = np.zeros((red_values.size, 3))
        colors[:, 0] = red_values
        pl.ylabel(str(index))
        if I.mean() < 1:
            print('Percent skipped due to nans: ' + str(1 - I.mean()))
        pl.scatter(x[I, 0], x[I, 1], alpha=alpha, c=colors[I, :],
                   edgecolors='none', s=sizes)
    move_fig(fig, 1000, 1000)
    pl.show(block=True)
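# Self-contained sketch (not from the original source) of the coloring trick plot_heatmap
# relies on: normalize a value to [0, 1] and write it into the red channel of an (N, 3)
# RGB array passed to scatter(c=...). Synthetic data only.
import numpy as np
import matplotlib.pyplot as plt

pts = np.random.rand(300, 2)
vals = pts[:, 0] + pts[:, 1]
red = (vals - vals.min()) / (vals.max() - vals.min())   # same role as normalize(y) above
rgb = np.zeros((red.size, 3))
rgb[:, 0] = red                                          # intensity -> red channel
plt.scatter(pts[:, 0], pts[:, 1], c=rgb, edgecolors='none', s=40)
plt.show()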
def plot_trajectory(mu_vector):
    # mu_vector appears to hold two 2D mean trajectories, i.e. shape (n_steps, 2, 2).
    data0 = mu_vector[:, 0]
    data1 = mu_vector[:, 1]
    labels = ["{0}".format(i) for i in range(len(mu_vector))]
    plt.scatter(data0[:, 0], data0[:, 1], color="red")
    plt.scatter(data1[:, 0], data1[:, 1], color="blue")
    for i in range(len(mu_vector)):
        plt.annotate(labels[i], (data0[i, 0], data0[i, 1]), fontsize=5,
                     xytext=(-10, 20), textcoords="offset points",
                     ha="right", va="bottom",
                     arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0"))
        plt.annotate(labels[i], (data1[i, 0], data1[i, 1]), fontsize=5,
                     xytext=(-10, 20), textcoords="offset points",
                     ha="right", va="bottom",
                     arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0"))
    plt.savefig("Mean_Trajectory.png")
    plt.show()
def test(args):
    data = multivariate_normal([0, 0], [[1, 2], [2, 5]], int(args[1]))
    print(data)
    # PCA
    result = pca(data, base_num=int(args[2]))
    pc_base = result[0]
    print(pc_base)
    # Plotting
    fig = plt.figure()
    fig.add_subplot(1, 1, 1)
    plt.axvline(x=0, color="#000000")
    plt.axhline(y=0, color="#000000")
    # Plot data
    plt.scatter(data[:, 0], data[:, 1])
    # Draw the 1st principal axis
    pc_line = sp.array([-3.0, 3.0]) * (pc_base[1] / pc_base[0])
    plt.arrow(0, 0, -pc_base[0] * 2, -pc_base[1] * 2, fc="r", width=0.15, head_width=0.45)
    plt.plot([-3, 3], pc_line, "r")
    # Settings
    plt.xticks(size=15)
    plt.yticks(size=15)
    plt.xlim([-3, 3])
    plt.tight_layout()
    # Save before show(): saving afterwards writes an empty figure with most backends.
    plt.savefig("image.png")
    plt.show()
    return 0
def main(): # an example non-autonomous function x0 = 1 t = np.linspace(1,3,500) # use the same syntax as odeint sol = LSolve(example,x0,t,args=(1,1)) if matplotlib_module: mp.figure(1) mp.title("Example solution") mp.plot(t,sol) # example integrate and fire code x0 = 0 t2 = np.linspace(0,10,500) # again the syntax is the same as odeint, but we add aditional inputs, # including a flag to track spikes (IF models only): threshold = 5 sol2,spikes = LSolve(QIF,x0,t,threshold=threshold,reset=0,spike_tracking=True,args=(5,)) # extract spike times spikes[spikes==0.0] = None spikes[spikes==1.0] = threshold if matplotlib_module: mp.figure(2) mp.title("QIF model with noise") mp.plot(t2,sol2) mp.scatter(t2,spikes,color='red',facecolor='red') mp.show()
def movie_plotter(components, movies, movie_id="all", x_buffer=3, y_buffer=2):
    if movie_id == "all":
        plt.scatter(components[:, 0], components[:, 1])
        plt.xlabel("Component 1")
        plt.ylabel("Component 2")
        plt.show()
    else:
        x = components[movie_id][0]
        y = components[movie_id][1]
        xs = [x - x_buffer, x + x_buffer]
        ys = [y - y_buffer, y + y_buffer]
        plt.scatter(components[:, 0], components[:, 1])
        plt.xlim(xs)
        plt.ylim(ys)
        plt.xlabel("Component 1")
        plt.ylabel("Component 2")
        for x, y, title in zip(components[:, 0], components[:, 1], movies['movie_title']):
            if xs[0] <= x <= xs[1] and ys[0] <= y <= ys[1]:
                try:
                    plt.text(x, y, title)
                except Exception:
                    # Skip titles that cannot be rendered.
                    pass
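# Usage sketch for movie_plotter above (not from the original source). The column name
# 'movie_title' matches what the function expects; the component matrix and titles here
# are synthetic stand-ins for real PCA output.
import numpy as np
import pandas as pd

components = np.random.randn(50, 2)
movies = pd.DataFrame({'movie_title': ['movie_%d' % i for i in range(50)]})
movie_plotter(components, movies)              # overview of all movies
movie_plotter(components, movies, movie_id=7)  # zoom around one movie, with labels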
def classification_regions(network, title, img_file_name, interval=100):
    coords = [(i / interval, j / interval)
              for j in range(0, interval + 1, 1)
              for i in range(0, interval + 1, 1)]
    classified_records = []
    for coord in coords:
        output = network.run(coord)
        classified_records.append([coord, output.index(max(output)) + 1])
    plt.scatter(
        [record[0][0] for record in classified_records],
        [record[0][1] for record in classified_records],
        c=[record[1] for record in classified_records],
    )
    plt.xlim((0, 1))
    plt.ylim((0, 1))
    plt.title(title)
    plt.xlabel('Six-fold rotational symmetry')
    plt.ylabel('Eccentricity')
    plt.savefig(img_file_name)
def main_k_nearest_neighbour(k):
    X, y = make_blobs(n_samples=100, n_features=2, centers=2,
                      cluster_std=1.0, center_box=(-10.0, 10.0))
    h = .4
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    z = np.c_[xx.ravel(), yy.ravel()]
    z_f = []
    for i_z in z:
        z_f.append(k_nearest_neighbour(X, y, i_z, k, False))
    zz = np.array(z_f).reshape(xx.shape)
    plt.figure()
    plt.contourf(xx, yy, zz, cmap=plt.cm.Paired)
    plt.axis('tight')
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.show()
def winding_number(filename, window, bins=0):
    data = tools.read_matrix_from_file(filename)
    print('file read.')
    print(len(data[1]))
    if bins == 0:
        bins = window
    times = np.zeros(bins)
    values = np.zeros(bins)
    ns = np.zeros(bins)
    step = window // bins  # integer stride between samples
    for i in range(0, bins):
        times[i] = i * step
    for k in range(0, len(data[1]) - window, window):
        for j in range(k, window + k, step):
            for i in range(0, bins):
                values[i] = values[i] + (data[1][j] - data[1][j - i * step]) ** 2
                ns[i] = ns[i] + 1
    for i in range(0, bins):
        if ns[i] != 0:
            values[i] = values[i] / ns[i]
        else:
            values[i] = 0
    plt.scatter(times, values)
    return [times, values]
def plot_pulses(results, ymin=0, ymax=20): plt.plot(results["times"], results["amounts"]) s = np.array([1] * len(results["times"])) c = np.array(["k"] * len(results["times"])) if "durations" in results: # semi-Markovian start = 0 for d, pulse in zip(results["durations"], results["pulses"]): end = min(start + d, len(results["times"]) - 1) if pulse: c[start:end] = "red" s[start:end] = 2 start += d else: # Markovian for n, t in enumerate(results["times"]): pulse = results["pulses"][n] if pulse: c[n] = "red" s[n] = 2 plt.scatter(results["times"], [1] * len(results["times"]), color=c, s=s) plt.xlabel(r"Time, $t$") plt.ylabel("Glucose amount") plt.ylim([ymin, ymax]) plt.xlim([time_obj.t.min(), time_obj.t.max()]) sns.despine()
def main():
    x0 = np.loadtxt('ex/ex5Linx.dat')
    y = np.loadtxt('ex/ex5Liny.dat')
    x0.shape = x0.size, 1
    y.shape = y.size, 1
    plt.scatter(x0, y)
    x = polynomial_linear(x0)
    # x, mns, sstd = z_scale(x)
    theta_normal = linear_normal_equation(x, y, 1.0)
    print('normal equation:')
    print(theta_normal)
    plot_fitting(theta_normal)
    plt.show()
    m, n = x.shape
    alphas = (0.01, 0.03, 0.1, 0.3, 1, 1.3)  # if alpha >= 1.3, no convergence
    lambdas = (0, 1, 10)
    MAX_ITR = 100
    for lam in lambdas:
        for alpha in alphas:
            theta, Js = linear_regression(x, y, MAX_ITR, alpha, lam)
            if alpha == 0.03 and lam == 1:
                theta_best = theta
            plt.plot(Js)
        plt.xlabel('iterations')
        plt.ylabel('cost: J')
        plt.legend(['alpha: %s' % i for i in alphas])
        plt.show()
    print('best theta in alpha:\n ', theta_best)
    test = x0[-1]
    test.shape = test.size, 1
    test = polynomial_linear(test)
    print('predict of %s is %s' % (test, predict_linear(theta, test)))
def plot_2d_sub(x,y,data_set_ids=None,alpha=1,title=None,sizes=None): if sizes is None: sizes = 60 pl.close() fig = pl.figure(4) if data_set_ids is None: data_set_ids = np.zeros(y.size) u = np.unique(data_set_ids) fig.suptitle(title) min_x = x.min() max_x = x.max() for index, val in enumerate(u): ''' if index == 0: ax1 = pl.subplot(len(u),1,index+1) else: pl.subplot(len(u),1,index+1,sharex=ax1,sharey=ax1) ''' ax = pl.subplot(len(u), 1, index + 1) #pl.title(title) inds = data_set_ids == val inds = inds.squeeze() pl.ylabel(str(val)) pl.scatter(x[inds],y[inds],alpha=alpha,c='r',s=sizes,) ax.set_xlim([min_x, max_x]) move_fig(fig) pl.show(block=True) pass
def test1():
    x = [0.5] * 3
    xbounds = [(-5, 5) for y in x]
    GA = GenAlg(fitcalc1, x, xbounds, popMult=100, bitsPerGene=9, mutation=(1. / 9.),
                crossover=0.65, crossN=2, direction='min', maxGens=60, hammingDist=False)
    results = GA.run()
    print("*** DONE ***")
    # print(results)
    plt.ioff()
    # generate pareto frontier numerically
    x1_ = np.arange(-5., 0., 0.05)
    x2_ = np.arange(-5., 0., 0.05)
    x3_ = np.arange(-5., 0., 0.05)
    pfn = []
    for x1 in x1_:
        for x2 in x2_:
            for x3 in x3_:
                pfn.append(fitcalc1([x1, x2, x3]))
    pfn.sort(key=lambda x: x[0])
    plt.figure()
    for x in results:
        plt.scatter(x[1][0], x[1][1], 20, c='r')
    plt.scatter([x[0] for x in pfn], [x[1] for x in pfn], 1.0, c='b', alpha=0.1)
    plt.xlim([-20, -1])
    plt.ylim([-12, 2])
    plt.draw()
def colorPlot():
    RA = []
    DEC = []
    FWHM = []
    e1 = []
    e2 = []
    e = []
    FWHM_max = 0
    FWHM_min = 1
    with open("star_info.txt") as f:
        for line in f.readlines():
            temp = line.split()
            RA.append(float(temp[0]))
            DEC.append(float(temp[1]))
            e1.append(float(temp[2]))
            e2.append(float(temp[3]))
            e.append(np.sqrt(e1[-1] ** 2 + e2[-1] ** 2))
            FWHM.append(float(temp[6]))
            # FWHM_max/FWHM_min actually track the range of the ellipticity e.
            if e[-1] > FWHM_max:
                FWHM_max = e[-1]
            if e[-1] < FWHM_min:
                FWHM_min = e[-1]
    for i in range(len(RA)):
        # Grayscale value in [0, 1] derived from the ellipticity.
        plt.scatter(RA[i], DEC[i], color=str(float(e[i] - FWHM_min) / FWHM_max))
    plt.show()
def plot_knowledge_count(agent_network, filename):
    word_dict = dict()
    agent_list = agent_network.get_all_agents()
    for agent_item in agent_list:
        for word in agent_item.knowledge:
            if word not in word_dict:
                word_dict[word] = 0
            word_dict[word] = word_dict[word] + 1
    word_count_tuple_list = list(word_dict.items())
    word_count_tuple_list = sorted(word_count_tuple_list, key=itemgetter(1))
    print(word_count_tuple_list)
    x = list()
    y = list()
    for item in word_count_tuple_list:
        word = item[0]
        count = item[1]
        x.append(word)
        y.append(count)
    plt.scatter(x, y, s=30, vmin=0, vmax=100, alpha=0.5)
    plt.savefig(filename)
def tsne_2D(inputs, colors=None, labels=None, initial_dims = 50, perplexity = 30.0): """ Plots in 2D a set of points (the rows of NumPy 2D array ``inputs``), using t-SNE. A color coding can be specified with option ``colors`` (e.g. ['b','r','k','k'] would yield one blue, one red and two black points). String labels for each data point can also be provided. ``initial_dims`` and ``perplexity`` are hyper-parameters of t-SNE. This function requires t-SNE python code prodived by Laurens van der Maaten (see README in mlpython/misc/third_party/tsne/). """ try: import mlpython.misc.third_party.tsne.tsne as tsne except: import warnings warnings.warn('tsne_2D requires the t-SNE python code prodived by Laurens van der Maaten. See mlpython/misc/third_party/tsne/README for instructions.') return Y = tsne.tsne(inputs, 2, 50, 20.0); if colors is None: colors = 'k' scatter(Y[:,0], Y[:,1], 20, colors); if labels is not None: for x,y,l in zip(Y[:,0],Y[:,1],labels): text(x,y,l)
def fit(w, f, e, mw, mf, vgrid, npol, sigrange=None, vrange=None, doppler=doppler, plot=False): vgrid = Quantity(vgrid, u.km/u.s) chi2 = Table([vgrid.value, np.zeros_like(vgrid.value)], names=['v','chi2']) chi2['v'].units = vgrid.unit fit1 = Fit1(w, f, e, mw, mf, npol, doppler) chi2['chi2'] = np.array([fit1(v)[0] for v in vgrid]) chi2.meta['ndata'] = len(f) chi2.meta['npar'] = npol+1+1 chi2.meta['ndof'] = chi2.meta['ndata']-chi2.meta['npar'] if plot: import matplotlib.pylab as plt plt.scatter(chi2['v'], chi2['chi2']) if vrange is None and sigrange is None or len(vgrid) < 3: ibest = chi2['chi2'].argmin() vbest, bestchi2 = chi2[ibest] chi2.meta['vbest'] = vbest chi2.meta['verr'] = 0. chi2.meta['bestchi2'] = bestchi2 else: vbest, verr, bestchi2 = minchi2(chi2, vrange, sigrange, plot=plot) _, fit, mfi = fit1(vbest) chi2.meta['wmean'] = fit1.wmean chi2.meta['continuum'] = fit1.sol return chi2, fit, mfi
def generate_data():
    # Number of observations (must be an integer for array shapes)
    T = 1000
    # Correlation
    rho = .9
    # True parameter
    beta = np.array([1., -.5])
    # True parameters for instruments
    gamma = np.array([1, -5, 2, 3, -1])
    # Random errors
    e = np.random.normal(size=(T, 2))
    # Instruments
    Z = np.random.normal(size=(T, 5))
    # Endogenous variables
    X1 = np.dot(Z, gamma) + e[:, 0]
    X2 = np.dot(Z**2, gamma) + e[:, 1]
    X = np.concatenate((X1[:, np.newaxis], X2[:, np.newaxis]), axis=1)
    # Dependent variable
    Y = np.dot(X, beta) + e[:, 0] + rho * e[:, 1]
    print(X.shape, Y.shape)
    plt.scatter(X[:, 0], Y)
    plt.show()
    return Y, X, Z
def fit_plot_unlabeled_data(unlabeled_data_x, labeled_data_x, labeled_data_y, fit_order,
                            data_type, other_data_list, other_data_name):
    output = open('predictions.csv', 'w', newline='')
    coeffs = np.polyfit(labeled_data_x, labeled_data_y, fit_order)  # polynomial fit of nth degree on labeled data
    fit_eq = np.poly1d(coeffs)  # equation from the fit
    predicted_y = fit_eq(unlabeled_data_x)
    writer = csv.writer(output, delimiter=',')
    header = [str(data_type), str(other_data_name), 'Predicted_Num_Inc']
    writer.writerow(header)
    for i in range(len(predicted_y)):
        output_data = [unlabeled_data_x[i], other_data_list[i], predicted_y[i]]
        writer.writerow(output_data)
        print('For ' + str(data_type) + ' of: ' + str(unlabeled_data_x[i]) +
              ', Predicted Number of Incidents is: ' + str(predicted_y[i]))
    plt.scatter(unlabeled_data_x, predicted_y, color='blue', label='Predicted Number of Incidents')
    fit_line_x = np.arange(min(unlabeled_data_x), max(unlabeled_data_x), 1)
    plt.plot(fit_line_x, fit_eq(fit_line_x), color='red', linestyle='dashed',
             label=' Order ' + str(fit_order) + ' Polynomial Fit')
    # Use the line below to also plot the actual data:
    # plt.scatter(labeled_data_x, labeled_data_y, color='green', label='Actual Incident Report Data')
    plt.title('Predicted Number of 311 Incidents by ' + str(data_type))
    plt.xlabel(str(data_type))
    plt.ylabel('Number of 311 Incidents')
    plt.grid()
    plt.xlim([min(unlabeled_data_x) - 1500, max(unlabeled_data_x) + 1500])
    plt.legend(loc='upper left')
    plt.show()
def nova_plot(): erg2mev=624151. fig=plot.figure() yrange = [1e-6,2e-4] xrange = [1e-1,1e5] plot.fill_between([0.2,10e3],[yrange[1],yrange[1]],[yrange[0],yrange[0]],facecolor='yellow',interpolate=True,color='yellow',alpha=0.5) plot.annotate('AMEGO',xy=(3,9e-5),xycoords='data',fontsize=26,color='black') lat=ascii.read("data/NMon2012.LAT.dat",names=['energy','en_low','en_high','flux','flux_err','tmp']) plot.scatter(lat['energy'],lat['flux']*erg2mev,color='red') plot.errorbar(lat['energy'],lat['flux']*erg2mev,xerr=[lat['en_low'],lat['en_high']],yerr=lat['flux_err']*erg2mev,ecolor='red',capsize=0,fmt='none') latul=ascii.read("data/NMon2012.LAT.limits.dat",names=['energy','en_low','en_high','flux','tmp1','tmp2','tmp3','tmp4']) plot.errorbar(latul['energy'],latul['flux']*erg2mev,xerr=[latul['en_low'],latul['en_high']],yerr=0.5*latul['flux']*erg2mev,uplims=True,ecolor='red',capsize=0,fmt='none') plot.scatter(latul['energy'],latul['flux']*erg2mev,color='red') leptonic=ascii.read("data/sp-NMon12-IC-best-fit-1MeV-30GeV.txt",names=['energy','flux'],data_start=1) hadronic=ascii.read("data/sp-NMon12-pi0-and-secondaries.txt",names=['energy','flux1','flux2'],data_start=1) plot.plot(leptonic['energy'],leptonic['flux']*erg2mev,'r--',color='black',lw=2,label='Leptonic') plot.plot(hadronic['energy'],hadronic['flux2']*erg2mev,color='black',lw=2,label='Hadronic+Secondary Leptons') plot.legend(loc='upper right',fontsize='small',frameon=False,framealpha=0.5) plot.xscale('log') plot.yscale('log') plot.ylim(yrange) plot.xlim(xrange) plot.xlabel(r'Energy (MeV)') plot.ylabel(r'Energy$^2 \times $ Flux (Energy) (erg cm$^{-2}$ s$^{-1}$)') plot.title('Nova V339 Del 2013') plot.savefig('Nova_SED.png', bbox_inches='tight') plot.savefig('Nova_SED.eps', bbox_inches='tight') plot.show() plot.close()
def scipy_stuff():
    from scipy.interpolate import griddata
    from matplotlib import pylab
    import pickle
    print("loading points")
    points, x_diff, y_diff = pickle.load(open("temp_data.pickle", "rb"))
    y_pts, x_pts = zip(*points)
    print("Creating grid points")
    grid_points = []
    for j in range(2500):
        for i in range(2500):
            grid_points.append((j, i))
    print("Gridding data")
    x_grid = griddata(points, x_diff, grid_points)
    y_grid = griddata(points, y_diff, grid_points)
    x_grid.shape = (2500, 2500)
    y_grid.shape = (2500, 2500)
    print("Plotting")
    pylab.subplot(3, 1, 1)
    pylab.imshow(x_grid)
    pylab.subplot(3, 1, 2)
    pylab.imshow(y_grid)
    pylab.subplot(3, 1, 3)
    pylab.scatter(x_pts, y_pts)
    pylab.show()
def plot_approx_error(r, error):
    plt.scatter(r, error, c='g', alpha=0.5)
    plt.title('Rank r Approximation Error')
    plt.xlabel('r')
    plt.ylabel('Frobenius Norm')
    plt.show()
    return
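# Usage sketch for plot_approx_error above (not from the original source): compute the
# Frobenius-norm error of rank-r SVD approximations of a random matrix and plot it.
import numpy as np

A = np.random.randn(60, 40)
U, s, Vt = np.linalg.svd(A, full_matrices=False)
ranks = range(1, 21)
errors = [np.linalg.norm(A - (U[:, :r] * s[:r]) @ Vt[:r, :], 'fro') for r in ranks]
plot_approx_error(list(ranks), errors)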
def plot_rfs(size, C, Rx, Ry, color='b'):
    radius = np.sqrt(size[...] / np.pi)
    a, w = 0, C.shape[0]
    plt.scatter(Rx, Ry, s=15, color='w', edgecolor='k')
    plt.scatter(C[a:w, 1], C[a:w, 0], s=radius * 500, alpha=0.4, color=color)
    plt.xticks([])
    plt.yticks([])
def _gaussian_test(): import matplotlib.pyplot as plt n = 10000 mu_x = 0.0 mu_y = 0.0 #sig_x, sig_y = 1.5, 1.5 tau = 0.0 seeing = 1.5 sigma = seeing / (2. * np.sqrt(2. * np.e)) slit_width = 0.2 slit_height = 10.0 slit_x = np.empty(n, dtype=np.float64) slit_y = np.empty(n, dtype=np.float64) slit_x, slit_y = slit_gaussian_psf(n, mu_x, mu_y, sigma, sigma, tau, slit_width, slit_height) log.info("x range: [%s, %s]", slit_x.min(), slit_x.max()) log.info("y range: [%s, %s]", slit_y.min(), slit_y.max()) plt.scatter(slit_x, slit_y, alpha=0.8) plt.fill([-slit_width/2, slit_width/2, slit_width/2, -slit_width/2], [-slit_height/2, -slit_height/2, slit_height/2, slit_height/2], 'r', alpha=0.10, edgecolor='k') plt.gca().set_aspect("equal") plt.title("Gaussian distribution") plt.xlim([-slit_height/2., slit_height/2]) plt.show()
def Decision_Surface(data, target, model, surface=True, probabilities=False, cell_size=.01): # Get bounds x_min, x_max = data[data.columns[0]].min(), data[data.columns[0]].max() y_min, y_max = data[data.columns[1]].min(), data[data.columns[1]].max() # Create a mesh xx, yy = np.meshgrid(np.arange(x_min, x_max, cell_size), np.arange(y_min, y_max, cell_size)) meshed_data = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()]) # Add interactions for i in range(data.shape[1]): if i <= 1: continue meshed_data = np.c_[meshed_data, np.power(xx.ravel(), i)] if model != None: # Predict on the mesh if probabilities: Z = model.predict_proba(meshed_data)[:, 1].reshape(xx.shape) else: Z = model.predict(meshed_data).reshape(xx.shape) # Plot mesh and data if data.shape[1] > 2: plt.title("humor^(" + str(range(1,data.shape[1])) + ") and number_pets") else: plt.title("humor and number_pets") plt.xlabel("humor") plt.ylabel("number_pets") if surface and model != None: cs = plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.4) color = ["blue" if t == 0 else "red" for t in target] plt.scatter(data[data.columns[0]], data[data.columns[1]], color=color)
def plot2Dnet(): allCells = f.net.allCells figure(figsize=(12,12)) colorList = [[0.42,0.67,0.84], [0.90,0.76,0.00], [0.42,0.83,0.59], [0.90,0.32,0.00], [0.34,0.67,0.67], [0.90,0.59,0.00], [0.42,0.82,0.83], [1.00,0.85,0.00], [0.33,0.67,0.47], [1.00,0.38,0.60], [0.57,0.67,0.33], [0.5,0.2,0.0], [0.71,0.82,0.41], [0.0,0.2,0.5]] popLabels = [pop.tags['popLabel'] for pop in f.net.pops if pop.tags['cellModel'] not in ['NetStim']] popColors = {popLabel: colorList[ipop%len(colorList)] for ipop,popLabel in enumerate(popLabels)} # dict with color for each pop cellColors = [popColors[cell.tags['popLabel']] for cell in f.net.cells] posX = [cell['tags']['x'] for cell in allCells] # get all x positions posY = [cell['tags']['y'] for cell in allCells] # get all y positions scatter(posX, posY, s=60, color = cellColors) # plot cell soma positions for postCell in allCells: for con in postCell['conns']: # plot connections between cells posXpre,posYpre = next(((cell['tags']['x'],cell['tags']['y']) for cell in allCells if cell['gid']==con['preGid']), None) posXpost,posYpost = postCell['tags']['x'], postCell['tags']['y'] color='red' if con['synMech'] in ['inh', 'GABA', 'GABAA', 'GABAB']: color = 'blue' width = 0.1 #50*con['weight'] plot([posXpre, posXpost], [posYpre, posYpost], color=color, linewidth=width) # plot line from pre to post xlabel('x (um)') ylabel('y (um)') xlim([min(posX)-0.05*max(posX),1.05*max(posX)]) ylim([min(posY)-0.05*max(posY),1.05*max(posY)]) fontsiz = 12 for popLabel in popLabels: plot(0,0,color=popColors[popLabel],label=popLabel) legend(fontsize=fontsiz, bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0.) ax = gca() ax.invert_yaxis()
def plot_prediction_accuracy(x, y):
    plt.scatter(x, y, c='g', alpha=0.5)
    plt.title('Logistic Regression')
    plt.xlabel('r')
    plt.ylabel('Prediction Accuracy')
    plt.xlim(0, 200)
    plt.show()
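# Usage sketch for plot_prediction_accuracy above (not from the original source):
# synthetic accuracy values over a range of r (e.g. number of retained components).
import numpy as np

r_values = np.arange(1, 201)
accuracy = 0.6 + 0.3 * (1 - np.exp(-r_values / 40.0))   # made-up saturating curve
plot_prediction_accuracy(r_values, accuracy)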
def visualization2(self, sp_to_vis=None): if sp_to_vis: species_ready = list(set(sp_to_vis).intersection(self.all_sp_signatures.keys())) else: raise Exception('list of driver species must be defined') if not species_ready: raise Exception('None of the input species is a driver') for sp in species_ready: # Setting up figure plt.figure() plt.subplot(313) mon_val = OrderedDict() signature = self.all_sp_signatures[sp] for idx, mon in enumerate(list(set(signature))): if mon[0] == 'C': mon_val[self.all_comb[sp][mon] + (-1,)] = idx else: mon_val[self.all_comb[sp][mon]] = idx mon_rep = [0] * len(signature) for i, m in enumerate(signature): if m[0] == 'C': mon_rep[i] = mon_val[self.all_comb[sp][m] + (-1,)] else: mon_rep[i] = mon_val[self.all_comb[sp][m]] # mon_rep = [mon_val[self.all_comb[sp][m]] for m in signature] y_pos = numpy.arange(len(mon_val.keys())) plt.scatter(self.tspan[1:], mon_rep) plt.yticks(y_pos, mon_val.keys()) plt.ylabel('Monomials', fontsize=16) plt.xlabel('Time(s)', fontsize=16) plt.xlim(0, self.tspan[-1]) plt.ylim(0, max(y_pos)) plt.subplot(312) for name in self.model.odes[sp].as_coefficients_dict(): mon = name mon = mon.subs(self.param_values) var_to_study = [atom for atom in mon.atoms(sympy.Symbol)] arg_f1 = [numpy.maximum(self.mach_eps, self.y[str(va)][1:]) for va in var_to_study] f1 = sympy.lambdify(var_to_study, mon) mon_values = f1(*arg_f1) mon_name = str(name).partition('__')[2] plt.plot(self.tspan[1:], mon_values, label=mon_name) plt.ylabel('Rate(m/sec)', fontsize=16) plt.legend(bbox_to_anchor=(-0.1, 0.85), loc='upper right', ncol=1) plt.subplot(311) plt.plot(self.tspan[1:], self.y['__s%d' % sp][1:], label=parse_name(self.model.species[sp])) plt.ylabel('Molecules', fontsize=16) plt.legend(bbox_to_anchor=(-0.15, 0.85), loc='upper right', ncol=1) plt.suptitle('Tropicalization' + ' ' + str(self.model.species[sp])) # plt.show() plt.savefig('s%d' % sp + '.png', bbox_inches='tight', dpi=400)
def plot(y, function): """ Show an animation of Poincare plot. --- arguments --- y: A list of initial values function: function which is argument of Runge-Kutta solver """ h = dt fig = plt.figure() ax = fig.add_subplot(111) ax.grid() time_text = ax.text(0.05, 0.9, '', transform=ax.transAxes) plt.ion() for i in range(nmax + 1): for j in range(nstep): rk4 = RK.RK4(function) y = rk4.solve(y, j * h, h) # -pi <= theta <= pi while y[0] > pi: y[0] = y[0] - 2 * pi while y[0] < -pi: y[0] = y[0] + 2 * pi if ntransient <= i < nmax: # <-- draw the poincare plots plt.scatter(y[0], y[1], s=2.0, marker='o', color='blue') time_text.set_text('n = %d' % i) plt.draw() if i == nmax: # <-- to stop the interactive mode plt.ioff() plt.scatter(y[0], y[1], s=2.0, marker='o', color='blue') time_text.set_text('n = %d' % i) plt.show()
def show_ratios(cpu):
    cpu.ratios.sort(key=lambda x: x[0])
    pl.figure("Tuning")
    pl.plot([x[0] for x in cpu.ratios], [x[2] for x in cpu.ratios])
    pl.figure("Tuning samples")
    pl.scatter([x[0] for x in cpu.ratios], [x[2] * x[0] for x in cpu.ratios])
def scatter(title, file_name, x_array, y_array, size_array, x_label, \ y_label, x_range, y_range, print_pdf): ''' Plots the given x value array and y value array with the specified title and saves with the specified file name. The size of points on the map are proportional to the values given in size_array. If print_pdf value is 1, the image is also written to pdf file. Otherwise it is only written to png file. ''' rc('text', usetex=True) rc('font', family='serif') plt.clf() # clear the ploting window, a must. plt.scatter(x_array, y_array, s = size_array, c = 'b', marker = 'o', alpha = 0.4) if x_label != None: plt.xlabel(x_label) if y_label != None: plt.ylabel(y_label) plt.axis ([0, x_range, 0, y_range]) plt.grid(True) plt.suptitle(title) Plotter.print_to_png(plt, file_name) if print_pdf: Plotter.print_to_pdf(plt, file_name)
def draw_solution(start_node, final_node=None): ax = plt.gca() def draw_path(u, v, arrow_length=.01, color=(.8, .8, .8), lw=1): du = u.direction plt.arrow(u.pose[X], u.pose[Y], du[0] * arrow_length, du[1] * arrow_length, head_width=.005, head_length=.01, fc=color, ec=color) dv = v.direction plt.arrow(v.pose[X], v.pose[Y], dv[0] * arrow_length, dv[1] * arrow_length, head_width=.005, head_length=.01, fc=color, ec=color) center, radius = find_circle(u, v) du = u.position - center theta1 = np.arctan2(du[1], du[0]) dv = v.position - center theta2 = np.arctan2(dv[1], dv[0]) # Check if the arc goes clockwise. if np.cross(u.direction, du).item() > 0.: theta1, theta2 = theta2, theta1 ax.add_patch( patches.Arc(center, radius * 2., radius * 2., theta1=theta1 / np.pi * 180., theta2=theta2 / np.pi * 180., color=color, lw=lw)) return abs((theta2 - theta1) * radius) # points = [] # s = [(start_node, None)] # (node, parent). # while s: # v, u = s.pop() # if hasattr(v, 'visited'): # continue # v.visited = True # # Draw path from u to v. # if u is not None: # draw_path(u, v) # points.append(v.pose[:2]) # for w in v.neighbors: # s.append((w, v)) # # points = np.array(points) # plt.scatter(points[:, 0], points[:, 1], s=10, marker='o', color=(.8, .8, .8)) length = 0 if final_node is not None: plt.scatter(final_node.position[0], final_node.position[1], s=10, marker='o', color='k') # Draw final path. v = final_node while v.parent is not None: d = draw_path(v.parent, v, color='k', lw=2) v = v.parent length += d return length
print("rho: ", stat_spearman) print("tau: ", stat_kendall) print("score: ", ridge.score(pca_x, target)) # needed to work properly predicted = predicted.reshape(-1, 1) # A linear regression is needed to visually compare predicted age vs real age LR = LinearRegression().fit(predicted, target) predicted = LR.predict(predicted) # needed to work properly predicted = predicted.reshape(-1, 1) # plot plt.scatter(target, predicted, s=3, color="black") plt.title("Regression after PCA") plt.plot([min(target), max(target)], [ min(target) * LR.coef_[0] + LR.intercept_, max(target) * LR.coef_[0] + LR.intercept_ ], 'r-.', label="Regression line Y = " + str(LR.coef_[0])[:5] + "*X + " + str(LR.intercept_)[:6]) plt.xlabel("Real age (years)") plt.ylabel("Predicted age (years)") plt.grid() plt.legend()
    for j in range(2, 10):
        data[i][j - 2] = rows[j]

data1 = vq.whiten(data)
Ave = sch.linkage(data1, method='average')
P = sch.dendrogram(Ave)
plt.xlabel('Category label')
plt.ylabel('Distance')
plt.title('Average linkage method')
plt.show()

Ward = sch.linkage(data1, method='ward')
P = sch.dendrogram(Ward)
plt.xlabel('Category label')
plt.ylabel('Distance')
plt.title("Ward's method")
plt.show()

kmeans_cent = vq.kmeans(data1, 5)
print('Cluster centers:\n', kmeans_cent[0])
p = plt.figure(figsize=(16, 16))
plt.title('Scatter plot of cluster centers')
for i in range(8):
    for j in range(8):
        ax = p.add_subplot(8, 8, i * 8 + 1 + j)
        plt.scatter(data1[:, j], data1[:, i])
        plt.scatter(kmeans_cent[0][:, j], kmeans_cent[0][:, i], c='r')
plt.show()
d = 0 for i in Data['Creatinin'].values: s = CU_SUM[t][-1] + (i - mean) CU_SUM[t].append(s) res = pd.DataFrame([Data.index[d], c, s]).T res.columns = columns_C CU_SUM_Mid_C = CU_SUM_Mid_C.append(res) d = d + 1 t = t + 1 CU_SUM_Mid_C.to_csv("D:\\Simulation Model\\Change Point Analysis\\Data\\Creatinin_CUSUM.csv") #Troponin and Creatinin Troponin_Creatnin = pd.read_csv("D:\\Simulation Model\\Change Point Analysis\\Data\\Troponin_CreatninFinal.csv") Case = Troponin_Creatnin[Troponin_Creatnin.Patient_ID == 439505] plt.scatter(x = np.log(Troponin_Creatnin.Troponin), y = np.log(Troponin_Creatnin.Creatinin)) markers = ['.','o','v','^', '<', '>', '1', '2','3','4','8','s','p','*','h','H','+','x','D','d','|','_'] color_list = [] for name, hex in matplotlib.colors.cnames.items(): color_list.append(name) plt.figure(figsize=(14, 8)) plt.xticks(fontsize=14) plt.yticks(rotation = 'vertical', fontsize=14) plt.xlabel('Time', fontsize=14) plt.ylabel('CUMSUM', fontsize=14) #Patient Information Patient_Info = pd.read_csv("D:\\Simulation Model\\Change Point Analysis\\Data\\Patient Information2.csv") ACS_ID = Patient_Info[Patient_Info.patient_id.isin(CU_SUM_Mid_T.Patient_ID)]
        cost_1.append(cost(X, y, theta))
    return theta, cost_1


if __name__ == '__main__':
    # Scatter plot of the raw data
    data = pd.read_csv('ex2data1.txt', header=None, names=['exam1', 'exam2', 'admitted'])
    data.insert(0, 'Ones', 1)
    print(data.describe())
    positive = data[data['admitted'].isin(['1'])]
    negative = data[data['admitted'].isin(['0'])]
    plt.scatter(positive['exam1'], positive['exam2'], marker='x', c='black', label='admitted')
    plt.scatter(negative['exam1'], negative['exam2'], marker='o', c='red', label='not admitted')
    plt.legend(loc='upper right')  # legend position
    # plt.show()
    # Cost function and gradient descent
    X, y = np.array(data.iloc[:, :-1]), np.array(data.iloc[:, -1:])
    theta = np.zeros((1, 3))
    print(X.shape, y.shape, theta.shape)
    alpha, epoch = 0.001, 500000
    theta, cost_1 = gradientDescent(X, y, theta, alpha, epoch)
import matplotlib.pylab as plt import numpy as np import pandas as pd import seaborn as sns from math import cos, sin, log, tan, gamma, pi, exp, sqrt p = plt.figure(figsize=(14, 14), facecolor='black', dpi=600) p = plt.axis('off') def iterator(r, x0, n): y = [x0] for z in range(n): y.append(r * (y[-1]) * (1 - y[-1])) return (y) n = 3000 for z, i in zip(np.linspace(1, 4, n), range(n)): lista = iterator(z, 0.8, 100) plt.scatter(np.linspace(0.5, 4, 10 * n)[i * 10:(i + 1) * 10], lista[-10:], s=0.7, color='w') plt.savefig(f'C:/Users/Alejandro/Pictures/RandomPlots/29012020.png', facecolor='black')
def plot_ifa_parameters_and_ppc(estimated_parameters, true_parameters, sess): map_estimates = dict(estimated_parameters) #map_estimates.pop('sources') true_parameters_vars = true_parameters.copy() map_sources = map_estimates.pop('sources') #map_estimates = sess.run(map_estimates) n_observations = true_parameters['data'].shape[0] #testmodel,source = centeredIndependentFactorAnalysisTest2(n_observations=n_observations, **map_estimates) testmodel, source, data_mean = centeredIndependentFactorAnalysisTest( n_observations=n_observations, mc_samples=1, **map_estimates) #print(sess.run(map_estimates['data_var'])) #print(sess.run(source.distribution.sample((5000))).var(0)) ppc = sess.run(testmodel.distribution.sample()) #plt.title('True source distributions') plot_source_distributions(true_parameters['mixture_component_var'], true_parameters['mixture_weights'], sess) #plt.title('Estimated source distributions') plot_source_distributions(map_estimates['mixture_component_var'], map_estimates['mixture_weights'], sess) fig, ax = plt.subplots() plt.title('Variance of sample is {}'.format(true_parameters['data'].var())) plt.scatter(*true_parameters['data'].T, alpha=.5) xlim = ax.get_xlim() ylim = ax.get_ylim() plt.show() #fig, ax = plt.subplots() #plt.title('Variance of sample is {}'.format(ppc.var())) #plt.scatter(*ppc.T, alpha=.5,c='orange') #ax.set_xlim(xlim) #ax.set_ylim(ylim) #plt.show() fig, ax = plt.subplots() plt.title('Variance of sample is {}'.format(ppc.var())) plt.scatter(*true_parameters['data'].T, alpha=.5) plt.scatter(*ppc.T, alpha=.5) ax.set_xlim(xlim) ax.set_ylim(ylim) plt.show() fig, ax = plt.subplots() plt.title('True and estimated sources') plt.scatter(*true_parameters['sources'].T) #map_sources = sess.run(tf.squeeze(source.distribution.sample((n_observations)))) n_sources = map_sources.shape[1] plt.scatter(*map_sources.T) ax.set_xlim(xlim) ax.set_ylim(ylim) plt.show() fig, ax = plt.subplots() plt.title('True and estimated data_mean') data_mean_s = sess.run(tf.squeeze(data_mean)) true_data_mean = np.einsum( 'ik,kj->ij', true_parameters['sources'], true_parameters['factor_loadings'] / np.linalg.norm( true_parameters['factor_loadings'], axis=1, keepdims=True)) plt.scatter(*true_data_mean.T) plt.scatter(*data_mean_s.T) ax.set_xlim(xlim) ax.set_ylim(ylim) plt.show() n_components_in_mixture = true_parameters['mixture_component_var'].shape[1] for source in range(n_sources): plt.title('mixture component variances, true and estimated') plt.bar(np.arange(n_components_in_mixture), true_parameters_vars['mixture_component_var'][source, :]) plt.bar(np.arange(n_components_in_mixture), map_estimates['mixture_component_var'][source, :], alpha=.5) plt.show() for source in range(n_sources): plt.title('mixing weights, true and estimated') plt.bar(np.arange(n_components_in_mixture), true_parameters_vars['mixture_weights'][source, :]) plt.bar(np.arange(n_components_in_mixture), map_estimates['mixture_weights'][source, :], alpha=.5) plt.show() fgen = true_parameters['factor_loadings'] fpred = map_estimates['factor_loadings'] fig, ax = plt.subplots() ax.scatter(*true_parameters['data'].T, alpha=0.3) xlim = ax.get_xlim() ylim = ax.get_ylim() for fg in fgen: plt.plot(fg[0] * np.array([1, -1]), fg[1] * np.array([1, -1]), color='y', label='true') for fg in fpred: plt.plot(fg[0] * np.array([1, -1]), fg[1] * np.array([1, -1]), color='r', linestyle='-.', label='predicted') #for fg in fica_n: # plt.plot(fg[0]*np.array([1,-1]),fg[1]*np.array([1,-1]),color='k',linestyle='--',label='initial') plt.legend() 
ax.set_xlim(xlim) ax.set_ylim(ylim)
def visualize(trainX, trainY):
    color = [colors[int(i)] for i in trainY]
    plb.scatter(trainX[:, 0], trainX[:, 1], c=color)
    plb.xlabel('x')
    plb.ylabel('y')
    plb.show()
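# Usage sketch for visualize above (not from the original source). The function relies on
# module-level names `plb` (matplotlib.pylab) and `colors` (a label -> color mapping), so
# those assumptions are made explicit here, with synthetic blob data.
import matplotlib.pylab as plb
from sklearn.datasets import make_blobs

colors = ['red', 'blue', 'green']
trainX, trainY = make_blobs(n_samples=150, centers=3, n_features=2, random_state=0)
visualize(trainX, trainY)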
elem += 1 fraction = np.zeros(max_speed, dtype=np.float32) for i in range(start_speed, len(fraction)): if (np.float32(num_below_thres[i - start_speed] + num_over_thres[i - start_speed]) > 0): fraction[i] = np.float32(num_below_thres[i - start_speed]) / ( num_below_thres[i - start_speed] + num_over_thres[i - start_speed]) else: fraction[i] = -1 print fraction figure = plt.figure(figsize=(15, 10)) plot_abs = plt.subplot(2, 2, 1) plt.title("Measurements") plt.scatter(speed, score, color="b") plt.xlim([-1, max_speed]) plt.xlabel("Windspeed [m/s]") plt.ylim([-2, 32]) plt.ylabel("Correct Score [MW]") plot_scatter = plt.subplot(2, 2, 2) plt.title("KNN Interpolation of the Response Curve") plt.plot(T, Y_hat, color='b') plt.scatter(speed, score, color="#CCCCCC") plt.xlim([-1, max_speed]) plt.xlabel("Windspeed [m/s]") plt.ylim([-2, 32]) plt.ylabel("Correct Score [MW]") plot_abs = plt.subplot(2, 2, 3)
print('dlkjfkldfd') dummies = np.empty(739) dummies[0:100] = 9 dummies[100:200] = 8 dummies[200:300] = 7 dummies[300:400] = 6 dummies[400:500] = 5 dummies[500:600] = 4 dummies[600:700] = 3 dummies[700:739] = 2 #print(cols.index("red")) plt.scatter(x[0:100], dummies[0:100], s=10, c=cols, label='Monoysyllables') plt.scatter(x[0:100], dummies[100:200], s=10, c=cols[100:200]) plt.scatter(x[0:100], dummies[200:300], s=10, c=cols[200:300]) plt.scatter(x[0:100], dummies[300:400], s=10, c=cols[300:400]) plt.scatter(x[0:100], dummies[400:500], s=10, c=cols[400:500]) plt.scatter(x[0:100], dummies[500:600], s=10, c=cols[500:600]) plt.scatter(x[0:100], dummies[600:700], s=10, c=cols[600:700]) plt.scatter(x[0:39], dummies[700:739], s=10, c=cols[400:500]) plt.scatter(x[0], dummies[500], s=10, color='red', label='Polysyllables') frame1 = plt.gca() # frame1.legend(('monosyllables', 'polysyllables')) frame1.axes.get_yaxis().set_visible(False) frame1.axes.get_xaxis().set_visible(False)
import matplotlib.pylab as plt import numpy as np import pandas as pd from scipy.stats import norm import seaborn as sns from math import cos, sin, log, tan, gamma, pi, exp, sqrt, cosh, sinh p = plt.figure(figsize=(14, 14), facecolor='black', dpi=400) p = plt.axis('off') n = 80 for z in list(np.linspace(0, pi, n)): plt.scatter([cos(x * z) for x in np.linspace(0, 1, n)], [x for x in np.linspace(0, 10, n)], alpha=0.8, color=[ plt.cm.rainbow(np.random.uniform(0, z / (pi))) for _ in range(n) ], s=17) plt.scatter([-cos(x * z) for x in np.linspace(0, 1, n)], [x + 10 for x in -np.linspace(0, 10, n)], alpha=0.8, color=[ plt.cm.rainbow(np.random.uniform(0, z / (pi))) for _ in range(n) ], s=17) plt.savefig(f'C:/Users/Alejandro/Pictures/RandomPlots/19022020.png', facecolor='black')
cols = ( ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']) # numeric features #for c in cols: # train.hist(column=c, bins=50) # # train.boxplot(column=c, by = 'Gender') pd.crosstab(train['Education'], train['Gender'], margins=True, normalize='columns') pd.crosstab(train['Credit_History'], train['Property_Area'], margins=True, normalize='columns') plt.scatter(train['LoanAmount'], train['Credit_History']) plt.show() # Impute missing values # --------------------- train_mod = train.copy() test_mod = test.copy() # Exclude observations with missing 'Credit_History' train_mod = train_mod.dropna(subset=['Credit_History']).reset_index() # Impute 'LoanAmount' with median values train_mod['LoanAmount'] = train_mod['LoanAmount'].fillna( train_mod['LoanAmount'].median()) test_mod['LoanAmount'] = test_mod['LoanAmount'].fillna( test_mod['LoanAmount'].median())
def biplot(objects, eigenvectors=None, eigenvalues=None, vector_labels=None, object_labels=None, scaling=1, xpc=0, ypc=1, show_arrows=True, group=None, plot_ellipses=False, confidense_level=0.95, axis_label='PC', arrow_head_width=None): """ Creates a biplot with: Parameters: objects: 2D numpy array of scores eigenvectors: 2D numpy array of loadings eigenvalues: 1D numpy array of eigenvalues, necessary to compute correlation biplot_scores vector_labels: 1D numpy array or list of labels for loadings object_labels: 1D numpy array or list of labels for objects show_arrows: logical scaling: either 1 or "distance" for distance biplot, either 2 or "correlation" for correlation biplot xpc, ypc: integers, index of the axis to plot. generally xpc=0 and ypc=1 to plot the first and second components group: 1D numpy array of categories to color scores plot_ellipses: 2D numpy array of error (mean) and deviation (samples) ellipses around groups confidense_level: confidense level for the ellipses axis_label: string, the text describing the axes Returns: biplot as matplotlib object """ # select scaling if scaling == 1 or scaling == 'distance': scores = objects loadings = eigenvectors elif scaling == 2 or scaling == 'correlation': scores = objects.dot(np.diag(eigenvalues**(-0.5))) loadings = eigenvectors.dot(np.diag(eigenvalues**0.5)) else: raise ValueError("No such scaling") if eigenvectors is None: loadings=np.array([[0, 0]]) # to include in the computation of plot limits # draw the cross plt.axvline(0, ls='solid', c='grey', linewidth=0.5) plt.axhline(0, ls='solid', c='grey', linewidth=0.5) # draw the ellipses if group is not None and plot_ellipses: groups = np.unique(group) for i in range(len(groups)): mean = np.mean(scores[group==groups[i], :], axis=0) plt.text(mean[0], mean[1], groups[i], ha='center', va='center', color='k', size=15) ell_dev = ellipse(X=scores[group==groups[i], :], level=confidense_level, method='deviation') ell_err = ellipse(X=scores[group==groups[i], :], level=confidense_level, method='error') plt.fill(ell_err[:,0], ell_err[:,1], alpha=0.6, color='grey') plt.fill(ell_dev[:,0], ell_dev[:,1], alpha=0.2, color='grey') # plot scores if group is None: if object_labels is None: plt.scatter(scores[:,xpc], scores[:,ypc]) else: for i in range(scores.shape[0]): #print('i=', i) #print(scores[i,xpc], scores[i,ypc]) plt.text(scores[i, xpc], scores[i, ypc], object_labels[i], color = 'blue', ha = 'center', va = 'center') else: if object_labels is None: for i in range(len(np.unique(group))): cond = group == np.unique(group)[i] plt.plot(scores[cond, 0], scores[cond, 1], 'o') else: for i in range(len(np.unique(group))): cond = group == np.unique(group)[i] scores_gr = scores[cond, 0] for j in range(scores_gr.shape[0]): plt.text(scores[j, xpc], scores[j, ypc], object_labels[j], ha = 'center', va = 'center') # plot loadings if eigenvectors is not None: if show_arrows: if arrow_head_width is None: arrow_head_width = np.ptp(objects)/100 for i in range(loadings.shape[0]): plt.arrow(0, 0, loadings[i, xpc], loadings[i, ypc], color = 'black', head_width=arrow_head_width) # plot loading labels if vector_labels is None: plt.plot(loadings[:, xpc], loadings[:, ypc], marker='+', color='red', ls='None') else: if show_arrows: expand_load_text = 1.15 else: expand_load_text = 1 for i in range(loadings.shape[1]): plt.text(loadings[i, xpc]*expand_load_text, loadings[i, ypc]*expand_load_text, vector_labels[i], color = 'black', ha = 'center', va = 'center') # , fontsize=20 # axis labels plt.xlabel(axis_label + str(xpc+1)) 
plt.ylabel(axis_label + str(ypc+1)) # axis limit xlim = [np.hstack((loadings[:, xpc], scores[:,xpc])).min(), np.hstack((loadings[:, xpc], scores[:,xpc])).max()] margin_x = 0.05*(xlim[1]-xlim[0]) xlim[0]=xlim[0]-margin_x xlim[1]=xlim[1]+margin_x ylim = [np.hstack((loadings[:, ypc], scores[:,ypc])).min(), np.hstack((loadings[:, ypc], scores[:,ypc])).max()] margin_y = 0.05*(ylim[1]-ylim[0]) ylim[0]=ylim[0]-margin_y ylim[1]=ylim[1]+margin_y plt.xlim(xlim) plt.ylim(ylim)
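# Usage sketch for biplot above (not from the original source): run a bare-bones PCA with
# numpy on synthetic data and pass the scores, loadings and eigenvalues to biplot().
import numpy as np
import matplotlib.pyplot as plt

X = np.random.randn(100, 3) @ np.array([[2., 0.5, 0.], [0., 1., 0.3], [0., 0., 0.2]])
Xc = X - X.mean(axis=0)
eigenvalues, eigenvectors = np.linalg.eigh(np.cov(Xc, rowvar=False))
order = np.argsort(eigenvalues)[::-1]                     # sort components by variance
eigenvalues, eigenvectors = eigenvalues[order], eigenvectors[:, order]
scores = Xc @ eigenvectors
biplot(scores, eigenvectors=eigenvectors, eigenvalues=eigenvalues,
       vector_labels=['v1', 'v2', 'v3'], scaling=1)
plt.show()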
plt.xlabel('Number of clusters') plt.ylabel('Average distance') plt.title('Selecting k with the Elbow Method') # Interpret 3 cluster solution model3 = KMeans(n_clusters=3) model3.fit(clus_train) clusassign = model3.predict(clus_train) # plot clusters from sklearn.decomposition import PCA pca_2 = PCA(2) plot_columns = pca_2.fit_transform(clus_train) plt.scatter( x=plot_columns[:, 0], y=plot_columns[:, 1], c=model3.labels_, ) plt.xlabel('Canonical variable 1') plt.ylabel('Canonical variable 2') plt.title('Scatterplot of Canonical Variables for 3 Clusters') plt.show() clus_train.reset_index(level=0, inplace=True) cluslist = list(clus_train['index']) labels = list(model3.labels_) newlist = dict(zip(cluslist, labels)) newlist newclus = DataFrame.from_dict(newlist, orient='index') newclus newclus.columns = ['cluster']
def plot(data='2018', errorbars=True, nonEssentials=False, \ c404=0., cneib=0., ccontam=0.): if data == '2017': tbl = load17table() elif data == '2018': tbl = load18table() else: print "The value for 'data' must be either '2017' or '2018'." #print tbl # Defining Variables time = tbl['time'] fluxV = tbl['fluxV404'] fluxC = tbl['fluxContam'] fluxC4 = tbl['fluxC4'] fluxN = tbl['fluxNeighbor'] dyV = tbl['fluxErrV404'] dyC = tbl['fluxErrContam'] dyC4 = tbl['fluxErrC4'] dyN = tbl['fluxErrNeighbor'] fluxR = tbl['fluxRandomStar'] dyR = tbl['fluxErrRandomStar'] # Plot of time vs. flux(V404) fig1 = plt.figure(1) fig1.clf() plt.scatter(time, fluxV, alpha=0.4, s=16, zorder=25) if errorbars: plt.errorbar(time, fluxV, yerr=dyV, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Time') plt.ylabel('Flux (V404)') # Plot of time vs. flux(Contaminant) fig2 = plt.figure(2) fig2.clf() plt.scatter(time, fluxC, alpha=0.4, s=16, zorder=25) if errorbars: plt.errorbar(time, fluxC, yerr=dyC, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Time') plt.ylabel('Flux (Contaminant)') # Plot of time vs. flux(C4) fig3 = plt.figure(3) fig3.clf() plt.scatter(time, fluxC4, alpha=0.4, s=16, zorder=25) if errorbars: plt.errorbar(time, fluxC4, yerr=dyC4, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Time') plt.ylabel('Flux (C4)') # Plot of time vs. flux(Neighbor) fig4 = plt.figure(4) fig4.clf() plt.scatter(time, fluxN, alpha=0.4, s=16, zorder=25) if errorbars: plt.errorbar(time, fluxN, yerr=dyN, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Time') plt.ylabel('Flux (Neighbor)') # Plot of Flux(V404) vs. Flux(Contaminant) fig5 = plt.figure(5) fig5.clf() dum5 = plt.scatter(fluxV + c404, fluxC + ccontam, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxV + c404, fluxC + ccontam, xerr=dyV, yerr=dyC, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (V404)') plt.ylabel('Flux (Contaminant)') plt.colorbar(dum5) # let's do a ratio plot if ccontam > 0. and c404 > 0.: absV = fluxV + c404 absC = fluxC + ccontam ratioContam = absC / (absV + absC) unctyRatioSquared = (dyV**2 + dyC**2)/(absV + absC)**2 + \ (dyC/absC)**2 unctyOfRatio = ratioContam * np.sqrt(unctyRatioSquared) # now let's plot this fig55 = plt.figure(55) fig55.clf() dum55 = plt.scatter(absV, ratioContam, alpha=0.4, s=16, \ zorder=25, c=time, cmap='hsv') if errorbars: dum55b = plt.errorbar(absV, ratioContam, xerr=dyV, \ yerr=unctyOfRatio, fmt='o', ms=1, \ ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (V404)') plt.ylabel('(Contaminant / (V404 + contaminant))') plt.colorbar(dum55) # Plot of Flux(V404) vs. Flux(Neighbor) fig6 = plt.figure(6) fig6.clf() dum6 = plt.scatter(fluxV, fluxN, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxV, fluxN, xerr=dyV, yerr=dyN, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (V404)') plt.ylabel('Flux (Neighbor)') plt.colorbar(dum6) # Plot of Flux(V404) vs. Flux(C4) fig7 = plt.figure(7) fig7.clf() dum7 = plt.scatter(fluxV, fluxC4, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxV, fluxC4, xerr=dyV, yerr=dyC4, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (V404)') plt.ylabel('Flux (C4)') plt.colorbar(dum7) # Plot of Flux(Contaminant) vs. 
Flux(C4) if nonEssentials: fig8 = plt.figure(8) fig8.clf() dum8 = plt.scatter(fluxC, fluxC4, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxC, fluxC4, xerr=dyC, yerr=dyC4, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (Contaminant)') plt.ylabel('Flux (C4)') plt.colorbar(dum8) # Plot of Flux(Contaminant) vs. Flux(Neighbor) fig9 = plt.figure(9) fig9.clf() dum9 = plt.scatter(fluxC, fluxN, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxC, fluxN, xerr=dyC, yerr=dyN, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (Contaminant)') plt.ylabel('Flux (Neighbor)') plt.colorbar(dum9) # Plot of Flux(Neighbor) vs. Flux(C4) fig10 = plt.figure(10) fig10.clf() dum10 = plt.scatter(fluxN, fluxC4, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxN, fluxC4, xerr=dyN, yerr=dyC4, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (Neighbor)') plt.ylabel('Flux (C4)') plt.colorbar(dum10) # Plot of Flux(Random Star) vs. Flux(C4) fig11 = plt.figure(11) fig11.clf() dum11 = plt.scatter(fluxR, fluxC4, alpha=0.4, s=16, zorder=25, c=time, cmap='hsv') if errorbars: plt.errorbar(fluxR, fluxC4, xerr=dyR, yerr=dyC4, fmt='o', ms=1, ecolor='0.3', alpha=0.2, zorder=10) plt.xlabel('Flux (Random Star (#6 on Map))') plt.ylabel('Flux (C4)') plt.colorbar(dum11)
def start_anon(): names = ( 'ID', 'age', 'gender', 'native-country', 'race', 'marital-status', 'workclass', 'occupation', 'income', 'People_Family', 'education', 'GlycoHemoglobin', 'ArmCircum', 'SaggitalAbdominal', 'GripStrength', 'Taking_Insulin', 'Taking_Oral_Agents', 'Eyes_Affected', 'Recent_BP', 'Diabetes', ) categorical = set(( 'gender', 'native-country', 'race', 'marital-status', 'workclass', 'occupation', 'income', 'education', )) df = pd.read_csv("./static/finaldata_pakka.txt") #get the span (range) of values that 'can' be entered in the column def get_spans(df, partition, scale=None): spans = {} for column in df.columns: if column in categorical: span = len(df[column][partition].unique()) else: span = df[column][partition].max() - df[column][partition].min( ) if scale is not None: span = span / scale[column] spans[column] = span return spans #seprate the values as two parts def split(df, partition, column): dfp = df[column][partition] if column in categorical: values = dfp.unique() lv = set(values[:len(values) // 2]) rv = set(values[len(values) // 2:]) return dfp.index[dfp.isin(lv)], dfp.index[dfp.isin(rv)] else: median = dfp.median() dfl = dfp.index[dfp < median] dfr = dfp.index[dfp >= median] return (dfl, dfr) def is_k_anonymous(df, partition, sensitive_column, k=3): if len(partition) < k: return False return True def partition_dataset(df, feature_columns, sensitive_column, scale, is_valid): finished_partitions = [] partitions = [df.index] while partitions: partition = partitions.pop(0) spans = get_spans(df[feature_columns], partition, scale) for column, span in sorted(spans.items(), key=lambda x: -x[1]): lp, rp = split(df, partition, column) if not is_valid(df, lp, sensitive_column) or not is_valid( df, rp, sensitive_column): continue partitions.extend((lp, rp)) break else: finished_partitions.append(partition) return finished_partitions def build_indexes(df): indexes = {} for column in categorical: values = sorted(df[column].unique()) indexes[column] = { x: y for x, y in zip(values, range(len(values))) } return indexes def get_coords(df, column, partition, indexes, offset=0.1): if column in categorical: sv = df[column][partition].sort_values() l, r = indexes[column][sv[ sv.index[0]]], indexes[column][sv[sv.index[-1]]] + 1.0 else: sv = df[column][partition].sort_values() next_value = sv[sv.index[-1]] larger_values = df[df[column] > next_value][column] if len(larger_values) > 0: next_value = larger_values.min() l = sv[sv.index[0]] r = next_value l -= offset r += offset return l, r def get_partition_rects(df, partitions, column_x, column_y, indexes, offsets=[0.1, 0.1]): rects = [] for partition in partitions: xl, xr = get_coords(df, column_x, partition, indexes, offset=offsets[0]) yl, yr = get_coords(df, column_y, partition, indexes, offset=offsets[1]) rects.append(((xl, yl), (xr, yr))) return rects def get_bounds(df, column, indexes, offset=1.0): if column in categorical: return 0 - offset, len(indexes[column]) + offset return df[column].min() - offset, df[column].max() + offset def plot_rects(df, ax, rects, column_x, column_y, edgecolor='black', facecolor='none'): for (xl, yl), (xr, yr) in rects: ax.add_patch( patches.Rectangle((xl, yl), xr - xl, yr - yl, linewidth=1, edgecolor=edgecolor, facecolor=facecolor, alpha=0.5)) ax.set_xlim(*get_bounds(df, column_x, indexes)) ax.set_ylim(*get_bounds(df, column_y, indexes)) ax.set_xlabel(column_x) ax.set_ylabel(column_y) def agg_categorical_column(series): return [','.join(set(series))] def agg_numerical_column(series): return 
[series.mean()] def build_anonymized_dataset(df, partitions, feature_columns, sensitive_column, max_partitions=None): aggregations = {} for column in feature_columns: if column in categorical: aggregations[column] = agg_categorical_column else: aggregations[column] = agg_numerical_column rows = [] for i, partition in enumerate(partitions): if i % 100 == 1: print("Finished {} partitions...".format(i)) if max_partitions is not None and i > max_partitions: break grouped_columns = df.loc[partition].agg(aggregations, squeeze=False) sensitive_counts = df.loc[partition].groupby(sensitive_column).agg( {sensitive_column: 'count'}) values = grouped_columns.iloc[0].to_dict() for sensitive_value, count in sensitive_counts[ sensitive_column].items(): if count == 0: continue values.update({ sensitive_column: sensitive_value, 'count': count, }) rows.append(values.copy()) return pd.DataFrame(rows) def diversity(df, partition, column): return len(df[column][partition].unique()) def is_l_diverse(df, partition, sensitive_column, l=2): return diversity(df, partition, sensitive_column) >= l def t_closeness(df, partition, column, global_freqs): total_count = float(len(partition)) d_max = None group_counts = df.loc[partition].groupby(column)[column].agg('count') for value, count in group_counts.to_dict().items(): p = count / total_count d = abs(p - global_freqs[value]) if d_max is None or d > d_max: d_max = d return d_max def is_t_close(df, partition, sensitive_column, global_freqs, p=0.2): if not sensitive_column in categorical: raise ValueError("this method only works for categorical values") return t_closeness(df, partition, sensitive_column, global_freqs) <= p #print the loaded data print(df.head()) #change the type of data other than numerical as categorical for name in categorical: # print(df[name]) df[name] = df[name].astype('category') #print the spans in the columns full_spans = get_spans(df, df.index) print(full_spans) #these columns will be shown in the generated data and in the graph too feature_columns = ['age', 'Diabetes'] sensitive_column = 'income' finished_partitions = partition_dataset(df, feature_columns, sensitive_column, full_spans, is_k_anonymous) print(len(finished_partitions)) print("++++++") indexes = build_indexes(df) column_x, column_y = feature_columns[:2] rects = get_partition_rects(df, finished_partitions, column_x, column_y, indexes, offsets=[0.0, 0.0]) #print the matrics print(rects[:10]) print("==========================") #show graph for k-anonimization pl.figure(figsize=(20, 20)) ax = pl.subplot(111) plot_rects(df, ax, rects, column_x, column_y, facecolor='r') pl.scatter(df[column_x], df[column_y]) pl.show() dfn = build_anonymized_dataset(df, finished_partitions, feature_columns, sensitive_column) print("start-------------") #this prints the k anonymized data print(dfn.sort_values(feature_columns + [sensitive_column])) with open("k-anonimized.txt", "w") as file: file.write( dfn.sort_values(feature_columns + [sensitive_column]).to_string()) print("end--------") finished_l_diverse_partitions = partition_dataset( df, feature_columns, sensitive_column, full_spans, lambda *args: is_k_anonymous(*args) and is_l_diverse(*args)) print(len(finished_l_diverse_partitions)) column_x, column_y = feature_columns[:2] l_diverse_rects = get_partition_rects(df, finished_l_diverse_partitions, column_x, column_y, indexes, offsets=[0.0, 0.0]) #show graph for l-anonymized data pl.figure(figsize=(20, 20)) ax = pl.subplot(111) plot_rects(df, ax, l_diverse_rects, column_x, column_y, edgecolor='b', 
facecolor='b') plot_rects(df, ax, rects, column_x, column_y, facecolor='r') pl.scatter(df[column_x], df[column_y]) pl.show() dfl = build_anonymized_dataset(df, finished_l_diverse_partitions, feature_columns, sensitive_column) print("start**************") #prints the L-precision anonymized data print(dfl.sort_values([column_x, column_y, sensitive_column])) with open("l-anonimized.txt", "w") as file: file.write( dfl.sort_values([column_x, column_y, sensitive_column]).to_string()) print("end****************") global_freqs = {} total_count = float(len(df)) group_counts = df.groupby(sensitive_column)[sensitive_column].agg('count') for value, count in group_counts.to_dict().items(): p = count / total_count global_freqs[value] = p print(global_freqs) print("###############") finished_t_close_partitions = partition_dataset( df, feature_columns, sensitive_column, full_spans, lambda *args: is_k_anonymous(*args) and is_t_close( *args, global_freqs)) print("&&&&&&&") print(len(finished_t_close_partitions)) print("&&&&&&&") dft = build_anonymized_dataset(df, finished_t_close_partitions, feature_columns, sensitive_column) print("start!!!!!!!!!!!!!!!!!!") #prints the T-closnessed data print(dft.sort_values([column_x, column_y, sensitive_column])) with open("t-anonimized.txt", "w") as file: file.write( dft.sort_values([column_x, column_y, sensitive_column]).to_string()) print("end!!!!!!!!!!!!!!!!!!!!") column_x, column_y = feature_columns[:2] t_close_rects = get_partition_rects(df, finished_t_close_partitions, column_x, column_y, indexes, offsets=[0.0, 0.0]) #show graph for t-anonymized data pl.figure(figsize=(20, 20)) ax = pl.subplot(111) plot_rects(df, ax, t_close_rects, column_x, column_y, edgecolor='b', facecolor='b') pl.scatter(df[column_x], df[column_y]) pl.show()
def plot(self): if self.name == 'regression': """Evaluate model during training. Print predictions including 4 rows: 1. target 2. predictive mean 3. error of the above two 4. two sigma of predictive variance Args: test_fixed (Tensor): (2, N, *), `test_fixed[0]` is the fixed test input, `test_fixed[1]` is the corresponding target """ self.bayes_nn.load_state_dict( torch.load("test.pt", map_location='cpu')) self.bayes_nn.eval() x = np.linspace(-0.5, 0.5, 100).reshape(-1, 1) y = self._f(x, sigma=self.noise_data) ytrue = self._f(x, sigma=0.0) xt, yt = torch.Tensor(x), torch.Tensor(y) xt, yt = xt.to(device), yt.to(device) with torch.no_grad(): y_pred_mean, y_pred_var = self.bayes_nn.predict(xt) pred = y_pred_mean.cpu().detach().numpy().ravel() var = y_pred_var.cpu().detach().numpy().ravel() plt.figure() plt.plot(x.ravel(), pred, label='Prediction') plt.scatter(x, y, label='Data') plt.plot(x, ytrue, label='Truth') plt.fill_between(x.ravel(), pred + var, pred - var, alpha=0.5, label='Uncertainty') plt.legend() plt.show() elif self.name == 'stenosis_hard': #self.bayes_nn.load_state_dict(torch.load("test1e-2_1p.pt",map_location = 'cpu')) self.bayes_nn.load_state_dict( torch.load("test1500.pt", map_location='cpu')) self.bayes_nn.eval() Data = np.load('stenosis_hard_coord.npz') x = Data['x'] y = Data['y'] u = Data['u'] v = Data['v'] P = Data['P'] u_CFD = u v_CFD = v P_CFD = P print('u_CFD is', u_CFD) print('v_CFD is', v_CFD) yUp = Data['yUp'] xt, yt = torch.Tensor(x), torch.Tensor(y) Rt = torch.Tensor(yUp).to(device) print('Rt.requires_grad is', Rt.requires_grad) xt, yt = xt.view(len(xt), -1), yt.view(len(yt), -1) xt.requires_grad = True yt.requires_grad = True xt, yt = xt.to(device), yt.to(device) inputs = torch.cat((xt, yt), 1) #with torch.no_grad(): print('inputs is', inputs) y_pred_mean = self.bayes_nn.forward(inputs) #pred = y_pred_mean.cpu().detach().numpy() pred = y_pred_mean for i in range(0, pred.shape[0]): # hard constraint u #pred[i,:,0] *= (Rt[:,0]**2 - yt[:,0]**2) # hard constraint v #pred[i,:,1] *= (Rt[:,0]**2 -yt[:,0]**2) # hard constraint P pred[i, :, 2] = (args.xStart - xt[:, 0]) * 0 + args.dP * ( args.xEnd - xt[:, 0]) / args.L + 0 * yt[:, 0] + ( args.xStart - xt[:, 0]) * (args.xEnd - xt[:, 0]) * pred[i, :, 2] print('pred.shape is', pred.shape) mean = pred.mean(0) EyyT = (pred**2).mean(0) EyEyT = mean**2 beta_inv = (-self.bayes_nn.log_beta).exp() print('beta_inv.mean', beta_inv.mean()) var = beta_inv.mean() + EyyT - EyEyT #var = (pred.std(0))**2 print('mean.shape', mean.shape) print('var.shape', var.shape) u_hard = mean[:, 0] v_hard = mean[:, 1] P_hard = mean[:, 2] u_hard = u_hard.view(len(u_hard), -1) v_hard = v_hard.view(len(v_hard), -1) P_hard = P_hard.view(len(P_hard), -1) u_hard = u_hard.cpu().detach().numpy() v_hard = v_hard.cpu().detach().numpy() P_hard = P_hard.cpu().detach().numpy() var_u = var[:, 0] var_v = var[:, 1] var_P = var[:, 2] var_u = var_u.view(len(var_u), -1) var_v = var_v.view(len(var_v), -1) var_P = var_P.view(len(var_P), -1) var_u = var_u.cpu().detach().numpy() var_v = var_v.cpu().detach().numpy() var_P = var_P.cpu().detach().numpy() #plot_x = 0.4 #plot_y = 0.045 plot_x = 0.4 * np.max(x) plot_y = 0.95 * np.max(y) fontsize = 18 #axis_limit = [-0.5, 0.5, -0.5, 0.2] noise_lv = 0.05 print('shape of u is', u.shape) print('shape of v is', v.shape) print('shape of P is', P.shape) u_noiseCFD = np.zeros_like(u) v_noiseCFD = np.zeros_like(v) P_noiseCFD = np.zeros_like(P) for i in range(0, len(u)): u_error = np.random.normal(0, noise_lv * np.abs(u[i]), 1) #print('std 
is',noise_lv*np.abs(sparse_udom[i])) #print('np.random.normal(0, noise_lv*np.abs(sparse_udom[i]), 1)',np.random.normal(0, noise_lv*np.abs(sparse_udom[i]), 1)) v_error = np.random.normal(0, noise_lv * np.abs(v[i]), 1) p_error = np.random.normal(0, noise_lv * np.abs(P[i]), 1) u_noiseCFD[i] = u[i] + u_error v_noiseCFD[i] = v[i] + v_error P_noiseCFD[i] = P[i] + p_error Data_sparse = np.load('xyuvp_sparse_separate_3sec.npz') sparse_x = Data_sparse['xdom'] print('x_size is', sparse_x.shape) sparse_y = Data_sparse['ydom'] sparse_u = Data_sparse['udom'] sparse_v = Data_sparse['vdom'] xinlet = Data_sparse['xinlet'] yinlet = Data_sparse['yinlet'] uinlet = Data_sparse['uinlet'] xoutlet = Data_sparse['xoutlet'] youtlet = Data_sparse['youtlet'] uoutlet = Data_sparse['uoutlet'] xb = Data_sparse['xb'] yb = Data_sparse['yb'] ub = Data_sparse['ub'] xb_full = Data_sparse['xb_full'] yb_full = Data_sparse['yb_full'] xtrain = np.concatenate((xinlet, xoutlet, sparse_x), 0) ytrain = np.concatenate((yinlet, youtlet, sparse_y), 0) ## loss_f = nn.MSELoss() print('u_hard is', u_hard) print('u_CFD is', u_CFD) # accruacy of u error_u = loss_f(torch.Tensor(u_hard), torch.Tensor(u_CFD)).item() # accuracy of v error_v = loss_f(torch.Tensor(v_hard), torch.Tensor(v_CFD)).item() # accuracy of P error_P = loss_f(torch.Tensor(P_hard), torch.Tensor(P_CFD)).item() ## relative norm ut = torch.Tensor(u_CFD) vt = torch.Tensor(v_CFD) pt = torch.Tensor(P_CFD) u_CFDnorm = loss_f(ut, torch.zeros_like(ut)).item() v_CFDnorm = loss_f(vt, torch.zeros_like(vt)).item() P_CFDnorm = loss_f(pt, torch.zeros_like(pt)).item() print('u_CFDnorm is', np.sqrt(u_CFDnorm)) print('v_CFDnorm is', np.sqrt(v_CFDnorm)) print('P_CFDnorm is', np.sqrt(P_CFDnorm)) np.savetxt('u_CFDnorm.csv', np.array([np.sqrt(u_CFDnorm)])) np.savetxt('v_CFDnorm.csv', np.array([np.sqrt(v_CFDnorm)])) np.savetxt('P_CFDnorm.csv', np.array([np.sqrt(P_CFDnorm)])) relative_error_u = np.sqrt(error_u / u_CFDnorm) relative_error_v = np.sqrt(error_v / v_CFDnorm) relative_error_P = np.sqrt(error_P / P_CFDnorm) print('relative norm |u - u_CFD|/|u_CFD|', relative_error_u) print('relative norm |v - v_CFD|/|v_CFD|', relative_error_v) print('relative norm |P - P_CFD|/|P_CFD|', relative_error_P) np.savetxt('Relative_error_u.csv', np.array([relative_error_u])) np.savetxt('Relative_error_v.csv', np.array([relative_error_v])) np.savetxt('Relative_error_P.csv', np.array([relative_error_P])) ### ## Std u mean uq_u_mean = np.sqrt(var_u).mean() ## Std v mean uq_v_mean = np.sqrt(var_v).mean() ## Std P mean uq_P_mean = np.sqrt(var_P).mean() ## Std u max uq_u_max = np.sqrt(var_u).max() ## Std v max uq_v_max = np.sqrt(var_v).max() ## Std P max uq_P_max = np.sqrt(var_P).max() # #print('uq_u.shape is', uq_u.shape) np.savetxt('error_u.csv', np.array([error_u])) np.savetxt('error_v.csv', np.array([error_v])) np.savetxt('error_P.csv', np.array([error_P])) np.savetxt('uq_umean.csv', np.array([uq_u_mean])) np.savetxt('uq_vmean.csv', np.array([uq_v_mean])) np.savetxt('uq_Pmean.csv', np.array([uq_P_mean])) np.savetxt('uq_umax.csv', np.array([uq_u_max])) np.savetxt('uq_vmax.csv', np.array([uq_v_max])) np.savetxt('uq_Pmax.csv', np.array([uq_P_max])) print('test loss u is', error_u) print('test loss v is', error_v) print('test loss P is', error_P) print('mean uq u is', uq_u_mean) print('mean uq v is', uq_v_mean) print('mean uq P is', uq_P_mean) print('max uq u is', uq_u_max) print('max uq v is', uq_v_max) print('max uq P is', uq_P_max) plt.figure() plt.subplot(2, 1, 1) #plt.scatter(x, y, c= np.sqrt(var_u)/u_hard, 
label = 'u_hard_var') plt.scatter(x, y, c=np.sqrt(var_u), label='u_hard_std', cmap='coolwarm') plt.text(plot_x, plot_y, r'u Std', { 'color': 'b', 'fontsize': fontsize }) #plt.axis('equal') plt.colorbar() plt.savefig('softuNN_std_noise15.png', bbox_inches='tight') plt.figure() plt.subplot(2, 1, 1) plt.scatter(x, y, c=u_hard, label='uhard', cmap='coolwarm', vmin=min(u_CFD), vmax=max(u_CFD)) plt.text(plot_x, plot_y, r'u Mean', { 'color': 'b', 'fontsize': fontsize }) plt.colorbar() #plt.axis('equal') plt.savefig('softuNN_mean_noise15.png', bbox_inches='tight') plt.figure() plt.subplot(2, 1, 1) plt.scatter(x, y, c=u_noiseCFD, label='u CFD', cmap='coolwarm', vmin=min(u_CFD), vmax=max(u_CFD)) plt.colorbar() plt.scatter(xtrain, ytrain, marker='x', c='black') plt.text(plot_x, plot_y, r'u CFD', { 'color': 'b', 'fontsize': fontsize }) #plt.scatter(x, y, c= np.sqrt(var_v), label = 'v_hard_std') #plt.scatter(x,y, c = np.sqrt(var_v)/v_hard, label = 'v_hard_std') #plt.axis('equal') plt.savefig('u_CFD_noise15.png', bbox_inches='tight') plt.figure() plt.subplot(2, 1, 1) #plt.scatter(x, y, c= np.sqrt(var_u)/u_hard, label = 'u_hard_var') plt.scatter(x, y, c=np.sqrt(var_v), label='u_hard_std', vmin=0.001, cmap='coolwarm') plt.text(plot_x, plot_y, r'v Std', { 'color': 'b', 'fontsize': fontsize }) plt.colorbar() #plt.savefig('u_hard_var.png',bbox_inches = 'tight') #plt.figure() plt.subplot(2, 1, 2) plt.scatter(x, y, c=v_hard, label='uhard', cmap='coolwarm') plt.text(plot_x, plot_y, r'v Mean', { 'color': 'b', 'fontsize': fontsize }) #plt.scatter(x, y, c= np.sqrt(var_v), label = 'v_hard_std') #plt.scatter(x,y, c = np.sqrt(var_v)/v_hard, label = 'v_hard_std') plt.colorbar() plt.savefig('v_hard_var.png', bbox_inches='tight') plt.figure() plt.scatter(x, y, c=P_hard, label='P_hard_std', cmap='coolwarm') plt.colorbar() plt.savefig('P_hard_var.png', bbox_inches='tight') #plt.scatter(x,y,label ='Data') plt.show() print('mean of stdvar_u', np.mean(np.sqrt(var_u))) print('mean of std var_v', np.mean(np.sqrt(var_v))) else: raise Exception("error,no such model")
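# Aside on the uncertainty estimate above: `var = beta_inv.mean() + EyyT - EyEyT`
# is the usual split into an aleatoric noise term (the learned 1/beta) plus the
# epistemic spread of the posterior samples, Var[y] ~= E[beta^-1] + E[f^2] - (E[f])^2.
# A standalone numpy sketch of that decomposition (hypothetical helper, not the
# model code) follows.
import numpy as np

def predictive_variance_sketch(samples, beta_inv):
    """samples: (S, N) array of S posterior predictions at N points;
    beta_inv: scalar aleatoric noise variance."""
    mean = samples.mean(axis=0)
    second_moment = (samples ** 2).mean(axis=0)   # E[f^2]
    epistemic = second_moment - mean ** 2          # E[f^2] - (E[f])^2
    return beta_inv + epistemic                    # aleatoric + epistemic

# example: 100 posterior samples at 5 query points
rng = np.random.default_rng(0)
samples = rng.normal(loc=0.0, scale=0.3, size=(100, 5))
print(predictive_variance_sketch(samples, beta_inv=0.01))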
scaler = StandardScaler()
transformed = scaler.fit_transform(x)

cov_pca = convers_pca(no_of_components=2)
cov_pca.fit(transformed)
print(cov_pca.eigen_vectors)
print(cov_pca.eigen_values)
print(cov_pca.sorted_components)

x_std = cov_pca.transform(transformed)
plt.figure()
plt.scatter(x_std[:, 0], x_std[:, 1], c=y)
plt.show(block=False)

from pandas.plotting import scatter_matrix

iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
colors = np.array(50 * ['r'] + 50 * ['g'] + 50 * ['b'])  # 150 samples total, 50 per class in order
scatter_matrix(df, alpha=0.7, figsize=(10, 10), color=colors)
plt.show()

iris.data.shape
iris_target = iris.target
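# The `convers_pca` class used above is defined elsewhere. A rough sketch of a
# covariance/eigendecomposition PCA exposing the same attribute names
# (eigen_values, eigen_vectors, sorted_components) is shown below; this is an
# assumption about its behavior, not the project's actual class.
import numpy as np

class ConversPCASketch:
    def __init__(self, no_of_components):
        self.no_of_components = no_of_components

    def fit(self, x):
        # covariance of the standardized data, then its eigendecomposition
        cov = np.cov(x, rowvar=False)
        self.eigen_values, self.eigen_vectors = np.linalg.eigh(cov)
        # keep the top components, sorted by decreasing eigenvalue
        order = np.argsort(self.eigen_values)[::-1]
        self.sorted_components = self.eigen_vectors[:, order[:self.no_of_components]]
        return self

    def transform(self, x):
        return x @ self.sorted_components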
def visualize_decision_tree_classes(self, best_nodes, classes_list=None, restrict_to_pcateg=False, exclude_leafs=None, savedir=None, postfix=''): """Visualize embeddings, colors by class predicted by decision tree.""" classes_list = classes_list or self.classes_list savedir = savedir or self.savedir init_point_size = 10. point_size_ds = 1. alphas = [0.8, 0.5] _, y = self._getxy() plt.figure(figsize=(7, 7)) point_size = init_point_size alphas = np.linspace(alphas[0], alphas[1], len(classes_list)) # keep track of plotted indices to be able to exclude downstream # nodes when plotting upstream ones when relevant kept_idxs = [] for clno, cls in enumerate(classes_list): # maybe restrict to leafs predicted as a particular class by NuCLS keep1 = None if restrict_to_pcateg: keep1 = (self.clusts.loc[:, 'pred_categ'] == cls).values # restrict to downstream leafs to node of interest keep2 = np.in1d(self.pred_y_leafs, self.node_leafs[best_nodes[cls]]) # noqa if keep1 is None: keep = keep2 else: keep = keep1 & keep2 # maybe exclude certain leafs if exclude_leafs is not None: keep[exclude_leafs] = False # keep track of kept idxes kept_idxs.extend(np.argwhere(keep)[:, 0].tolist()) # now restrict to leaves of interes y_subset = y[keep, :] # plot plt.scatter(y_subset[:, 0], y_subset[:, 1], c=np.array(VisConfigs.CATEG_COLORS[cls])[None, :] / 255., alpha=alphas[clno], s=point_size, edgecolors='none') point_size = point_size_ds * point_size plt.xlim(self._e0min, self._e0max) plt.ylim(self._e1min, self._e1max) plt.title(f'DTALE decisions ({postfix})', fontsize=14, fontweight='bold') # plt.show() # plt.savefig(opj(savedir, f'dectreeCol{postfix}.svg')) plt.savefig(opj(savedir, f'dectreeCol{postfix}.png')) return kept_idxs
# N = 6000
known_labels_ratio = 0.1
X, y = load_anomaly('../data/Animal_Data_prey_predator.csv')
N = X.shape[0]
rp = np.random.permutation(int(N / 10))
# data_P = X[y==1][rp[:int(len(rp)*known_labels_ratio)]]
data_P = X[y == 1]
data_N = X[y == 0]
# data_U = np.concatenate((X[y==1][rp[int(len(rp)*known_labels_ratio):]], X[y==0]), axis=0)
print("Amount of positive samples: %d" % (data_P.shape[0]))
print("Amount of negative samples: %d" % (data_N.shape[0]))

plt.figure(figsize=(8, 4.5))
plt.scatter(data_N[:, 0], data_N[:, 1], c='k', marker='.', linewidth=1, s=1, alpha=0.5, label='Negative')
plt.scatter(data_P[:, 0], data_P[:, 1], c='b', marker='o', linewidth=0, s=20, alpha=0.5, label='Positive')
plt.grid()
plt.legend()

# model = DecisionTreeClassifier(max_depth=None, max_features=None,
#                                criterion='gini', class_weight='balanced')
baggingPU = svm.SVC(kernel='linear', probability=True)
# true_labels = np.zeros(shape=(data_U.shape[0]))
# true_labels[:int(len(rp)*(1.0-known_labels_ratio))] = 1.0

# With different interactions
training_set = []
def scatter(self, x, y, c):
    plt.scatter(x, y, color=c, label='point')
def visualize_decision_tree_nodes(self, best_nodes, postfix=''): """Visualize the learned decision tree nodes.""" plt.figure(figsize=(7, 7)) # scatter actual points from NuCLS model in background _, y = self._getxy() plt.scatter(y[:, 0], y[:, 1], c='beige', alpha=0.6, s=4, edgecolors='none') # trace the learned decision tree for node in range(self.tree.node_count): if self.tree.children_left[node] == -1: continue me = self.tree.value[node, :, 0] clt = self.tree.value[self.tree.children_left[node], :, 0] crt = self.tree.value[self.tree.children_right[node], :, 0] plt.plot( [clt[0], me[0], crt[0]], [clt[1], me[1], crt[1]], color='gray', marker='.', linestyle='-', linewidth=0.5, markersize=3, alpha=0.5, ) # highligh root node me = self.tree.value[0, :, 0] plt.scatter([me[0]], [me[1]], color='k', s=30, alpha=1., edgecolors='k') # color best (class-representative) nodes by class for cls, node in best_nodes.items(): me = self.tree.value[node, :, 0] # color the trace along the decision tree till best node trace, _ = self._trace_from_node_to_root(node) for ndi in range(len(trace) - 1): clt = self.tree.value[trace[ndi], :, 0] crt = self.tree.value[trace[ndi + 1], :, 0] plt.plot( [clt[0], crt[0]], [clt[1], crt[1]], color='k', alpha=1., marker='o', markersize=2.5, linestyle='-', linewidth=1.3, ) # highlight actual chosen best node color = np.array(VisConfigs.CATEG_COLORS[cls])[None, :] / 255. plt.scatter([me[0]], [me[1]], color=color, s=150, alpha=1., edgecolors='none') plt.xlim(self._e0min, self._e0max) plt.ylim(self._e1min, self._e1max) plt.title(f'DTALE nodes ({postfix})', fontsize=14, fontweight='bold') # plt.show() # plt.savefig(opj(self.savedir, f'dectree{postfix}.svg')) plt.savefig(opj(self.savedir, f'dectree{postfix}.png'))
    # (loop body over the input measures; the loop header is truncated above)
    b_i = np.random.uniform(0., 1., (n_i,))
    b_i = b_i / np.sum(b_i)  # Dirac weights

    measures_locations.append(x_i)
    measures_weights.append(b_i)

##############################################################################
# Compute free support barycenter
# -------------

k = 10  # number of Diracs of the barycenter
X_init = np.random.normal(0., 1., (k, d))  # initial Dirac locations
b = np.ones((k,)) / k  # weights of the barycenter (it will not be optimized, only the locations are optimized)

X = ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init, b)

##############################################################################
# Plot data
# ---------

pl.figure(1)
for (x_i, b_i) in zip(measures_locations, measures_weights):
    color = np.random.randint(low=1, high=10 * N)
    pl.scatter(x_i[:, 0], x_i[:, 1], s=b_i * 1000, label='input measure')  # per-measure weights b_i, not the barycenter's b
pl.scatter(X[:, 0], X[:, 1], s=b * 1000, c='black', marker='^', label='2-Wasserstein barycenter')
pl.title('Data measures and their barycenter')
pl.legend(loc=0)
pl.show()
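# Optional sanity check (not part of the original example): with POT already
# imported as `ot`, the exact squared-Euclidean OT cost from each input
# measure to the barycenter support X can be computed with ot.dist / ot.emd2.
for x_i, b_i in zip(measures_locations, measures_weights):
    M = ot.dist(x_i, X)        # pairwise squared Euclidean cost matrix
    cost = ot.emd2(b_i, b, M)  # exact transport cost between the two discrete measures
    print('OT cost to barycenter:', cost)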
om_net = one_mode_network(om_net_data)

x = collections.OrderedDict()
y = collections.OrderedDict()
om_degree = []
tm_degree = []
for i in range(len(om_net.adj)):
    x[i] = tm_net.get_effective_size(i, True)
    # x[i] = tm_net.get_effective_size(i, False)
    y[i] = om_net.get_effective_size(i)
    om_degree.append(len(om_net.get_contacts(i)))
    tm_degree.append(len(tm_net.get_contacts(i, True)))
    # tm_degree.append(len(tm_net.get_contacts(i, False)))

# plot by effective size stuff
x_items = x.items()
x_list = [i[1] for i in x_items]
y_items = y.items()
y_list = [j[1] for j in y_items]
plt.scatter(x_list, y_list, s=1)
plt.xlabel("Two-mode effective size")
plt.ylabel("One-mode effective size")
plt.show()
base = path.basename(datdir) if (path.isdir(datdir) and len( glob.glob("{0}/../../coord/TKR4p173/{1}/*.xy".format(datdir, base))) > 0 and len( glob.glob("{0}/../../coord/TKR4p173/diffusion_{1}.xc".format( datdir, base))) > 0): # Plot phase diagram plt.figure(0, figsize=(10, 7.5)) # inches plt.plot(XS, YS, "-k") plt.plot(X0, Y0, "-k", zorder=1) plt.title("Cr-Nb-Ni at %.0f K" % temp, fontsize=18) plt.xlabel(r"$x_\mathrm{Nb}$", fontsize=18) plt.ylabel(r"$x_\mathrm{Cr}$", fontsize=18) plt.xticks(np.linspace(0, 1, 21)) plt.scatter(Xtick, Ytick, color="black", s=3) gann = plt.text(simX(0.010, 0.495), simY(0.495), r"$\gamma$", fontsize=14) dann = plt.text(simX(0.230, 0.010), simY(0.010), r"$\delta$", fontsize=14) lann = plt.text(simX(0.340, 0.275), simY(0.275), r"L", fontsize=14) # Add composition pathways fnames = sorted( glob.glob("{0}/../../coord/TKR4p173/{1}/*.xy".format(datdir, base))) for file in fnames[::10]:
def dataloader(self): if self.name == 'regression': train_size = args.batch_size X = np.linspace(-0.5, 0.5, train_size).reshape(-1, 1) y = self._f(X, sigma=self.noise_data) y_true = self._f(X, sigma=0.0) plt.scatter(X, y, marker='+', label='Training data') plt.plot(X, y_true, label='Truth') plt.title('Noisy training data and ground truth') plt.legend() plt.show() X_train, Y_train = torch.Tensor(X), torch.Tensor(y) X_test, Y_test = torch.Tensor(X), torch.Tensor(y) data = torch.utils.data.TensorDataset(X_train, Y_train) train_loader = torch.utils.data.DataLoader(data, batch_size=train_size, shuffle=True) return train_loader, train_size elif self.name == 'stenosis_hard': train_size = args.batch_size N_y = 30 L = 1 xStart = 0 xEnd = xStart + L rInlet = 0.05 nPt = 100 unique_x = np.linspace(xStart, xEnd, nPt) sigma = 0.1 scale = 0.005 mu = 0.5 * (xEnd - xStart) x_2d = np.tile(unique_x, N_y) x = x_2d x = np.reshape(x, (len(x), 1)) Data = np.load('xyuvp_uinlet.npz') x = Data['x'] y = Data['y'] u = Data['u'] v = Data['v'] P = Data['p'] x = x[..., None] y = y[..., None] u = u[..., None] v = v[..., None] P = P[..., None] #print('x.shape is',x.shape) R = scale * 1 / np.sqrt(2 * np.pi * sigma**2) * np.exp( -(x - mu)**2 / (2 * sigma**2)) nu = 1e-3 yUp = rInlet - R yDown = -rInlet + R plt.scatter(x, yUp) plt.scatter(x, yDown) plt.scatter(x, y) plt.axis('equal') plt.show() ############################ np.savez('stenosis_hard_coord', x=x, y=y, yUp=yUp, u=u, v=v, P=P) ################ data = torch.utils.data.TensorDataset(torch.FloatTensor(x), torch.FloatTensor(y)) train_loader = torch.utils.data.DataLoader(data, batch_size=train_size, shuffle=True) print('len(data is)', len(data)) print('len(dataloader is)', len(train_loader)) return train_loader, train_size else: raise Exception("error,no such model")
def draw_world(occupancy_grid, robot_locations, assignments, lines_plot={}, poses=[], line_multiplier=1): fig, ax = plt.subplots() occupancy_grid.draw() colours = [(1, 0, 0), (0, 1, 0), (0, 0, 1), (1, 1, 0), (1, 0, 1), (0, 1, 1), (0.5, 0, 0), (0, 0.5, 0), (0, 0, 0.5), (0.5, 0.5, 0), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.25, 0, 0), (0, 0.25, 0), (0, 0, 0.25), (0.25, 0.25, 0), (0.25, 0, 0.25), (0, 0.25, 0.25)] for (i, j), v in np.ndenumerate(assignments): pos = occupancy_grid.get_position(i, j) from_origin = pos - occupancy_grid.origin position = occupancy_grid.origin + line_multiplier * from_origin if occupancy_grid.is_free(position): if v == 0: continue rectangle = plt.Rectangle( position, occupancy_grid.resolution * line_multiplier, occupancy_grid.resolution * line_multiplier, fc=colours[v]) plt.gca().add_patch(rectangle) #plt.show() for pose in poses: x, y, angle = pose x, y = occupancy_grid.get_position(x, y) if angle == 0: plt.arrow(x, y - occupancy_grid.resolution / 8, 0, occupancy_grid.resolution / 4) elif angle == np.pi / 2: plt.arrow(x + occupancy_grid.resolution / 8, y, -occupancy_grid.resolution / 4, 0) elif angle == np.pi: plt.arrow(x, y + occupancy_grid.resolution / 8, 0, -occupancy_grid.resolution / 4) elif angle == -np.pi / 2: plt.arrow(x - occupancy_grid.resolution / 8, y, occupancy_grid.resolution / 4, 0) else: print("Unable to plot") sys.exit() for robot in robot_locations: plot_position = occupancy_grid.get_position(robot[0], robot[1]) plt.scatter(plot_position[0], plot_position[1], s=10, marker='o', color='black', zorder=1000) plt.scatter(plot_position[0], plot_position[1], s=10, marker='o', color='black', zorder=1000) plt.axis('equal') plt.xlabel('x') plt.ylabel('y') plt.show()
import sklearn.discriminant_analysis as LDA
import matplotlib.pylab as plt
import numpy as np

X_train = np.load('./data/wtf_X_train.npy')
X_test = np.load('./data/wtf_X_test.npy')
y_train = np.load('./data/wtf_y_train.npy')
y_test = np.load('./data/wtf_y_test.npy')

# labels are one-hot encoded; recover the class indices
l_train = np.nonzero(y_train)[1]
l_test = np.nonzero(y_test)[1]

lda = LDA.LinearDiscriminantAnalysis(n_components=2)
X_train_lda = lda.fit_transform(X_train, l_train)
# project the test set with the LDA fitted on the training data
# (re-fitting on the test labels would leak label information and give axes
# that are not comparable between the two plots)
X_test_lda = lda.transform(X_test)

plt.figure()
plt.scatter(X_train_lda[:, 0], X_train_lda[:, 1], s=2, c=l_train, cmap='hsv')
plt.figure()
plt.scatter(X_test_lda[:, 0], X_test_lda[:, 1], s=2, c=l_test, cmap='hsv')
plt.show()
def phaseToMJD(tPho=Table(), MJD0=51000.750, \
               per=6.4714, tZer=48813.873, \
               trimWeird=True, \
               plotDBG=False):
    """Unpacks phase to MJD, with various assumptions about the
    observation date and row order.

    MJD0 is the MJD at the start of the run (it will be used to find the
    nearest whole number of cycles for the ephemeris). Defaults to
    1998 July 6th at 6pm UT.

    trimWeird: Two of the Zurita points appear to be mis-ordered. Remove
    them if trimWeird is set to "True."
    """

    if 'phase' not in tPho.colnames:
        return tPho  # return unchanged

    phase = tPho['phase']

    # WATCHOUT - The Casares et al. ephemeris expresses tZer as
    # JD - 2 400 000.0, which means MJD + 0.5. For ease of reading, we add
    # that 0.5 days back on here.
    tZ = tZer - 0.5

    N_start = np.floor((MJD0 - tZ) / per)

    # which means the next lowest phase zero must occur at MJD...
    mjdPrevZero = tZ + N_start * per
    # print "INFO - nearest phase zero is", mjdPrevZero

    # it will be handy to have an array giving "weird" points for
    # which the time appears to have moved backwards...
    bWeird = np.repeat(False, np.size(phase))

    # we loop through these since it's the row number that preserves
    # the phase ordering
    mjdCalc = np.zeros(np.size(phase))
    iOrbit = 0.0
    for iRow in range(np.size(phase)):
        # which orbit are we on now?
        if iRow > 0:
            if phase[iRow - 1] - phase[iRow] > 0.5:
                iOrbit = iOrbit + 1.0

        # now compute the calculated MJD
        nOrbs = phase[iRow] + iOrbit
        mjdCalc[iRow] = mjdPrevZero + nOrbs * per

        if iRow > 0:
            if mjdCalc[iRow] < mjdCalc[iRow - 1]:
                bWeird[iRow] = True

    # update in-place
    tPho['time'] = Column(mjdCalc)

    if plotDBG:
        plt.figure(1)
        plt.clf()
        lCount = np.arange(np.size(phase))
        plt.plot(lCount, phase, 'bo', ls='-')
        plt.plot(lCount[bWeird], phase[bWeird], 'rx', zorder=25)
        plt.xlabel('Row number')
        plt.ylabel('Phase')

        plt.figure(2)
        plt.clf()
        plt.scatter(tPho['time'], tPho['mag'], c='g', \
                    edgecolor='0.5')
        plt.plot(tPho['time'], tPho['mag'], c='g', lw=1)
        plt.plot(tPho['time'][bWeird], tPho['mag'][bWeird], 'rx', zorder=25)
        plt.xlabel('MJD')
        plt.ylabel('Magn')

    if trimWeird:
        tPho = tPho[~bWeird]
        tPho.meta['trimOoO'] = int(np.sum(bWeird))

    # return the table with the calculated MJDs
    return tPho
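# Hedged usage sketch for phaseToMJD: a tiny made-up table with columns
# 'phase' and 'mag' (the names the function expects), run with the default
# ephemeris above. Assumes numpy and astropy.table are importable and that
# phaseToMJD is in scope.
import numpy as np
from astropy.table import Table

tSmall = Table()
tSmall['phase'] = np.array([0.10, 0.60, 0.90, 0.20])  # wraps past 1.0 between rows 3 and 4
tSmall['mag'] = np.array([17.1, 17.3, 17.2, 17.0])

tOut = phaseToMJD(tSmall, plotDBG=False)
print(tOut['time'])  # monotonically increasing MJDs shortly after MJD0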
def plot_results(result_folder, observed_data, input_model): plt.rcParams['xtick.labelsize'] = 15 plt.rcParams['ytick.labelsize'] = 15 plt.rcParams['xtick.major.size'] = 6 plt.rcParams['xtick.minor.size'] = 6 plt.rcParams['xtick.major.width'] = 2 plt.rcParams['xtick.minor.width'] = 2 result_file = result_folder + "/full_list.txt" observed_data = result_folder + "/" + observed_data input_model = result_folder + "/" + input_model lines = open(observed_data).readlines() TTobs, EEobs, ssdobs = [],[],[] for i in range(1,len(lines)): Tobs = float(lines[i].split()[0]) Eobs = float(lines[i].split()[1]) sdobs = float(lines[i].split()[2]) TTobs.append(Tobs) EEobs.append(Eobs) ssdobs.append(sdobs) Zi, VPi, VSi, RHOi = plot_array_from_model(input_model) model_list = [] lines = open(result_file).readlines() n_model = 1 iindex = [] nmod = 0 ccost, vvs1, vvs2, vvs3, vvs4, vvs5, vvs6 = [], [],[],[],[],[],[] hh1, hh2, hh3 = [],[],[] for i in range(0,len(lines)): if lines[i].strip() and lines[i].split()[0] == "mft:": mft = float(lines[i].split()[1]) rough = float(lines[i].split()[3]) cost = float(lines[i].split()[5]) cost = mft EE, TT = [], [] for n in range(3,1000): if lines[i+n].split()[0] != "!": T = float(lines[i+n].split()[0]) E = float(lines[i+n].split()[1]) EE.append(E) TT.append(T) else: break tthick, vvs, vvp, rrho = [],[],[],[] for m in range(n+2, 1000): if lines[i+m][1] !="#": #print lines[i+m] thick = float(lines[i+m].split()[0]) vs = float(lines[i+m].split()[2]) vp = float(lines[i+m].split()[1]) rho = float(lines[i+m].split()[3]) tthick.append(thick) vvs.append(vs) vvp.append(vp) rrho.append(rho) else: break iindex.append(nmod) nmod+=1 model_list.append([n_model, cost, TT, EE, tthick, vvs, vvp, rrho, i]) n_model += 1 vs1 = vvs[0] vs2 = vvs[1] vs3 = vvs[2] vs4 = vvs[3] vs5 = vvs[4] vs6 = vvs[5] h1 = tthick[0] h2 = tthick[1] h3 = tthick[2] ccost.append(cost) vvs1.append(vs1) vvs2.append(vs2) vvs3.append(vs3) vvs4.append(vs4) vvs5.append(vs5) vvs6.append(vs6) hh1.append(h1) hh2.append(h2) hh3.append(h3) model_list_sorted = sorted(model_list, key = itemgetter(1), reverse=True) min_cost = model_list_sorted[-1][1] max_cost = model_list_sorted[0][1] perc = 20 # 20% threshold = min_cost * (perc + 100)/100. TTbest = model_list_sorted[-1][2] EEbest = model_list_sorted[-1][3] tthick = model_list_sorted[-1][4] vvs = model_list_sorted[-1][5] vvp = model_list_sorted[-1][6] rrho = model_list_sorted[-1][7] line_best = model_list_sorted[-1][8] out = open(result_folder+"/best_model.d","w") for i in range(line_best, line_best+1000): # print lines[i] if lines[i].split()[0] == "model:": for j in range(1,11): out.write(lines[i+j]) break out.close() mean = (vvs[0]*tthick[0] + vvs[1]*tthick[1])/(tthick[0]+tthick[1]) Zbest, VSbest, VPbest, RHObest = prepare4plot(tthick, vvs, vvp, rrho) cmap = cm.hot fig = plt.figure(1, figsize=(8.27, 11.69)) fig.subplots_adjust( wspace=1.) 
ax1 = plt.subplot2grid((2, 4), (0, 0), colspan=4) ax2 = plt.subplot2grid((2, 4), (1, 0), colspan=2) ax3 = plt.subplot2grid((2, 4), (1, 2), colspan=2) for model in sorted(model_list_sorted, key = itemgetter(1), reverse=True): cost = model[1] TT = model[2] EE = model[3] tthick = model[4] vvs = model[5] vvp = model[6] rrho = model[7] Z, VS, VP, RHO = prepare4plot(tthick, vvs, vvp, rrho) if cost <= threshold: colorVal = normalize_misfit(cost, min_cost, threshold, 0.5,1) ax2.plot(VS,Z, color=str(colorVal), linewidth=2, zorder=0) ax3.plot(VS,Z, color=str(colorVal), linewidth=2, zorder=0) ax1.plot(TT,EE, color=str(colorVal), linewidth=2, zorder=9) #plt.subplot(223) ax2.plot(VSbest,Zbest, color="red", zorder=2, label="Best model", linewidth=2) ax2.plot(VSi, Zi, color="black", label= "Litho1.0", linewidth=2, zorder=2, linestyle=":") ax2.set_ylim(45,0) ax2.set_xlim(0.2,5.0) ax2.set_xlabel("Vs (km/s)",size=15) ax2.set_ylabel("Depth (km)",size=15) ax2.legend(loc=3, fontsize=13) ax2.xaxis.set_major_locator(ticker.FixedLocator([1,2,3,4,5])) #plt.axhline(6,color="0.5",linestyle="--",linewidth=0.5) #ax2.tick_params(labelsize=13) #plt.subplot(224) ax3.plot(VSbest,Zbest, color="red", zorder=2, label="Best model", linewidth=2) ax3.plot(VSi, Zi, color="black", label= "Litho1.0", linewidth=2, zorder=2, linestyle=":") ax3.set_ylim(6,0) ax3.set_xlim(0., 4) ax3.set_xlabel("Vs (km/s)",size=15) ax3.set_ylabel("Depth (km)",size=15) ax3.legend(loc=3, fontsize=13) ax1.errorbar(TTobs, EEobs, yerr=ssdobs, color="black", fmt=" ",zorder=0, alpha=0.3) ax1.scatter(TTobs, EEobs, color="black", s=30, label="Observed data", zorder=0) ax1.plot(TTbest,EEbest, color="red", label="Theo. ellipticity from best model",zorder=10, linewidth=2) ax1.set_xscale("log") ax1.set_xlim(0.9*min(TT),max(TT)*1.1) ax1.set_ylim(-1,1) ax1.set_xlabel("Period (s)",size=15) ax1.set_ylabel("Log(H/V)",size=15) ax1.legend(loc=4, fontsize=15) #ax1.tick_params(labelsize=13) ax1.xaxis.set_minor_formatter(FormatStrFormatter("%.0f")) ax1.xaxis.set_major_formatter(FormatStrFormatter("%.0f")) #ax1.xaxis.set_major_locator(ticker.FixedLocator([2,3,4,5,6,7,8,9,10,20,30,40,50,60,70,80,90])) #ax1.set_xticks([9]) ax1.set_xticklabels([1,1,1,1,1,1,1,1,2,3,4,5,6,7,8,"",20,"",40,"",60,"",80,""],minor=True) plt.suptitle("Real data inversion\nStation: CCD\nMisfit threshold = " +str(perc)+"%",size=18) plt.savefig(result_folder + "/results_CCD", dpi=200) plt.close() #---------------------------------------------------------- fig = plt.figure(1, figsize=(8.27, 11.69)) fig.subplots_adjust(wspace=0.5, hspace=0.4, top=0.9) plt.subplot(9,1,1) plt.scatter(iindex, zip(*model_list)[1], color="black", s=1) plt.ylabel("Misfit", size=13) plt.yscale("log") plt.ylim(min(zip(*model_list)[1])*0.5, max(zip(*model_list)[1])) plt.subplot(9,1,2) plt.scatter(iindex, vvs1, color="black", s=1) plt.ylabel("Vs 1\n(km/s)", size=13) plt.subplot(9,1,3) plt.scatter(iindex, hh1, color="black", s=1) plt.ylabel("H 1\n(km)", size=13) plt.subplot(9,1,4) plt.scatter(iindex, vvs2, color="black", s=1) plt.ylabel("Vs 2\n(km/s)", size=13) plt.subplot(9,1,5) plt.scatter(iindex, hh2, color="black", s=1) plt.ylabel("H 2\n(km)", size=13) plt.subplot(9,1,6) plt.scatter(iindex, hh3, color="black", s=1) plt.ylabel("H 3\n(km)", size=13) plt.subplot(9,1,7) plt.scatter(iindex, vvs4, color="black", s=1) plt.ylabel("Vs 4\n(km/s)", size=13) plt.subplot(9,1,8) plt.scatter(iindex, vvs5, color="black", s=1) plt.ylabel("Vs 5\n(km/s)", size=13) plt.subplot(9,1,9) plt.scatter(iindex, vvs6, color="black", s=1) plt.ylabel("Vs 
6\n(km/s)", size=13) plt.xlabel("# Model", size=13) plt.suptitle("Inversion evolution", size=18) plt.savefig(result_folder + "/convergence.png") plt.close() #====================================================================== plt.figure(figsize=(15,15)) plt.subplots_adjust(left=0.1, right = 0.9, top=0.9, bottom=0.1, hspace=0.2, wspace=0.2) ccost, vvs1, vvs2, vvs3, vvs4, vvs5, vvs6 = [], [],[],[],[],[],[] for model in sorted(model_list_sorted, key = itemgetter(1), reverse=True): cost = model[1] TT = model[2] EE = model[3] tthick = model[4] vvs = model[5] vvp = model[6] rrho = model[7] vs1 = vvs[0] vs2 = vvs[1] vs3 = vvs[2] vs4 = vvs[3] vs5 = vvs[4] vs6 = vvs[5] ccost.append(cost) vvs1.append(vs1) vvs2.append(vs2) vvs3.append(vs3) vvs4.append(vs4) vvs5.append(vs5) vvs6.append(vs6) #==================================================== vmin = np.log10(min_cost) vmax = np.log10(max_cost) cmap = cm.jet m="*" fc="red" eg="black" s=400 plt.subplot(5,5,1) cp = plt.scatter(vvs1, vvs2, c=np.log10(ccost),s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.ylabel("Vs2", fontsize=15) plt.xlim(min(vvs1), max(vvs1)) plt.ylim(min(vvs2), max(vvs2)) plt.subplot(5,5,6) cp = plt.scatter(vvs1, vvs3, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.ylabel("Vs3", fontsize=15) #plt.xlim(min(vvs1), max(vvs1)) #plt.ylim(min(vvs3), max(vvs3)) plt.subplot(5,5,7) cp = plt.scatter(vvs2, vvs3, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) #plt.xlim(min(vvs2), max(vvs2)) #plt.ylim(min(vvs3), max(vvs3)) plt.subplot(5,5,11) cp = plt.scatter(vvs1, vvs4, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs1), max(vvs1)) plt.ylim(min(vvs4), max(vvs4)) plt.ylabel("Vs4", fontsize=15) plt.subplot(5,5,12) cp = plt.scatter(vvs2, vvs4, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs2), max(vvs2)) plt.ylim(min(vvs4), max(vvs4)) plt.subplot(5,5,13) cp = plt.scatter(vvs3, vvs4, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) #plt.xlim(min(vvs3), max(vvs3)) #plt.ylim(min(vvs4), max(vvs4)) plt.subplot(5,5,16) cp = plt.scatter(vvs1, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs1), max(vvs1)) plt.ylim(min(vvs5), max(vvs5)) plt.ylabel("Vs5", fontsize=15) plt.subplot(5,5,17) cp = plt.scatter(vvs2, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs2), max(vvs2)) plt.ylim(min(vvs5), max(vvs5)) plt.xlabel("Vs2", fontsize=15) plt.subplot(5,5,18) cp = plt.scatter(vvs3, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) #plt.xlim(min(vvs3), max(vvs3)) #plt.ylim(min(vvs5), max(vvs5)) plt.xlabel("Vs3", fontsize=15) plt.subplot(5,5,19) cp = plt.scatter(vvs4, vvs5, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs4), max(vvs4)) plt.ylim(min(vvs5), max(vvs5)) plt.xlabel("Vs4", fontsize=15) plt.subplot(5,5,21) cp = plt.scatter(vvs1, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs1), max(vvs1)) plt.ylim(min(vvs6), max(vvs6)) plt.xlabel("Vs1", fontsize=15) plt.ylabel("Vs6", fontsize=15) plt.subplot(5,5,22) cp = plt.scatter(vvs2, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs2), max(vvs2)) plt.ylim(min(vvs6), max(vvs6)) plt.xlabel("Vs2", fontsize=15) plt.subplot(5,5,23) cp = plt.scatter(vvs3, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, 
cmap=cmap) #plt.xlim(min(vvs3), max(vvs3)) #plt.ylim(min(vvs6), max(vvs6)) plt.xlabel("Vs3", fontsize=15) plt.subplot(5,5,24) cp = plt.scatter(vvs4, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs4), max(vvs4)) plt.ylim(min(vvs6), max(vvs6)) plt.xlabel("Vs4", fontsize=15) plt.subplot(5,5,25) cp = plt.scatter(vvs5, vvs6, c=np.log10(ccost), s=20, linewidth=0, vmin=vmin, vmax=vmax, cmap=cmap) plt.xlim(min(vvs5), max(vvs5)) plt.ylim(min(vvs6), max(vvs6)) plt.xlabel("Vs5", fontsize=15) plt.savefig(result_folder + "/correlation.png") plt.close() return