def calculate_dist(data):
    """Impute, scale and measure each row's distance to the data centroid.

    The frame is cleaned *in place*:

    * the ``Patient_id`` column is removed when present;
    * columns with more than 80% missing values are dropped;
    * remaining NaN values are replaced by the column median, rounded to
      the nearest integer;
    * constant columns are dropped, because they cannot be min-max scaled;
    * every surviving column is min-max scaled to the range [0, 1].

    The Mahalanobis, Euclidean and Cosine distances between each row and
    the centroid (the column-wise mean) are then computed and handed to
    ``ploting.boxplot`` and ``ploting.histogram``.

    Args:
        data: pandas DataFrame; every column other than ``Patient_id`` is
            expected to be numeric, since median/min/max are taken on it.
            The frame is modified in place.

    Returns:
        Tuple ``(Mahaldist, Eucliddist, Cosinedist)`` of lists holding one
        distance per row, in row order.

    Raises:
        ValueError: if ``data`` has no rows, or if no usable column is left
            after cleaning.
    """
    from scipy import linalg
    from scipy.spatial import distance as dist

    # Columns whose share of missing values exceeds this are discarded.
    max_missing_fraction = 0.8

    if 'Patient_id' in data.columns:
        del data['Patient_id']

    n_rows = len(data)
    if n_rows == 0:
        raise ValueError("calculate_dist() needs at least one row")

    # Iterate over a snapshot of the labels: columns are deleted in the loop.
    for k in list(data.columns):
        column = data[k]

        # The sparsity test has to run BEFORE imputation; once the NaNs are
        # filled there is nothing left for it to detect.
        if column.isnull().sum() > n_rows * max_missing_fraction:
            del data[k]
            continue

        column = column.fillna(round(column.median()))

        low = column.min()
        span = column.max() - low
        # A zero span means a constant column; scaling it would divide by 0.
        if not span > 0:
            del data[k]
            continue

        data[k] = (column - low) / span

    if data.shape[1] == 0:
        raise ValueError("calculate_dist() found no usable feature columns")

    # ``.values`` replaces ``as_matrix()``, which pandas 1.0 removed.
    numpyMatrix = data.values
    # Explicit axis=0: ``np.mean(data)`` collapses both axes to one scalar
    # on recent pandas instead of giving the per-column centroid.
    centroid = data.mean(axis=0).values

    # The pseudo-inverse tolerates a singular covariance matrix.
    invcovmx = linalg.pinv(data.cov().values)

    Mahaldist, Eucliddist, Cosinedist = [], [], []
    for row in numpyMatrix:
        Mahaldist.append(dist.mahalanobis(row, centroid, invcovmx))
        Eucliddist.append(dist.euclidean(row, centroid))
        Cosinedist.append(dist.cosine(row, centroid))

    # Project-local plotting helpers; only needed for this final step.
    from ploting import boxplot, histogram

    boxplot(Eucliddist, Mahaldist, Cosinedist)
    histogram(Eucliddist, Mahaldist, Cosinedist, len(data))

    return Mahaldist, Eucliddist, Cosinedist
plt.plot(r, r * model_m.T[k], 'r', alpha=0.5) if n != 1: plt.ylabel(r'$\Delta \bar z_{' + str(k + 1) + '} r$', fontsize=14) else: plt.ylabel(r'$\Delta \bar z r$', fontsize=14) plt.axhline(y=0.0, xmin=0.0, xmax=1.0, linewidth=1.5, color='k') plt.xscale('log') if (k + 1) % row == 0 or k + 1 == n: plt.xlabel(r'$r$ (Mpc)') plt.subplots_adjust(hspace=0.26, wspace=0.56, right=0.95, left=0.16, top=0.93, bottom=0.10) plt.suptitle('Delta Average Redshift*Radius vs. Radius') plt.show() ''' ploting.histogram(b_g_rel[2],100,range=(0.,3.),color='r',xlabel='Redshift',ylabel='counts',title='Log of Background Galaxies Histogram',yscale='log') ploting.plot_posit(b_g_rel[0],b_g_rel[1],color='r') ''' program_end = time.time() run_time = program_end - program_start print("done") print("total run time: " + str(run_time) + " sec") print(" or " + str(run_time / 60.) + " min") print(" or " + str(run_time / 3600.) + " hr")