Пример #1
0
def calculate_dist(data):
    """
        Impute, min-max scale and measure each row's distance to the centroid.

        Every column has its NaN values replaced by the column median
        (rounded to the nearest integer) and is then rescaled to [0, 1].
        Columns that are entirely NaN, or that hold one constant value,
        cannot be scaled and are dropped. Euclidean, Mahalanobis and Cosine
        distances between each row and the centroid (column-wise mean) of
        the scaled data are then computed and handed to the ``boxplot`` and
        ``histogram`` helpers of the ``ploting`` module.

        Note:
            ``data`` is modified in place: the 'Patient_id' column is
            deleted and the remaining columns are overwritten or dropped.

        Args:
            data: pandas DataFrame containing a 'Patient_id' column; the
                remaining columns are assumed to be numeric.

        Returns:
            three lists of distances (one value per row), in the order
            Mahalanobis, Euclidean, Cosine

        Raises:
            KeyError: if ``data`` has no 'Patient_id' column.

    """
    import scipy.spatial.distance as dist
    # Import the submodule explicitly; ``import scipy`` alone does not
    # guarantee that ``scipy.linalg`` is loaded.
    from scipy import linalg
    from ploting import boxplot, histogram

    # scaling
    del data['Patient_id']
    # Iterate over a snapshot of the labels because columns are deleted
    # inside the loop.
    for k in list(data.columns):
        # An all-NaN column has a NaN median, which round() may reject.
        if data[k].isnull().all():
            del data[k]
            continue
        # NOTE(review): the original tested an "80% null" threshold only
        # after imputation, where it could fire solely for all-NaN results.
        # If mostly-missing columns were meant to be dropped, that test
        # belongs here, before fillna -- confirm with the author.
        filled = data[k].fillna(round(data[k].median()))
        shifted = filled - filled.min()
        span = shifted.max()
        # A constant column would be 0 / 0 after shifting; drop it instead.
        if span == 0:
            del data[k]
            continue
        data[k] = shifted / span

    # Calculate centroid
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    # The explicit axis keeps the centroid a per-column vector regardless of
    # how the installed pandas handles an axis-less mean.
    numpyMatrix = data.values
    centroid = data.mean(axis=0).values

    #Calculate covariance matrix
    # The pseudo-inverse tolerates a singular covariance matrix.
    invcovmx = linalg.pinv(data.cov().values)

    #Calculate Euclidean,Mahalanobis and Cosine distance
    Mahaldist = [dist.mahalanobis(row, centroid, invcovmx)
                 for row in numpyMatrix]
    Eucliddist = [dist.euclidean(row, centroid) for row in numpyMatrix]
    Cosinedist = [dist.cosine(row, centroid) for row in numpyMatrix]

    # ploting
    boxplot(Eucliddist, Mahaldist, Cosinedist)
    histogram(Eucliddist, Mahaldist, Cosinedist, len(data))

    return Mahaldist, Eucliddist, Cosinedist
Пример #2
0
    plt.plot(r, r * model_m.T[k], 'r', alpha=0.5)
    if n != 1:
        plt.ylabel(r'$\Delta \bar z_{' + str(k + 1) + '} r$', fontsize=14)
    else:
        plt.ylabel(r'$\Delta \bar z r$', fontsize=14)
    plt.axhline(y=0.0, xmin=0.0, xmax=1.0, linewidth=1.5, color='k')
    plt.xscale('log')
    if (k + 1) % row == 0 or k + 1 == n:
        plt.xlabel(r'$r$ (Mpc)')

# Adjust spacing between subplots and the figure margins, add a figure-level
# title, then display the figure.
# NOTE(review): `plt` is presumably matplotlib.pyplot, imported outside this
# view -- confirm.
plt.subplots_adjust(hspace=0.26,
                    wspace=0.56,
                    right=0.95,
                    left=0.16,
                    top=0.93,
                    bottom=0.10)
plt.suptitle('Delta Average Redshift*Radius vs. Radius')
plt.show()
# The triple-quoted string below is disabled code kept as a bare string
# expression; it has no effect when the script runs.
'''
ploting.histogram(b_g_rel[2],100,range=(0.,3.),color='r',xlabel='Redshift',ylabel='counts',title='Log of Background Galaxies Histogram',yscale='log')

ploting.plot_posit(b_g_rel[0],b_g_rel[1],color='r')
'''
# Elapsed time since `program_start` (assigned outside this view). The end
# timestamp is taken after plt.show() has returned.
program_end = time.time()
run_time = program_end - program_start

# Report the run time in seconds, minutes and hours.
print("done")
print("total run time: " + str(run_time) + " sec")
print("             or " + str(run_time / 60.) + " min")
print("             or " + str(run_time / 3600.) + " hr")