def test1_leverage():
    """Check leverage_make against the hat-matrix diagonal and the rank.

    The first return value must equal diag(X (X'X)^+ X') and the second
    must equal the rank of the design matrix.
    """
    # Full-rank 10x3 design matrix.
    design = np.column_stack([
        np.array([-1, -.5, 0, 0, 0, 0, 0, 0, .5, 1]),
        np.array([-1, 0, 0, 0, -.5, 0, .5, 0, 0, 1]),
        np.array([-1, -.5, 0, -.5, 0, .5, 0, 0, .5, 1]),
    ])
    result = leverage_make(design)
    hat_diagonal = design.dot(npl.pinv(design.T.dot(design))).dot(design.T).diagonal()
    assert_allclose(hat_diagonal, result[0])
    assert result[1] == 3

    # Rank-deficient sanity check: the third column is identically zero.
    # Since Mahalanobis = (N-1)(leverage - 1/N) with N observations, and the
    # middle row sits at the center of the distribution (Mahalanobis = 0):
    #     0 = 2 (l_2 - 1/3)  =>  l_2 = 1/3
    small_design = np.column_stack([
        np.array([-1, 0, 1]),
        np.array([-1, 1, 3]),
        np.array([0, 0, 0]),
    ])
    leverage_y = leverage_make(small_design)
    assert np.round(leverage_y[0][1], 10) == np.round(1 / 3, 10)
    # Note the rank is only 2 here.
    assert leverage_y[1] == 2
# Make the project-local helper modules importable, then pull in the
# visualization and outlier/leverage helpers used below.
sys.path.append(function_location)
from visuals_functions import three_d_scatter_rotation_gen,three_d_cluster_rotation_gen
from outlier_and_normalization_functions import leverage_make
# Kmean clustering with 2 clusters
##################
#### Leverage ####
##################
# Load the design matrix and the matching supernova names
# (presumably written by an earlier preprocessing step under the
# data_created directory — verify against the script that saves them).
X_full = np.load(data_created+"X_full.npy")
names_full = np.load(data_created+"names_full.npy")
# Iteratively strip the single most extreme observation, twice.
# leverage_make returns (per-row leverage values, matrix rank).
leverage,X_rank = leverage_make(X_full)
X_minus1 = X_full[leverage!=np.max(leverage),:]  # drop row(s) tied at the max leverage
leverage,X_rank = leverage_make(X_minus1)
X_minus2 = X_minus1[leverage!=np.max(leverage),:]  # drop the next most extreme row(s)
# All that stuff
# hierarchical clustering
# http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
# http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.cluster.hierarchy.single.html
import scipy
import scipy.cluster
# Single-linkage hierarchical clustering of the selected feature subset.
# NOTE(review): grey_output appears to be set elsewhere when this condition
# is false — confirm against the surrounding loop.
if len(grey_option) == X_full.shape[-1]:
    grey_output, kk = "w_grey", 0
X = data[num_data][:, grey_option]
distance = scipy.spatial.distance.pdist(X)
single_hierarchy = scipy.cluster.hierarchy.single(distance)
image_extension = "single"+data_names[num_data]+"_"+grey_output+".png"
# initial plot for adjustments to distance given to making the classes
if data_names[num_data].rfind("full") != (-1):
    leverage, _ = leverage_make(X)
    # Drop the two highest-leverage observations before re-clustering.
    # FIX: hoist the sorted top-two out of the comprehension — the original
    # re-sorted the entire leverage array once per element — and drop the
    # redundant `True if ... else False`.
    top_two = sorted(leverage)[-2:]
    keepers = np.array([x not in top_two for x in leverage])
    X_new = X[keepers, :]
    distance_new = scipy.spatial.distance.pdist(X_new)
    single_hierarchy_new = scipy.cluster.hierarchy.single(distance_new)
    plt.figure()
    # Normalized merge heights with reference lines at 0.4 and 0.05 to eyeball
    # a distance cutoff for forming the classes.
    plt.plot(single_hierarchy_new[:, 2]/np.max(single_hierarchy_new[:, 2]))
    plt.plot([0, 159], [.4, .4])
    plt.text(159/2, .4+.025, str(.4))
    plt.plot([0, 159], [.05, .05])
    plt.text(159/2, .05+.025, str(.05))
"bad_SALT2","clare_mega_bad","clare_probably_bad"] # visualizing bad ones coloring_bad = np.zeros(len(names_full)) names_in_mine = {} for i,lists in enumerate(special_look): for element in lists: if element in names_full: names_in_mine[element]=special_look_names[i] coloring_bad[names_full==element]=(i+1) coloring_standardized = coloring_bad.copy() for i,cluster in enumerate(set(coloring_bad)): coloring_standardized[coloring_bad==cluster]=i three_d_plot_funct(X_full,coloring_standardized,save=False) # my suggested bad SN leverage,rank=leverage_make(X_full) bad=names_full[np.max(leverage)==leverage] sorted(leverage)[1] second = names_full[sorted(leverage)[1]==leverage]