def test2_normalize():
	"""Check normalize_matrix on the 2x2 identity matrix.

	Each input column holds one 1 and one 0, so its mean is 0.5 and its
	variance is (0.5)^2, i.e. a standard deviation of 0.5.  The test expects
	every entry to come back as +1 or -1, which is what centering on the
	column mean and dividing by that standard deviation yields.
	"""
	identity = np.array([[1, 0], [0, 1]])
	expected = np.array([[1, -1], [-1, 1]])

	actual = normalize_matrix(identity)

	assert_allclose(actual, expected)
Пример #2
0
########################

# Our thinking on this matter is as follows:
# First we'd like to run the methods on data with and without outliers (2 points
# have very high leverage), and on data that has and hasn't been scaled and
# centered.

# These will be kept as numpy arrays until they need to be converted to pandas
# DataFrame format.


# Since we have outliers, the earlier normalizing is presumably distorted by
# them.  NOTE(review): the original comment was cut off mid-sentence - confirm
# the intended wording.
# The standard deviation of the first column is computed as a quick check; its
# value is discarded when this runs as a script.
np.std(X_full[:,0])

# Scaled copy of the full data; X_full itself is kept unscaled.
X_full_scaled = normalize_matrix(X_full)


# Identify outliers via leverage.  (Original note: for our data this works
# whether the leverage analysis is done in one run or in two.)
leverage,X_rank = leverage_make(X_full)

# The two largest leverage values mark the outliers.  Sort once up front rather
# than re-sorting inside the comprehension for every element.
top_two_leverage = sorted(leverage)[-2:]

# Boolean mask: True for rows to keep.  Membership is tested by value, so any
# row whose leverage equals one of the two largest values is dropped.
keepers         = np.array([x not in top_two_leverage for x in leverage])

# Data and names with the flagged rows removed.
X_wo            = X_full[keepers,:]
names_wo        = list(np.array(names_full)[keepers])

X_wo_scaled     = normalize_matrix(X_wo)