Example #1
import numpy as np

# ESTIMATORS, KWDS, and assert_columns_allclose_upto_sign are defined in the
# surrounding wpca test module
def test_outlier_weights():
    rand = np.random.RandomState(0)
    X = rand.multivariate_normal([0, 0], [[12, 6], [6, 5]], size=1000)
    pca = PCA(2).fit(X)

    def check_results(Estimator, n_outliers, noise_level, rtol):
        # corrupt a few entries with noise, then down-weight them in
        # inverse proportion to the noise level
        i = rand.randint(0, 100, size=n_outliers)
        j = rand.randint(0, 2, size=n_outliers)
        X2 = X.copy()
        X2[i, j] += noise_level * rand.randn(n_outliers)
        W2 = np.ones_like(X2)
        W2[i, j] = 1. / noise_level

        pca2 = Estimator(2, **KWDS[Estimator]).fit(X2, weights=W2)
        assert_columns_allclose_upto_sign(pca.components_.T,
                                          pca2.components_.T,
                                          rtol=rtol)

    for (n_outliers, noise_level, rtol) in [(1, 20, 1E-3), (10, 10, 3E-2)]:
        for Estimator in ESTIMATORS:
            yield check_results, Estimator, n_outliers, noise_level, rtol
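# For context, a minimal standalone sketch of what this test checks (an
# illustration, not part of the test suite): entries corrupted by noise are
# down-weighted in proportion to the noise level, and wpca's WPCA should then
# recover nearly the same components as a fit on the clean data. The
# fit(X, weights=...) signature matches the call in the test above.
import numpy as np
from wpca import WPCA

rand = np.random.RandomState(0)
X = rand.multivariate_normal([0, 0], [[12, 6], [6, 5]], size=1000)
clean = WPCA(n_components=2).fit(X).components_

X2 = X.copy()
X2[0, 0] += 20 * rand.randn()   # inject one large outlier
W2 = np.ones_like(X2)
W2[0, 0] = 1. / 20              # down-weight the corrupted entry

noisy = WPCA(n_components=2).fit(X2, weights=W2).components_

# the recovered components should agree with the clean ones up to sign
for a, b in zip(clean, noisy):
    sign = np.sign(np.dot(a, b))
    np.testing.assert_allclose(a, sign * b, rtol=1e-3)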
Example #2
# w0, w, and weights are weight arrays defined in earlier (omitted) cells
w0.shape, w.shape, df[weights.values.ravel() != 0].shape, df.shape

# In[9]:

from wpca import WPCA as PCA

# In[10]:

from sklearn.pipeline import Pipeline
# from sklearn.decomposition import PCA
from sklearn.preprocessing import Imputer, StandardScaler
# (Imputer was removed in scikit-learn 0.22; newer versions use
#  sklearn.impute.SimpleImputer instead)

imp = Imputer(strategy='mean')
scl = StandardScaler()
pca = PCA()
pipeline = Pipeline([
    ('imp', imp),
    ('scl', scl),
    ('pca', pca),
])
scaler_pipeline = Pipeline([
    ('imp', imp),
    ('scl', scl),
])
data_pca = pipeline.fit_transform(df)
# imp and scl are the very objects fitted inside `pipeline` above, so
# scaler_pipeline can transform here without a separate fit
_scaled = scaler_pipeline.transform(df)
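# Because WPCA accepts per-entry weights, missing values can also be handled
# without mean imputation: give them zero weight so they are ignored in the
# fit. A sketch (assumes df is the pandas DataFrame used above; the fill
# value is a placeholder and does not matter, since its weight is zero):
W = (~df.isnull()).astype(float).values   # 0 where a value is missing
X0 = df.fillna(0.).values
wpca_nan = PCA(n_components=2).fit(X0, weights=W)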

# ### Explained variance
#
# How much of the variance in the data is explained by each successive component?
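# A sketch answering this for the pipeline fitted above, assuming WPCA
# exposes explained_variance_ratio_ the way sklearn's PCA does:
import numpy as np
import matplotlib.pyplot as plt

plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()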
Example #4
File: PCA.py Project: jessipapa/PCA
# X_std is the standardized data matrix prepared earlier in the file
print('NumPy covariance matrix: \n%s' % np.cov(X_std.T))

# Perform eigendecomposition on the covariance matrix
cov_mat = np.cov(X_std.T)
eig_vals, eig_vecs = np.linalg.eig(cov_mat)
print('Eigenvectors \n%s' % eig_vecs)
print('\nEigenvalues \n%s' % eig_vals)

# Pair eigenvalues with eigenvectors, sort by decreasing eigenvalue
# (np.linalg.eig does not guarantee any ordering), and visually confirm
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
eig_pairs.sort(key=lambda x: x[0], reverse=True)
print('Eigenvalues in descending order:')
for i in eig_pairs:
    print(i[0])
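# Sanity check (a sketch; assumes PCA here is sklearn.decomposition.PCA):
# the sorted covariance eigenvalues should equal PCA's explained_variance_,
# since both use the n-1 normalization, up to floating-point noise.
pca_check = PCA().fit(X_std)
print(np.allclose(sorted(eig_vals, reverse=True), pca_check.explained_variance_))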

# Variable loadings: project the identity matrix to see how each original
# feature maps onto the first two components (df1 and M are feature
# DataFrames defined earlier in the file)
pca = PCA(n_components=2)
pca.fit_transform(df1)
i = np.identity(df1.shape[1])
coef = pca.transform(i)
loadings = pd.DataFrame(coef, columns=['PC-1', 'PC-2'], index=M.columns)
print(loadings.head(10))
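# Note: transform() subtracts the fitted mean, so projecting the identity
# matrix gives the loadings only up to a constant offset per column. The
# loadings are usually read directly from components_ instead; a sketch:
loadings_direct = pd.DataFrame(pca.components_.T,
                               columns=['PC-1', 'PC-2'],
                               index=M.columns)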

# Print the fraction of variance explained by the first two principal components
print(pca.explained_variance_ratio_)

# Explained variance
pca = PCA().fit(X_std)
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()
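# A common follow-up (a sketch; the 95% threshold is an arbitrary choice):
# pick the smallest number of components whose cumulative ratio crosses it.
cum = np.cumsum(pca.explained_variance_ratio_)
n_components_95 = np.argmax(cum >= 0.95) + 1
print('components needed for 95%% of the variance: %d' % n_components_95)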