import sys
path = 'J://utils'
sys.path.append(path)

import numpy as np
from sklearn import (cluster, datasets, decomposition, manifold, mixture,
                     model_selection)

import classification_utils as cutils
import clustering_utils as cl_utils
import common_utils as utils

# Plot two nonlinear synthetic datasets; X ends up holding the second one,
# exactly as in the original back-to-back assignments.
for make_data in (cutils.generate_nonlinear_synthetic_data_classification2,
                  cutils.generate_nonlinear_synthetic_data_classification3):
    X, _ = make_data(n_samples=300)
    utils.plot_data_2d(X)

# Embed the (second) dataset in 2-D with t-SNE and plot the embedding.
tsne = manifold.TSNE()
X_tsne = tsne.fit_transform(X)
utils.plot_data_2d(X_tsne)

# Grid-search the k-means cluster count over 2..6 on the raw data (not the
# t-SNE embedding). NOTE(review): 's_score' is presumably a silhouette-based
# criterion understood by clustering_utils — confirm in that module.
scoring = 's_score'
kmeans_estimator = cluster.KMeans()
kmeans_grid = {'n_clusters': [2, 3, 4, 5, 6]}
kmeans_final_model = cl_utils.grid_search_best_model_clustering(
    kmeans_estimator, kmeans_grid, X, scoring=scoring)
print(kmeans_final_model.labels_)
print(kmeans_final_model.cluster_centers_)
cl_utils.plot_model_2d_clustering(kmeans_final_model, X)
# --- Handwritten-digits demo: PCA compression, t-SNE view, GMM model search ---
# load the data
digits = datasets.load_digits()
print(digits.data.shape)

# NOTE(review): plot_digits is not defined or imported anywhere in this file —
# presumably a helper lost when this snippet was extracted; confirm before running.
plot_digits(digits.data, "Original Digits")

# NOTE(review): the result here is discarded, and with the default rowvar=True
# this correlates the rows (samples), not the 64 pixel features — a feature
# correlation matrix would need np.corrcoef(digits.data, rowvar=False); confirm intent.
np.corrcoef(digits.data)
# project the 64-dimensional data to a lower dimension
pca = decomposition.PCA(n_components=30, whiten=False)
pca_digits = pca.fit_transform(digits.data)
print(pca.explained_variance_)
print(pca.explained_variance_ratio_)
print(pca.components_)
print(pca_digits.shape)
#incorrect visualization with only 2 pcs
utils.plot_data_2d(pca_digits[:, 0:2])

#tsne on pca  data
# Nonlinear 2-D embedding of the 30-component PCA projection; rebinds `tsne`
# from the earlier snippet.
tsne = manifold.TSNE()
tsne_digits = tsne.fit_transform(pca_digits)
print(tsne.kl_divergence_)
print(tsne_digits.shape)
cutils.plot_data_2d_classification(tsne_digits, digits.target)

#using GMM
# Grid-search the number of mixture components (50..190 step 10) on the PCA
# features; GridSearchCV's default scoring falls back to the estimator's own
# score() (per-sample average log-likelihood for GaussianMixture).
gmm_estimator = mixture.GaussianMixture()
gmm_params = {'n_components': np.arange(50, 200, 10)}
gmm_grid_estimator = model_selection.GridSearchCV(gmm_estimator, gmm_params)
gmm_grid_estimator.fit(pca_digits)
gmm_best_estimator = gmm_grid_estimator.best_estimator_
# --- Exemplo n.º 3 ---
# NOTE(review): the bare tokens "Exemplo n.º 3" / "0" were scraper residue
# marking a snippet boundary (not valid Python); preserved here as a comment.
import sys

path = 'E://utils'
sys.path.append(path)

from sklearn import cluster
import common_utils as utils
import clustering_utils as cl_utils
import classification_utils as cutils

# Visualize two nonlinear synthetic datasets; as in the original, X is left
# bound to the second (classification3) dataset afterwards.
for generator in (cutils.generate_nonlinear_synthetic_data_classification2,
                  cutils.generate_nonlinear_synthetic_data_classification3):
    X, _ = generator(n_samples=300)
    utils.plot_data_2d(X)

# Pick the best k for k-means over 2..6. NOTE(review): 's_score' is presumably
# a silhouette-based criterion handled inside clustering_utils — confirm there.
scoring = 's_score'
kmeans_estimator = cluster.KMeans()
kmeans_grid = {'n_clusters': [2, 3, 4, 5, 6]}
kmeans_final_model = cl_utils.grid_search_best_model_clustering(
    kmeans_estimator, kmeans_grid, X, scoring=scoring)
print(kmeans_final_model.labels_)
print(kmeans_final_model.cluster_centers_)
cl_utils.plot_model_2d_clustering(kmeans_final_model, X)
# NOTE(review): the first line of this multi-line import was lost when the
# snippet was extracted; module name reconstructed by analogy with the
# feature_reduction_utils / kernel_utils imports below — confirm against source.
from regression_utils import \
    grid_search_plot_models_regression, plot_coefficients_regression, \
    plot_target_and_transformed_target_regression, rmse, regression_performance
from feature_reduction_utils import feature_reduction_linear_pca, feature_reduction_kernel_pca, \
    feature_reduction_tsne, feature_reduction_isomap
from kernel_utils import GaussianFeatures, KernelTransformer
from sklearn.model_selection import train_test_split
from sklearn import metrics, decomposition, manifold
from sklearn import tree, covariance, linear_model, ensemble, neighbors, svm, model_selection, feature_selection
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def _reduce_and_plot(reducer, data, *extra):
    """Apply a 2-component feature reduction and scatter-plot it in a new window."""
    reduced = reducer(data, 2, *extra)
    plot_data_2d(reduced, new_window=True)
    return reduced


# Linearly separable 3-class data in 3-D, viewed through four 2-D reductions.
X, y = generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=3, n_redundant=0, n_classes=3,
    weights=[.3, .3, .4])
plot_data_3d(X)
X_lpca = _reduce_and_plot(feature_reduction_linear_pca, X)
X_kpca = _reduce_and_plot(feature_reduction_kernel_pca, X)
X_tsne = _reduce_and_plot(feature_reduction_tsne, X)
X_isomap = _reduce_and_plot(feature_reduction_isomap, X)

# Nonlinear dataset: same reductions (isomap omitted, as in the original);
# kernel PCA receives the extra positional args 'rbf', 15 here — presumably a
# kernel name and a kernel parameter; confirm in feature_reduction_utils.
X, y = generate_nonlinear_synthetic_data_classification2(n_samples=1000)
plot_data_2d(X)
X_lpca = _reduce_and_plot(feature_reduction_linear_pca, X)
X_kpca = _reduce_and_plot(feature_reduction_kernel_pca, X, 'rbf', 15)
X_tsne = _reduce_and_plot(feature_reduction_tsne, X)