Example no. 1
import sys
sys.path.append("E:/New Folder/utils")

import classification_utils as cutils
from sklearn import model_selection, ensemble, tree, neighbors
import xgboost as xgb

#2-d classification pattern
X, y = cutils.generate_linear_synthetic_data_classification(n_samples=1000,
                                                            n_features=2,
                                                            n_classes=2,
                                                            weights=[0.5, 0.5],
                                                            class_sep=2)
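# note: the nonlinear dataset generated below replaces the linear one generated above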
X, y = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=1000,
                                                                noise=0.1)
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

#grid search for parameter values
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))}
final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid, X_train,
                                                y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)
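
# sketch only (not in the original): the held-out split created above can be used
# to score the tuned tree, assuming grid_search_best_model returns a fitted estimator
from sklearn import metrics
print(metrics.accuracy_score(y_test, final_estimator.predict(X_test)))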

knn_estimator = neighbors.KNeighborsClassifier()
# the listing is truncated here; the grid is closed and the search call is
# restored to mirror the decision-tree example above (assumed, not in the original)
knn_grid = {'n_neighbors': list(range(1, 21))}
knn_final_estimator = cutils.grid_search_best_model(knn_estimator, knn_grid,
                                                    X_train, y_train)
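
# sketch only: xgboost is imported above but unused in this fragment; an analogous
# grid search (parameter grid assumed, not from the original) could look like this
xgb_estimator = xgb.XGBClassifier()
xgb_grid = {'n_estimators': [50, 100, 200], 'max_depth': list(range(1, 6))}
xgb_final_estimator = cutils.grid_search_best_model(xgb_estimator, xgb_grid,
                                                    X_train, y_train)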

Example no. 2

import sys
path = 'J://utils'
sys.path.append(path)

from sklearn import cluster, manifold
import common_utils as utils
import clustering_utils as cl_utils
import classification_utils as cutils

X, _ = cutils.generate_nonlinear_synthetic_data_classification2(n_samples=300)
utils.plot_data_2d(X)

X, _ = cutils.generate_nonlinear_synthetic_data_classification3(n_samples=300)
utils.plot_data_2d(X)

tsne = manifold.TSNE()
X_tsne = tsne.fit_transform(X)
utils.plot_data_2d(X_tsne)

scoring = 's_score'
kmeans_estimator = cluster.KMeans()
kmeans_grid = {'n_clusters': list(range(2, 7))}
kmeans_final_model = cl_utils.grid_search_best_model_clustering(
    kmeans_estimator, kmeans_grid, X, scoring=scoring)
print(kmeans_final_model.labels_)
print(kmeans_final_model.cluster_centers_)
cl_utils.plot_model_2d_clustering(kmeans_final_model, X)
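
# sketch only: the 's_score' scoring presumably selects by silhouette score; the
# equivalent direct sklearn call for the chosen model would be
from sklearn import metrics
print(metrics.silhouette_score(X, kmeans_final_model.labels_))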

Example no. 3

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# the original listing shows no imports for the bare helper calls below; star
# imports from the course utility modules (assumed to be on sys.path as in the
# earlier examples) are used here as a guess at the intended setup
from common_utils import *
from classification_utils import *

X, y = generate_linear_synthetic_data_classification(n_samples=1000, n_features=3,
                                                      n_redundant=0, n_classes=3,
                                                      weights=[.3, .3, .4])
plot_data_3d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2)
plot_data_2d(X_kpca, new_window=True)
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
X_isomap = feature_reduction_isomap(X, 2)
plot_data_2d(X_isomap, new_window=True)
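
# sketch only: feature_reduction_linear_pca presumably wraps sklearn's PCA; a
# direct equivalent (assumption, the helper's source is not shown) would be
from sklearn import decomposition
X_pca_direct = decomposition.PCA(n_components=2).fit_transform(X)
plot_data_2d(X_pca_direct, new_window=True)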

X, y = generate_nonlinear_synthetic_data_classification2(n_samples=1000)
plot_data_2d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15)
plot_data_2d(X_kpca, new_window=True)
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
X_isomap = feature_reduction_isomap(X, 2, 100)
plot_data_2d(X_isomap, new_window=True)
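
# sketch only: the (2, 'rbf', 15) arguments above presumably map to a kernel PCA
# with an RBF kernel and gamma=15 (assumption, the helper's source is not shown)
from sklearn import decomposition
X_kpca_direct = decomposition.KernelPCA(n_components=2, kernel='rbf',
                                        gamma=15).fit_transform(X)
plot_data_2d(X_kpca_direct, new_window=True)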

X, y = generate_linear_synthetic_data_regression(n_samples=100, n_features=2, n_informative=2, noise=0)
plot_data_2d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15)