X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

# perceptron algorithm
stages = [('features', preprocessing.PolynomialFeatures()),
          ('clf', linear_model.Perceptron(max_iter=1000))]
perceptron_pipeline = pipeline.Pipeline(stages)
# the source listed a gamma grid here, but PolynomialFeatures has no gamma
# parameter (gamma belongs to the kernel transformer used in the rbf variant
# of this script); a degree grid, as used elsewhere in these scripts, is
# substituted so the search runs
perceptron_pipeline_grid = {'features__degree': [2, 3]}
pipeline_object = comutils.grid_search_best_model(
    perceptron_pipeline, perceptron_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

# logistic regression algorithm
stages = [('features', preprocessing.PolynomialFeatures()),
          ('clf', linear_model.LogisticRegression())]
lr_pipeline = pipeline.Pipeline(stages)
lr_pipeline_grid = {'features__degree': [2, 3]}  # gamma grid replaced; see note above
pipeline_object = comutils.grid_search_best_model(
    lr_pipeline, lr_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

# linear svm algorithm (the excerpt ends here)
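# comutils.grid_search_best_model is a local helper from E:/utils whose source
# isn't shown in this excerpt. A minimal sketch, assuming it is a thin wrapper
# over GridSearchCV that reports and returns the refit best model (the cv=10
# default is an assumption):
def grid_search_best_model(estimator, grid, X, y, cv=10):
    grid_estimator = model_selection.GridSearchCV(estimator, grid,
                                                  cv=cv, refit=True)
    grid_estimator.fit(X, y)
    print(grid_estimator.best_params_)
    print(grid_estimator.best_score_)
    return grid_estimator.best_estimator_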
X, y = cutils.generate_nonlinear_synthetic_data_classification2(
    n_samples=1000, noise=0.1)
cutils.plot_data_2d_classification(X, y)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

# grid search for parameter values
dt_estimator = tree.DecisionTreeClassifier()
dt_grid = {'criterion': ['gini', 'entropy'], 'max_depth': list(range(1, 9))}
final_estimator = cutils.grid_search_best_model(dt_estimator, dt_grid,
                                                X_train, y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

knn_estimator = neighbors.KNeighborsClassifier()
knn_grid = {'n_neighbors': list(range(1, 21)),
            'weights': ['uniform', 'distance']}
final_estimator = cutils.grid_search_best_model(knn_estimator, knn_grid,
                                                X_train, y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)

rf_estimator = ensemble.RandomForestClassifier()
rf_grid = {'max_depth': list(range(5, 10)),
           'n_estimators': list(range(1, 100, 20))}
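# the excerpt defines rf_grid but ends before using it; following the pattern
# of the decision-tree and k-NN searches above, the next step would be:
final_estimator = cutils.grid_search_best_model(rf_estimator, rf_grid,
                                                X_train, y_train)
cutils.plot_model_2d_classification(final_estimator, X_train, y_train)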
model = getModel3()
model.compile(optimizer='sgd', loss='categorical_crossentropy',
              metrics=['accuracy'])
# fit() accepts verbose levels 0, 1 or 2; the source passed 3
history = model.fit(x=X_train, y=y_train1, verbose=2, epochs=100,
                    batch_size=10, validation_split=0.1)
model.summary()  # summary() prints directly; print(model.summary()) shows None
print(model.get_weights())
kutils.plot_loss(history)
cutils.plot_model_2d_classification(model, X_train, y_train, use_keras=True)
y_pred = model.predict_classes(X_test)
cutils.performance_metrics_hard_multiclass_classification(
    model, X_test, y_test, use_keras=True)

# refit with a larger batch size; note this continues training the
# already-fitted model rather than starting from fresh weights
history = model.fit(x=X_train, y=y_train1, verbose=2, epochs=100,
                    batch_size=32, validation_split=0.1)
model.summary()
print(model.get_weights())
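# getModel3 is defined elsewhere in these scripts. A minimal sketch consistent
# with the compile/fit calls above (a small MLP with a softmax head for the
# one-hot y_train1 targets); the layer sizes and class count are assumptions:
from keras import models, layers

def getModel3(n_features=2, n_classes=2):
    model = models.Sequential()
    model.add(layers.Dense(10, activation='relu', input_shape=(n_features,)))
    model.add(layers.Dense(n_classes, activation='softmax'))
    return model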
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
cutils.plot_data_2d_classification(X_train, y_train)

# perceptron algorithm
stages = [('features', kutils.KernelTransformer('rbf')),
          ('clf', linear_model.Perceptron(max_iter=1000))]
perceptron_pipeline = pipeline.Pipeline(stages)
perceptron_pipeline_grid = {'features__gamma': [0.1, 0.01, 0.2]}
pipeline_object = cutils.grid_search_best_model(
    perceptron_pipeline, perceptron_pipeline_grid, X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

# logistic regression algorithm
stages = [('features', kutils.KernelTransformer('rbf')),
          ('clf', linear_model.LogisticRegression())]
lr_pipeline = pipeline.Pipeline(stages)
lr_pipeline_grid = {'features__gamma': [0.1, 1, 5, 10]}
pipeline_object = cutils.grid_search_best_model(lr_pipeline, lr_pipeline_grid,
                                                X_train, y_train)
final_estimator = pipeline_object.named_steps['clf']
print(final_estimator.intercept_)
cutils.plot_model_2d_classification(pipeline_object, X_train, y_train)

# linear svm algorithm
stages = [('features', kutils.KernelTransformer('poly')),
          # the classifier step was truncated in the source; a linear SVM
          # matching the section comment is assumed here (requires
          # `from sklearn import svm`)
          ('clf', svm.LinearSVC())]
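# kutils.KernelTransformer is a custom transformer whose source isn't shown.
# A minimal sketch of one plausible implementation, mapping X to kernel
# similarities against the training samples via scikit-learn's pairwise
# kernels, so the linear model downstream behaves like a kernel machine
# (the constructor signature is inferred from the usage above):
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics.pairwise import pairwise_kernels

class KernelTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, kernel='rbf', gamma=None, degree=3):
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree

    def fit(self, X, y=None):
        self.X_fit_ = X  # remember the training samples
        return self

    def transform(self, X):
        # filter_params drops kwargs the chosen kernel doesn't accept
        return pairwise_kernels(X, self.X_fit_, metric=self.kernel,
                                filter_params=True,
                                gamma=self.gamma, degree=self.degree)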
import sys
sys.path.append("E:/utils")
import common_utils as comutils
import classification_utils as cutils
from sklearn import preprocessing, linear_model, pipeline

X, y = cutils.generate_nonlinear_synthetic_data_classification2(
    n_samples=1000, noise=0.1)
X, y = cutils.generate_nonlinear_synthetic_data_classification3(
    n_samples=1000, noise=0.1)
cutils.plot_data_2d_classification(X, y)

stages = [('features', preprocessing.PolynomialFeatures()),
          ('perceptron', linear_model.Perceptron(max_iter=1000))]
perceptron_pipeline = pipeline.Pipeline(stages)
perceptron_pipeline_grid = {'perceptron__penalty': ['l1'],
                            'perceptron__alpha': [0, 0.1, 0.3, 0.5],
                            'features__degree': [2, 3]}
pipeline_object = comutils.grid_search_best_model(
    perceptron_pipeline, perceptron_pipeline_grid, X, y)
final_estimator = pipeline_object.named_steps['perceptron']
print(final_estimator.intercept_)
print(final_estimator.coef_)
cutils.plot_model_2d_classification(pipeline_object, X, y)
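# for intuition on what the 'features' step feeds the perceptron: with two
# inputs and degree=2, PolynomialFeatures expands each sample [x1, x2] into
# [1, x1, x2, x1^2, x1*x2, x2^2], so the perceptron's linear boundary in the
# expanded space is non-linear in the original one. A quick check:
import numpy as np
poly = preprocessing.PolynomialFeatures(degree=2)
print(poly.fit_transform(np.array([[2.0, 3.0]])))
# [[1. 2. 3. 4. 6. 9.]]  ->  1, x1, x2, x1^2, x1*x2, x2^2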
# the opening of this data-generation call was truncated in the source; a
# cutils wrapper around sklearn.datasets.make_classification is assumed here
X, y = cutils.generate_linear_synthetic_data_classification(
    n_samples=1000, n_features=2, n_classes=4, weights=[0.3, 0.3, 0.3, 0.3])
# make_classification returns X, an array of shape [n_samples, n_features]
# holding the feature values, and y, an array of shape [n_samples] holding
# the integer class label of each sample
cutils.plot_data_2d_classification(X, y)

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=1)
# model_selection.train_test_split splits arrays or matrices into random
# train and test subsets
# test_size: the proportion of the dataset to include in the test split
# random_state: if an int, the seed used by the random number generator
cutils.plot_data_2d_classification(X_train, y_train)
cutils.plot_data_2d_classification(X_test, y_test)

knn_estimator = neighbors.KNeighborsClassifier()
knn_estimator.fit(X_train, y_train)
cutils.plot_model_2d_classification(knn_estimator, X_train, y_train)
y_pred = knn_estimator.predict(X_test)
metrics.accuracy_score(y_test, y_pred)
'''
In multilabel classification, accuracy_score computes subset accuracy:
the set of labels predicted for a sample must exactly match the
corresponding set of labels in y_true.
y_test: ground truth (correct) labels
y_pred: predicted labels, as returned by a classifier
'''
metrics.confusion_matrix(y_test, y_pred)
'''
By definition a confusion matrix C is such that C[i, j] is equal to the
number of observations known to be in group i but predicted to be in
group j. Thus in binary classification, the count of true negatives is
C[0, 0], false negatives is C[1, 0], true positives is C[1, 1] and false
positives is C[0, 1].
'''
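# a tiny worked example of the C[i, j] convention:
y_true_demo = [0, 0, 1, 1, 1]
y_pred_demo = [0, 1, 1, 1, 0]
print(metrics.confusion_matrix(y_true_demo, y_pred_demo))
# [[1 1]   row 0: 1 true negative,  1 false positive
#  [1 2]]  row 1: 1 false negative, 2 true positives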
cv_scores = model_selection.cross_val_score(svoting_estimator, X_train, y_train)
print(np.mean(cv_scores))

svoting_estimator = ensemble.VotingClassifier(
    [('dt', dt_estimator), ('knn', knn_estimator), ('rf', rf_estimator)],
    voting='soft', weights=[1, 1, 3])
svoting_estimator.fit(X_train, y_train)
cv_scores = model_selection.cross_val_score(svoting_estimator, X_train, y_train)
print(np.mean(cv_scores))

svoting_estimator = ensemble.VotingClassifier(
    [('dt', dt_estimator), ('knn', knn_estimator), ('rf', rf_estimator)])
svoting_grid = {'voting': ['hard', 'soft'],
                'weights': [(1, 1, 1), (1, 1, 2), (1, 1, 3)]}
svoting_grid_estimator = model_selection.GridSearchCV(svoting_estimator,
                                                      svoting_grid,
                                                      cv=10, refit=True)
svoting_grid_estimator.fit(X_train, y_train)
print(svoting_grid_estimator.best_params_)
print(svoting_grid_estimator.best_score_)
# the source plotted an undefined hvoting_estimator here; the refit best
# model from the grid search is plotted instead
cutils.plot_model_2d_classification(svoting_grid_estimator.best_estimator_,
                                    X_train, y_train)
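# the cross-validation scores above are all computed on the training split;
# a natural final step (not in the excerpt) is to score the tuned ensemble
# on the held-out test data:
best_voting = svoting_grid_estimator.best_estimator_
print(best_voting.score(X_test, y_test))  # test-set accuracy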