            label='Versicolor')
plt.title('Iris dataset feature')
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# Train the Perceptron model
perceptron = Perceptron(eta=0.1, n_iter=10)
perceptron.fit(X, y)

# Plot the number of updates per iteration
plt.plot(range(1, len(perceptron.errors_) + 1), perceptron.errors_, marker='o')
plt.title('Perceptron - Updates per epoch')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')
plt.show()

# Plot decision regions
plot_decision_regions(X, y, classifier=perceptron)
plt.title('Perceptron - Decision regions')
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')
plt.show()
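# The Perceptron class used above is not defined in this file. A minimal
# sketch consistent with the eta/n_iter constructor and the errors_ attribute
# referenced here (an assumption about the original implementation, following
# Rosenblatt's per-sample learning rule with bipolar -1/+1 labels):
import numpy as np

class Perceptron:
    """Perceptron classifier: thresholded linear unit, updated per sample."""
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta                # learning rate
        self.n_iter = n_iter          # number of passes (epochs) over the data
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []             # misclassifications per epoch
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)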
# Plot dataset
plt.scatter(X_xor[y_xor == 1, 0], X_xor[y_xor == 1, 1],
            c='b', marker='x', label='1')
plt.scatter(X_xor[y_xor == -1, 0], X_xor[y_xor == -1, 1],
            c='r', marker='s', edgecolor='black', label='-1')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

# SVM classifier with Gaussian kernel (RBF) for non-linearly separable data
svm = SVC(kernel='rbf', C=1.0, gamma=0.10, random_state=1)
svm.fit(X_xor, y_xor)

# Plot decision regions
plot_decision_regions(X=X_xor, y=y_xor, classifier=svm)
plt.title('SVM - linearly inseparable')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
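# X_xor / y_xor are not built in this snippet. A plausible way to generate
# such a toy XOR dataset (an assumption, not the original code):
import numpy as np

rng = np.random.RandomState(1)
X_xor = rng.randn(200, 2)                  # 200 points, 2 Gaussian features
y_xor = np.logical_xor(X_xor[:, 0] > 0,    # label is the XOR of the signs
                       X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)             # map True/False to +1/-1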
# Load iris data (all 150 examples)
iris_data = datasets.load_iris()

# Split into train and test samples (70% train, 30% test) and apply feature standardization
X_train_std, X_test_std, y_train, y_test = init_dataset(dataset=iris_data)

# Random Forest classifier (an ensemble of Decision Trees) using Gini impurity as the information gain criterion
forest = RandomForestClassifier(criterion='gini', n_estimators=25,
                                random_state=1, n_jobs=2)
forest.fit(X_train_std, y_train)

# Put together train and test data
X_std = np.vstack((X_train_std, X_test_std))
y = np.hstack((y_train, y_test))

# Plot decision regions
plot_decision_regions(X=X_std, y=y, classifier=forest, test_idx=range(105, 150))
plt.title('Random Forest - Scikit')
plt.xlabel('Petal length [standardized]')
plt.ylabel('Petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
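# init_dataset is a project helper, not shown here. Given the 70/30 split
# (105 train / 45 test, matching test_idx=range(105, 150)), the standardized
# outputs, and the petal-based axis labels, a hypothetical reconstruction:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def init_dataset(dataset, test_size=0.3):
    X = dataset.data[:, [2, 3]]            # petal length and petal width
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=1, stratify=y)
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)   # fit scaler on train data only
    X_test_std = sc.transform(X_test)         # reuse the train statistics
    return X_train_std, X_test_std, y_train, y_test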
y = np.where(y == 'Iris-setosa', 0, 1)  # Set Setosa to 0 and Versicolor to 1
X = df.iloc[0:100, [2, 3]].values       # Extract petal length (col 2) and petal width (col 3)

# Separate data for tests
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=1, stratify=y)
X_train_01_subset = X_train[(y_train == 0) | (y_train == 1)]
y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]

# Train model
lrgd = LogisticRegressionGD(eta=0.05, n_iter=1000, random_state_seed=1)
lrgd.fit(X_train_01_subset, y_train_01_subset)

# Plot decision regions
plot_decision_regions(X=X_train_01_subset, y=y_train_01_subset, classifier=lrgd)
plt.title('Logistic Regression (GD) - Decision regions')
plt.xlabel('Petal length [cm]')
plt.ylabel('Petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
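# LogisticRegressionGD is a custom class. A minimal sketch matching the
# eta/n_iter/random_state_seed parameters used above (an assumption:
# full-batch gradient descent on the logistic cost, 0/1 labels):
import numpy as np

class LogisticRegressionGD:
    def __init__(self, eta=0.05, n_iter=100, random_state_seed=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state_seed = random_state_seed

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state_seed)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            net_input = np.dot(X, self.w_[1:]) + self.w_[0]
            # Sigmoid activation; clip avoids overflow in exp
            output = 1.0 / (1.0 + np.exp(-np.clip(net_input, -250, 250)))
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # Negative log-likelihood cost per epoch
            cost = -y.dot(np.log(output)) - (1 - y).dot(np.log(1 - output))
            self.cost_.append(cost)
        return self

    def predict(self, X):
        return np.where(np.dot(X, self.w_[1:]) + self.w_[0] >= 0.0, 1, 0)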
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0, stratify=y)

# Standardize the features
stds = StandardScaler()
X_train_std = stds.fit_transform(X_train)
X_test_std = stds.transform(X_test)

pca = PCA(n_components=2)
lr = LogisticRegression(multi_class='ovr', random_state=1, solver='lbfgs')

# Dimensionality reduction
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

# Fit the logistic regression model on the reduced dataset
lr.fit(X_train_pca, y_train)

# Plot decision regions
plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()
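# df_wine is loaded elsewhere. The layout used above (class label in column 0,
# 13 features after it) matches the UCI Wine dataset; a plausible loader
# (assumed, not the original code):
import pandas as pd

df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/wine/wine.data',
                      header=None)  # column 0: class label, columns 1-13: features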
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Cost')
plt.show()

# Improve gradient descent convergence through feature scaling (standardization)
# Standardize each feature column to zero mean and unit variance
X_std = np.copy(X)
for j in range(X_std.shape[1]):  # iterate over feature columns, not array dimensions
    X_std[:, j] = (X[:, j] - X[:, j].mean()) / X[:, j].std()

# Train model
ada_gd = AdalineGD(n_iter=15, eta=0.01).fit(X_std, y)

# Plot decision regions
plot_decision_regions(X_std, y, classifier=ada_gd)
plt.title('ADALINE_GD - Decision regions')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

# Plot cost per epoch
plt.plot(range(1, len(ada_gd.cost_) + 1), ada_gd.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('ADALINE_GD - Cost per epoch')
plt.show()
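# AdalineGD is the custom ADALINE implementation whose cost_ history is
# plotted above. A minimal sketch matching that API (an assumption:
# full-batch gradient descent on the sum-of-squared-errors cost):
import numpy as np

class AdalineGD:
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = np.dot(X, self.w_[1:]) + self.w_[0]   # linear activation
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)   # SSE cost per epoch
        return self

    def predict(self, X):
        return np.where(np.dot(X, self.w_[1:]) + self.w_[0] >= 0.0, 1, -1)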
# Split into train and test samples (70% train, 30% test) and apply feature standardization
X_train_std, X_test_std, y_train, y_test = init_dataset(dataset=iris_data)

# Decision Tree using Gini impurity as the information gain criterion
tree_model = DecisionTreeClassifier(criterion='gini', max_depth=4, random_state=1)
tree_model.fit(X_train_std, y_train)

# Put together train and test data
X_std = np.vstack((X_train_std, X_test_std))
y = np.hstack((y_train, y_test))

# Plot decision regions
plot_decision_regions(X=X_std, y=y, classifier=tree_model, test_idx=range(105, 150))
plt.title('Decision Tree - Scikit')
plt.xlabel('Petal length [standardized]')
plt.ylabel('Petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

# Display tree graph using sklearn
# tree.plot_tree(tree_model)
# plt.show()

# Display tree graph using graphviz
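# One way the graphviz export could be done (a sketch, not the original code;
# assumes the optional graphviz Python package and system binaries are installed):
from sklearn.tree import export_graphviz
import graphviz

dot_data = export_graphviz(tree_model, out_file=None,
                           feature_names=['petal length', 'petal width'],
                           class_names=iris_data.target_names,
                           filled=True, rounded=True)
graph = graphviz.Source(dot_data)
graph.render('decision_tree', format='png')  # writes decision_tree.png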
stds = StandardScaler()
X_train_std = stds.fit_transform(X_train)
X_test_std = stds.transform(X_test)

lda = LDA(n_components=2)
lr = LogisticRegression(multi_class='ovr', random_state=1, solver='lbfgs')

# Dimensionality reduction (supervised: LDA uses the class labels)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

# Fit the logistic regression model on the reduced dataset
lr.fit(X_train_lda, y_train)

# Plot decision regions of train data
plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()

# Plot decision regions of test data
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()
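# plot_decision_regions, used by every snippet above, is a shared helper that
# is not shown in this file. A minimal sketch of a compatible implementation
# (an assumption; it mirrors the common meshgrid-based recipe and supports the
# test_idx parameter used in the tree and forest examples):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # Evaluate the classifier on a dense grid covering the feature plane
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    # Scatter the actual samples, one marker/color per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx], marker=markers[idx],
                    label=cl, edgecolor='black')
    # Highlight the test samples if their indices are given
    if test_idx is not None:
        X_test = X[test_idx, :]
        plt.scatter(X_test[:, 0], X_test[:, 1], c='none', edgecolor='black',
                    alpha=1.0, linewidth=1, marker='o', s=100, label='Test set')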