def load_iris_dataset(self):
        """Load Iris, split it 70/30 and store standardized train/test sets.

        Only feature columns 2 and 3 of scikit-learn's Iris data are used
        (petal length and petal width in scikit-learn's feature ordering).
        A scatter plot of the raw samples is saved to the test-resources
        plot directory before splitting.

        The results are stored on the instance:
        ``self.x_train`` / ``self.x_test`` hold the standardized features and
        ``self.y_train`` / ``self.y_test`` the matching integer class labels.
        """
        # scikit-learn ships Iris with integer class labels
        # (0=Iris-Setosa, 1=Iris-Versicolor, 2=Iris-Virginica).
        iris = datasets.load_iris()
        x = iris.data[:, [2, 3]]
        y = iris.target
        print('Class labels:', np.unique(y))

        # Plot the raw data and save it to file.
        Plotter.plot_iris_data_set(x, FilesystemUtils.get_test_resources_plot_file_name(
            'ScikitLearn-Iris-Training-Set.png'))

        # Split into 70% training and 30% test data; stratify=y keeps the
        # class proportions of y in both splits.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1, stratify=y)
        print('Labels counts in y:', np.bincount(y))
        print('Labels counts in y_train:', np.bincount(y_train))
        print('Labels counts in y_test:', np.bincount(y_test))

        # Standardize features. The scaler is fitted on the training split
        # ONLY and then applied to both splits: re-fitting on the test split
        # would scale train and test with different means/deviations and leak
        # test-set statistics into the evaluation.
        sc = StandardScaler()
        sc.fit(x_train)
        x_train_std = sc.transform(x_train)
        x_test_std = sc.transform(x_test)

        self.x_train = x_train_std
        self.y_train = y_train

        self.x_test = x_test_std
        self.y_test = y_test
示例#2
0
    def test_logistic_regresssion(self):
        """Train LogisticRegressionBGD on the fixture data and save its plots.

        Fits the model on ``self.x`` / ``self.y`` with learning rate 0.05 for
        1000 epochs, then writes two images under ``logistic_regression/``:
        the learning curve (log10 of the recorded cost per entry of
        ``cost``) and the decision boundary over the training samples.
        """
        # Train the logistic regression model.
        logistic_regression = LogisticRegressionBGD(learning_rate=0.05,
                                                    num_epochs=1000)
        logistic_regression.fit(self.x, self.y)

        # Plot the learning curve. The plotted values are log10(cost), so the
        # y axis is labelled accordingly (it previously carried the
        # perceptron's 'Number of updates' label).
        curve = {
            'cost_length': len(logistic_regression.cost),
            'cost': np.log10(logistic_regression.cost),
            'marker': 'o',
            'x_label': 'Epochs',
            'y_label': 'log(Cost)',
            'title': 'Logistic regression - Learning rate 0.05'
        }
        Plotter.plot_learning_curve(
            curve,
            FilesystemUtils.get_test_resources_plot_file_name(
                'logistic_regression/LogisticRegressionBGD-Learning-Curve.png')
        )

        # Plot the decision boundary.
        # NOTE(review): the labels assume the logistic-regression fixture
        # that selects Iris columns 2 and 3 (petal length / petal width) and
        # standardizes them - confirm this is the setUp of this test class.
        diagram_options = {
            'x_label': 'petal length [standardized]',
            'y_label': 'petal width [standardized]',
            'legend': 'upper left'
        }
        Plotter.plot_decision_boundary(
            self.x,
            self.y,
            classifier=logistic_regression,
            diagram_options=diagram_options,
            image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
                'logistic_regression/LogisticRegressionBGD-Decision-Boundary.png'
            ))
    def test_perceptron(self):
        """Fit a Perceptron on the fixture data and save its diagnostic plots.

        The model is trained on ``self.x`` / ``self.y`` with learning rate 0.1
        for 10 epochs. Two images are written under ``perceptron/``: the
        learning curve built from the model's ``cost`` history and the
        decision boundary drawn over the training samples.
        """
        model = Perceptron(learning_rate=0.1, num_epochs=10)
        model.fit(self.x, self.y)

        # Learning curve: the model's recorded ``cost`` values, plotted as-is.
        learning_curve = dict(
            cost_length=len(model.cost),
            cost=model.cost,
            marker='o',
            x_label='Epochs',
            y_label='Number of updates',
            title='Perceptron - Learning rate 0.1',
        )
        curve_file = FilesystemUtils.get_test_resources_plot_file_name(
            'perceptron/Perceptron-Learning-Curve.png')
        Plotter.plot_learning_curve(learning_curve, curve_file)

        # Decision boundary over the same samples the model was trained on.
        boundary_options = dict(
            x_label='sepal length [cm]',
            y_label='petal length [cm]',
            legend='upper left',
        )
        boundary_file = FilesystemUtils.get_test_resources_plot_file_name(
            'perceptron/Perceptron-Decision-Boundary.png')
        Plotter.plot_decision_boundary(
            self.x,
            self.y,
            classifier=model,
            diagram_options=boundary_options,
            image_file_path=boundary_file)
    def setUp(self):
        """Read the Iris data file and save a plot of the loaded samples.

        The features and labels returned by ``IrisDataReader.get_data()`` are
        stored as ``self.x`` and ``self.y`` for the tests to use.
        """
        data_file = FilesystemUtils.get_resources_data_file_name('iris/iris.data')
        reader = IrisDataReader(data_file)
        features, labels = reader.get_data()
        self.x = features
        self.y = labels

        # Save a picture of the training samples next to the other plots.
        plot_file = FilesystemUtils.get_test_resources_plot_file_name(
            'adaline/Adaline-Training-Set.png')
        Plotter.plot_iris_data_set(self.x, plot_file)
示例#5
0
    def load_svm_nonlinear_data_set(self):
        """Generate a seeded XOR-style data set that is not linearly separable.

        200 two-dimensional points are drawn from a standard normal
        distribution (NumPy global seed fixed to 1). A point is labelled 1
        when exactly one of its two coordinates is positive and -1 otherwise.
        The samples are plotted to file and stored as ``self.x_train`` /
        ``self.y_train``.
        """
        np.random.seed(1)
        samples = np.random.randn(200, 2)

        # XOR of the coordinate signs decides the class: True -> 1, False -> -1.
        first_is_positive = samples[:, 0] > 0
        second_is_positive = samples[:, 1] > 0
        labels = np.where(
            np.logical_xor(first_is_positive, second_is_positive), 1, -1)

        # Plot the generated data and save it to file.
        plot_file = FilesystemUtils.get_test_resources_plot_file_name(
            'svm/SVM-ScikitLearn-NonLinear-Training-Set.png')
        Plotter.plot_svm_nonlinear_data_set(samples, labels, plot_file)

        self.x_train = samples
        self.y_train = labels
示例#6
0
    def test_scikit_learn_svm_nonlinear(self):
        """Fit an RBF-kernel SVC on the training data and plot its boundary.

        The classifier is trained on ``self.x_train`` / ``self.y_train`` and
        the resulting decision boundary is saved under ``svm/``.
        """
        # gamma controls the width of the RBF (Gaussian) kernel. A larger
        # gamma makes the kernel narrower, so each training sample only
        # influences its close neighbourhood, which yields a tighter and
        # bumpier decision boundary. gamma=0.10 keeps the boundary fairly
        # smooth.
        classifier = SVC(kernel='rbf', random_state=1, gamma=0.10, C=10.0)
        classifier.fit(self.x_train, self.y_train)

        options = dict(
            x_label='feature 1',
            y_label='feature 2',
            legend='best',
        )
        boundary_file = FilesystemUtils.get_test_resources_plot_file_name(
            'svm/SVM-ScikitLearn-NonLinear-Decision-Boundary.png')
        Plotter.plot_decision_boundary(
            self.x_train,
            self.y_train,
            classifier,
            options,
            image_file_path=boundary_file)
 def predict_and_evaluate(self, perceptron: Perceptron, image_file_path: str = None):
     """Evaluate a fitted classifier on the test split and plot its boundary.

     Prints the number of misclassified test samples and the value returned
     by the classifier's ``score`` method, then draws the decision boundary
     over the stacked train + test data with the test samples highlighted.

     :param perceptron: fitted classifier exposing ``predict`` and ``score``.
     :param image_file_path: forwarded unchanged to
         ``Plotter.plot_decision_boundary``; defaults to ``None``.
     """
     # Run predictions and count the number of misclassified examples.
     y_pred = perceptron.predict(self.x_test)
     print('Misclassified samples: %d' % (self.y_test != y_pred).sum())
     # Evaluate model accuracy.
     # Each classifier in scikit-learn has a score method, which computes a
     # classifier's prediction accuracy by combining the predict call with
     # the accuracy_score call.
     print('Accuracy: %.2f' % perceptron.score(self.x_test, self.y_test))

     # Training rows come first in the stacked arrays, test rows follow.
     x_combined_std = np.vstack((self.x_train, self.x_test))
     y_combined = np.hstack((self.y_train, self.y_test))

     # Derive the test-sample index range from the actual split sizes rather
     # than hard-coding range(105, 150); for a 105/45 split the result is
     # identical, and other split sizes are highlighted correctly too.
     first_test_index = len(self.x_train)
     diagram_options = {
         'x_label': 'petal length [standardized]',
         'y_label': 'petal width [standardized]',
         'legend': 'upper left',
         'draw_test_samples': range(first_test_index, len(x_combined_std))
     }
     Plotter.plot_decision_boundary(x_combined_std, y_combined, perceptron, diagram_options,
                                    image_file_path=image_file_path)
示例#8
0
    def setUp(self):
        """Prepare a standardized two-class Iris subset for the tests.

        Takes feature columns 2 and 3 of scikit-learn's Iris data, keeps only
        the samples labelled 0 or 1, standardizes the kept features and
        stores them as ``self.x`` with their labels in ``self.y``. A plot of
        the prepared samples is saved under ``logistic_regression/``.
        """
        iris = datasets.load_iris()
        features = iris.data[:, [2, 3]]
        labels = iris.target

        # Boolean mask selecting the samples of classes 0 and 1.
        keep = (labels == 0) | (labels == 1)
        binary_features = features[keep]
        binary_labels = labels[keep]

        # Standardize the kept features with statistics from that same subset.
        scaler = StandardScaler()
        scaler.fit(binary_features)
        self.x = scaler.transform(binary_features)

        self.y = binary_labels
        print('Class labels:', np.unique(self.y))

        # Plot the prepared data and save it to file.
        plot_file = FilesystemUtils.get_test_resources_plot_file_name(
            'logistic_regression/LogisticRegressionBGD-Training-Set.png')
        Plotter.plot_iris_data_set(self.x, plot_file)
    def test_adaline(self):
        """Train two AdalineBGD models and save their comparison plots.

        Both models are fitted on ``self.x`` / ``self.y`` for 30 epochs, one
        with learning rate 0.01 and one with 0.0001. The log10 of each
        model's ``cost`` history is drawn side by side, followed by one
        decision-boundary image per model; all files go under ``adaline/``.
        """
        # Train the first adaline model with the bigger learning rate.
        adaline1 = AdalineBGD(learning_rate=0.01, num_epochs=30)
        adaline1.fit(self.x, self.y)

        # Train the second adaline model with the smaller learning rate.
        adaline2 = AdalineBGD(learning_rate=0.0001, num_epochs=30)
        adaline2.fit(self.x, self.y)

        # Plot the learning curves of both trained models. The first title
        # now states the learning rate the model was actually trained with
        # (0.01; it previously read 0.1).
        curves = [{
            'cost_length': len(adaline1.cost),
            'cost': np.log10(adaline1.cost),
            'marker': 'o',
            'x_label': 'Epochs',
            'y_label': 'log(Sum-squared-error)',
            'title': 'Adaline - Learning rate 0.01'
        }, {
            'cost_length': len(adaline2.cost),
            'cost': np.log10(adaline2.cost),
            'marker': 'o',
            'x_label': 'Epochs',
            'y_label': 'log(Sum-squared-error)',
            'title': 'Adaline - Learning rate 0.0001'
        }]
        Plotter.plot_multiple_learning_curves(
            curves,
            image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineBGD-Learning-Curves.png'))

        # Plot one decision boundary per model: first the divergent model
        # (adaline 1), then the convergent one (adaline 2). A fresh options
        # dict is built for every call so the two plots never share state.
        boundary_plots = (
            (adaline1, 'adaline/AdalineBGD-Decision-Boundary-Divergent.png'),
            (adaline2, 'adaline/AdalineBGD-Decision-Boundary-Convergent.png'),
        )
        for classifier, file_name in boundary_plots:
            Plotter.plot_decision_boundary(
                self.x,
                self.y,
                classifier=classifier,
                diagram_options={
                    'x_label': 'sepal length [cm]',
                    'y_label': 'petal length [cm]',
                    'legend': 'upper left'
                },
                image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
                    file_name))
示例#10
0
    def test_adaline_with_stochastic_update(self):
        """Train AdalineSGD on standardized features and save its plots.

        The first two feature columns of ``self.x`` are standardized (zero
        mean, unit standard deviation per column) on a copy, the model is
        fitted for 15 epochs with learning rate 0.01, and three images are
        written under ``adaline/``: the standardized samples, the learning
        curve and the decision boundary. Finally ``partial_fit`` is called
        with the first sample to exercise the incremental-update path.
        """
        # Standardize features on a copy so self.x stays untouched.
        # np.copy returns an ndarray, hence the ndarray annotation.
        x_std: np.ndarray = np.copy(self.x)
        for column in (0, 1):
            values = self.x[:, column]
            x_std[:, column] = (values - values.mean()) / values.std()

        # Plot the standardized data and save it to file.
        Plotter.plot_iris_data_set(
            x_std,
            FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineSGD-Standardized-Training-Set.png'))

        # Train adaline on standardized features with a small number of epochs.
        adaline = AdalineSGD(learning_rate=0.01, num_epochs=15)
        adaline.fit(x_std, self.y)

        # Plot the learning curve. The cost is plotted as recorded (no log10
        # is applied here), so the y label must not claim a log scale.
        curve = {
            'cost_length': len(adaline.cost),
            'cost': adaline.cost,
            'marker': 'o',
            'x_label': 'Epochs',
            'y_label': 'Cost',
            'title': 'Adaline - Learning rate 0.01'
        }
        Plotter.plot_learning_curve(
            curve,
            FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineSGD-Learning-Curve-Standardized-Features.png'))

        # Plot the decision boundary. The plotted features are standardized,
        # so the axes are unit-less rather than centimetres.
        Plotter.plot_decision_boundary(
            x_std,
            self.y,
            classifier=adaline,
            diagram_options={
                'x_label': 'sepal length [standardized]',
                'y_label': 'petal length [standardized]',
                'legend': 'upper left'
            },
            image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineSGD-Decision-Boundary-Standardized-Features.png'
            ))

        # Feed a single sample to the already trained model.
        adaline.partial_fit(x_std[0, :], self.y[0])