def fit(self, X, y):
    X_train = X
    y_train = y
    if self.val_error:
        # Split the data into training and validation sets
        X_train, X_validate, y_train, y_validate = train_test_split(
            X, y, test_size=0.1)
        y_validate = categorical_to_binary(y_validate)

    # Convert the nominal y values to binary
    y_train = categorical_to_binary(y_train)

    n_samples, n_features = np.shape(X_train)
    n_batches = int(n_samples / self.batch_size)

    bar = progressbar.ProgressBar(widgets=bar_widgets)
    for i in bar(range(self.n_iterations)):
        X_, y_ = shuffle_data(X_train, y_train)

        batch_t_error = 0   # Mean batch training error
        batch_v_error = 0   # Mean batch validation error
        for idx in np.array_split(np.arange(n_samples), n_batches):
            X_batch, y_batch = X_[idx], y_[idx]

            # Calculate output
            y_pred = self._forward_pass(X_batch)

            # Calculate the cross entropy training loss
            loss = np.mean(self.cross_ent.loss(y_batch, y_pred))
            batch_t_error += loss

            loss_grad = self.cross_ent.gradient(y_batch, y_pred)

            # Update the NN weights
            self._backward_pass(loss_grad=loss_grad)

            if self.val_error:
                # Calculate the validation error
                y_val_pred = self._forward_pass(X_validate)
                loss = np.mean(self.cross_ent.loss(y_validate, y_val_pred))
                batch_v_error += loss

        batch_t_error /= n_batches
        batch_v_error /= n_batches
        self.errors["training"].append(batch_t_error)
        self.errors["validation"].append(batch_v_error)
def fit(self, X, y):
    # Convert the categorical data to binary
    y = categorical_to_binary(y.astype("int"))

    n_samples, n_features = np.shape(X)
    n_batches = int(n_samples / self.batch_size)

    bar = progressbar.ProgressBar(widgets=bar_widgets)
    for _ in bar(range(self.n_iterations)):
        X_, y_ = shuffle_data(X, y)

        batch_t_error = 0   # Mean batch training error
        for idx in np.array_split(np.arange(n_samples), n_batches):
            X_batch, y_batch = X_[idx], y_[idx]

            # Calculate output
            y_pred = self._forward_pass(X_batch)

            # Calculate the cross entropy training loss
            loss = np.mean(self.cross_ent.loss(y_batch, y_pred))
            batch_t_error += loss

            loss_grad = self.cross_ent.gradient(y_batch, y_pred)

            # Update the NN weights
            self._backward_pass(loss_grad=loss_grad)

        batch_t_error /= n_batches
        self.errors["training"].append(batch_t_error)

        if self.X_val.any():
            # Calculate the validation error
            y_val_p = self._forward_pass(self.X_val)
            loss = np.mean(self.cross_ent.loss(self.y_val, y_val_p))
            self.errors["validation"].append(loss)
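# A minimal sketch of the shuffle_data helper used in the two versions
# above. The real helper lives elsewhere in the repo; this is only an
# assumption about its behavior: it shuffles X and y in unison along the
# sample axis so features and labels stay aligned.
def shuffle_data(X, y):
    # Draw a random permutation of the sample indices and apply it to
    # both arrays
    idx = np.random.permutation(X.shape[0])
    return X[idx], y[idx]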
def fit(self, X, y):
    y = categorical_to_binary(y)
    y_pred = np.zeros(np.shape(y))
    for i in self.bar(range(self.n_estimators)):
        tree = self.trees[i]
        y_and_pred = np.concatenate((y, y_pred), axis=1)
        tree.fit(X, y_and_pred)
        update_pred = tree.predict(X)
        y_pred -= np.multiply(self.learning_rate, update_pred)
def fit(self, X, y):
    y = categorical_to_binary(y)
    y_pred = np.zeros(np.shape(y))
    for i, tree in enumerate(self.trees):
        y_and_pred = np.concatenate((y, y_pred), axis=1)
        tree.fit(X, y_and_pred)
        update_pred = tree.predict(X)
        y_pred -= np.multiply(self.learning_rate, update_pred)
        if self.debug:
            progress = 100 * (i / self.n_estimators)
            print("Progress: %.2f%%" % progress)
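# A hedged sketch of the matching inference step for the boosting fit
# above; it is not taken from the source. Predictions are rebuilt by
# accumulating each tree's scaled update exactly as the training loop
# does, then taking the highest-scoring class. The n_classes attribute
# is an assumption about the surrounding class.
def predict(self, X):
    # Start from zero and subtract each tree's scaled update, mirroring
    # the accumulation in fit
    y_pred = np.zeros((np.shape(X)[0], self.n_classes))
    for tree in self.trees:
        y_pred -= np.multiply(self.learning_rate, tree.predict(X))
    # Return the class with the largest accumulated score
    return np.argmax(y_pred, axis=1)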
def __init__(self, n_iterations, batch_size, optimizer, loss, validation_data=None):
    self.n_iterations = n_iterations
    self.optimizer = optimizer
    self.layers = []
    self.errors = {"training": [], "validation": []}
    self.cross_ent = loss()
    self.batch_size = batch_size

    # Default to an empty 1-D array so that X_val.any() is reliably False
    # when no validation data is given (np.empty([]) creates an
    # uninitialized 0-d array whose truth value is arbitrary)
    self.X_val = self.y_val = np.array([])
    if validation_data:
        self.X_val, self.y_val = validation_data
        self.y_val = categorical_to_binary(self.y_val.astype("int"))
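# A hedged usage sketch for the constructor above. The class name
# NeuralNetwork, the Adam optimizer and the CrossEntropy loss class are
# assumptions about the surrounding repo, not confirmed by this section.
# Note that loss is passed as a class, not an instance, since the
# constructor calls loss() itself.
clf = NeuralNetwork(n_iterations=1000,
                    batch_size=64,
                    optimizer=Adam(),
                    loss=CrossEntropy,
                    validation_data=(X_test, y_test))
clf.fit(X_train, y_train)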
def fit(self, X, y):
    # Convert the categorical data to binary
    y = categorical_to_binary(y.astype("int"))

    n_samples = np.shape(X)[0]
    n_batches = int(n_samples / self.batch_size)

    bar = progressbar.ProgressBar(widgets=bar_widgets)
    for _ in bar(range(self.n_iterations)):
        # Shuffle the sample indices (np.arange rather than range, since
        # a range object cannot be shuffled in place)
        idx = np.arange(n_samples)
        np.random.shuffle(idx)

        batch_t_error = 0   # Mean batch training error
        for i in range(n_batches):
            X_batch = X[idx[i * self.batch_size:(i + 1) * self.batch_size]]
            y_batch = y[idx[i * self.batch_size:(i + 1) * self.batch_size]]

            # Calculate output
            y_pred = self._forward_pass(X_batch)

            # Calculate the cross entropy training loss
            loss = np.mean(self.loss_function.loss(y_batch, y_pred))
            batch_t_error += loss

            loss_grad = self.loss_function.gradient(y_batch, y_pred)

            # Backprop. Update weights
            self._backward_pass(loss_grad=loss_grad)

        # Save the epoch mean error
        self.errors["training"].append(batch_t_error / n_batches)

        if self.X_val.any():
            # Calculate the validation error
            y_val_p = self._forward_pass(self.X_val)
            validation_loss = np.mean(
                self.loss_function.loss(self.y_val, y_val_p))
            self.errors["validation"].append(validation_loss)
def fit(self, X, y):
    y = categorical_to_binary(y)
    super(GradientBoostingClassifier, self).fit(X, y)
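# The classifier fit above only one-hot encodes the labels and then
# defers to the regression-style base class, which fits each tree
# against the residuals. A minimal usage sketch; the constructor
# arguments are assumptions, not confirmed by this section.
clf = GradientBoostingClassifier(n_estimators=200, learning_rate=0.5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)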
def fit(self, X, y):
    X_train = X
    y_train = y
    if self.early_stopping:
        # Split the data into training and validation sets
        X_train, X_validate, y_train, y_validate = train_test_split(
            X, y, test_size=0.1)
        y_validate = categorical_to_binary(y_validate)

    # Convert the nominal y values to binary
    y_train = categorical_to_binary(y_train)

    n_samples, n_features = np.shape(X_train)
    n_outputs = np.shape(y_train)[1]

    # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
    a = -1 / math.sqrt(n_features)
    b = -a
    self.W = (b - a) * np.random.random((n_features, self.n_hidden)) + a
    self.w0 = (b - a) * np.random.random((1, self.n_hidden)) + a
    self.V = (b - a) * np.random.random((self.n_hidden, n_outputs)) + a
    self.v0 = (b - a) * np.random.random((1, n_outputs)) + a

    # Error history
    training_errors = []
    validation_errors = []
    iter_with_rising_val_error = 0

    for i in range(self.n_iterations):
        # Calculate hidden layer
        hidden_layer_input = X_train.dot(self.W) + self.w0
        hidden_layer_output = self.activation.function(hidden_layer_input)
        # Calculate output layer
        output_layer_input = hidden_layer_output.dot(self.V) + self.v0
        output = self.activation.function(output_layer_input)

        # Calculate the error
        error = y_train - output
        mse = np.mean(np.power(error, 2))
        training_errors.append(mse)

        # Calculate the loss gradient
        output_gradient = -2 * (y_train - output) * \
            self.activation.gradient(output_layer_input)
        hidden_gradient = output_gradient.dot(
            self.V.T) * self.activation.gradient(hidden_layer_input)

        # Calculate the gradient with respect to each weight term
        grad_wrt_v = hidden_layer_output.T.dot(output_gradient)
        grad_wrt_v0 = np.ones((1, n_samples)).dot(output_gradient)
        grad_wrt_w = X_train.T.dot(hidden_gradient)
        grad_wrt_w0 = np.ones((1, n_samples)).dot(hidden_gradient)

        # Update weights
        # Move against the gradient to minimize loss
        self.V = self.v_opt.update(w=self.V, grad_wrt_w=grad_wrt_v)
        self.v0 = self.v0_opt.update(w=self.v0, grad_wrt_w=grad_wrt_v0)
        self.W = self.w_opt.update(w=self.W, grad_wrt_w=grad_wrt_w)
        self.w0 = self.w0_opt.update(w=self.w0, grad_wrt_w=grad_wrt_w0)

        if self.early_stopping:
            # Calculate the validation error
            error = y_validate - self._calculate_output(X_validate)
            mse = np.mean(np.power(error, 2))
            validation_errors.append(mse)

            # If the validation error is larger than in the previous
            # iteration, increase the counter
            if len(validation_errors) > 1 and validation_errors[-1] > validation_errors[-2]:
                iter_with_rising_val_error += 1
                # If the validation error has been rising for more than
                # 50 iterations, stop training to avoid overfitting
                if iter_with_rising_val_error > 50:
                    break
            else:
                iter_with_rising_val_error = 0

    # Plot the training error
    if self.plot_errors:
        if self.early_stopping:
            # Training and validation error plot
            training, = plt.plot(range(i + 1), training_errors, label="Training Error")
            validation, = plt.plot(range(i + 1), validation_errors, label="Validation Error")
            plt.legend(handles=[training, validation])
        else:
            training, = plt.plot(range(i + 1), training_errors, label="Training Error")
            plt.legend(handles=[training])
        plt.title("Error Plot")
        plt.ylabel('Error')
        plt.xlabel('Iterations')
        plt.show()
def fit(self, X, y):
    X_train = X
    y_train = y
    if self.early_stopping:
        # Split the data into training and validation sets
        X_train, X_validate, y_train, y_validate = train_test_split(
            X, y, test_size=0.1)
        y_validate = categorical_to_binary(y_validate)

    # Convert the nominal y values to binary
    y_train = categorical_to_binary(y_train)

    n_samples, n_features = np.shape(X_train)
    n_outputs = np.shape(y_train)[1]

    # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
    a = -1 / math.sqrt(n_features)
    b = -a
    self.W = (b - a) * np.random.random((n_features, n_outputs)) + a
    self.w0 = (b - a) * np.random.random((1, n_outputs)) + a

    # Error history
    training_errors = []
    validation_errors = []
    iter_with_rising_val_error = 0

    # Lambda function that calculates the neuron outputs
    neuron_output = lambda w, b: self.activation.function(np.dot(X_train, w) + b)

    # Lambda function that calculates the loss gradient
    loss_grad = lambda w, b: -2 * (y_train - neuron_output(w, b)) * \
        self.activation.gradient(np.dot(X_train, w) + b)

    # Lambda functions that calculate the gradient of the loss with
    # respect to each weight term. Allows for computation of the loss
    # gradient at different coordinates.
    grad_func_wrt_w = lambda w: X_train.T.dot(loss_grad(w, self.w0))
    grad_func_wrt_w0 = lambda b: np.ones((1, n_samples)).dot(loss_grad(self.W, b))

    # Optimize parameters for n_iterations
    for i in range(self.n_iterations):
        # Training error
        error = y_train - neuron_output(self.W, self.w0)
        mse = np.mean(np.power(error, 2))
        training_errors.append(mse)

        # Update weights
        self.W = self.w_opt.update(w=self.W, grad_func=grad_func_wrt_w)
        self.w0 = self.w0_opt.update(w=self.w0, grad_func=grad_func_wrt_w0)

        if self.early_stopping:
            # Calculate the validation error
            error = y_validate - self._calculate_output(X_validate)
            mse = np.mean(np.power(error, 2))
            validation_errors.append(mse)

            # If the validation error is larger than in the previous
            # iteration, increase the counter
            if len(validation_errors) > 1 and validation_errors[-1] > validation_errors[-2]:
                iter_with_rising_val_error += 1
                # If the validation error has been rising for more than
                # 50 iterations, stop training to avoid overfitting
                if iter_with_rising_val_error > 50:
                    break
            else:
                iter_with_rising_val_error = 0

    # Plot the training error
    if self.plot_errors:
        if self.early_stopping:
            # Training and validation error plot
            training, = plt.plot(range(i + 1), training_errors, label="Training Error")
            validation, = plt.plot(range(i + 1), validation_errors, label="Validation Error")
            plt.legend(handles=[training, validation])
        else:
            training, = plt.plot(range(i + 1), training_errors, label="Training Error")
            plt.legend(handles=[training])
        plt.title("Error Plot")
        plt.ylabel('Error')
        plt.xlabel('Iterations')
        plt.show()
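# The two fit versions above assume different optimizer interfaces: the
# hidden-layer version passes precomputed gradient arrays (grad_wrt_w),
# while this one passes callables (grad_func) so the optimizer can
# evaluate the gradient at coordinates of its own choosing, as lookahead
# methods such as Nesterov momentum require. A minimal sketch of a plain
# gradient-descent optimizer satisfying the callable interface; the
# class name and learning_rate parameter are assumptions.
class GradientDescent:
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def update(self, w, grad_func):
        # Evaluate the gradient at the current weights and step against it
        return w - self.learning_rate * grad_func(w)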