Example #1
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.001):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()
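
Sigmoid, Softmax, and CrossEntropy are used above but not shown in this example. A minimal sketch of such helpers, assuming the __call__/gradient interface the code relies on (the bodies are assumptions, not the original implementations):

import numpy as np

class Sigmoid:
    def __call__(self, x):
        return 1 / (1 + np.exp(-x))

    def gradient(self, x):
        s = self.__call__(x)
        return s * (1 - s)

class Softmax:
    def __call__(self, x):
        # Shift by the row max for numerical stability
        e = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e / np.sum(e, axis=-1, keepdims=True)

    def gradient(self, x):
        p = self.__call__(x)
        return p * (1 - p)  # elementwise (diagonal) approximation

class CrossEntropy:
    def loss(self, y, p):
        p = np.clip(p, 1e-15, 1 - 1e-15)  # avoid log(0)
        return -y * np.log(p) - (1 - y) * np.log(1 - p)

    def gradient(self, y, p):
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return -(y / p) + (1 - y) / (1 - p)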
Example #2
    def __init__(self, n_estimators, learning_rate, min_samples_split, min_impurity,
                 max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Square loss for regression
        # Log loss for classification
        self.loss = SquareLoss()
        if not self.regression:
            self.loss = CrossEntropy()

        # Initialize the regression trees
        self.trees = []
        for _ in range(n_estimators):
            tree = RegressionTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_impurity,
                max_depth=self.max_depth
            )
            self.trees.append(tree)
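
The boosting loop only ever calls loss.gradient(y, y_pred), so the loss classes reduce to a value and a gradient. A minimal sketch of SquareLoss under that assumption; note that for square loss the gradient with respect to y_pred is the negative residual, which is exactly what each tree is trained to predict:

import numpy as np

class SquareLoss:
    def loss(self, y, y_pred):
        return 0.5 * np.power(y - y_pred, 2)

    def gradient(self, y, y_pred):
        # Negative residual: fitting trees to this is residual fitting
        return -(y - y_pred)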
Example #3
class GradientBoosting(object):
    """docstring for GradientBoosting"""
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        super(GradientBoosting, self).__init__()
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Square loss for regression
        # Log loss for classification
        self.loss = SquareLoss()
        if not self.regression:
            self.loss = CrossEntropy()

        # Initialize the trees
        self.trees = []
        for _ in range(n_estimators):
            tree = RegressionTree(min_samples_split=self.min_samples_split,
                                  min_impurity=min_impurity,
                                  max_depth=self.max_depth)
            self.trees.append(tree)

    def fit(self, X, y):
        # r_im holds the running prediction (the y_pred at each boosting step)
        r_im = np.full(np.shape(y), np.mean(y, axis=0))

        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, r_im)
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)

            # Update the running prediction
            r_im -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        y_pred = np.array([])
        for tree in self.trees:
            update = tree.predict(X)
            update = np.multiply(self.learning_rate, update)
            y_pred = -update if not y_pred.any() else y_pred - update

        if not self.regression:
            y_pred = np.exp(y_pred) / np.expand_dims(
                np.sum(np.exp(y_pred), axis=1), axis=1)
            y_pred = np.argmax(y_pred, axis=1)

        return y_pred
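
A hypothetical usage sketch for the class above on synthetic regression data (the constructor arguments are illustrative; RegressionTree, SquareLoss, and bar_widgets are assumed importable as in the example):

import numpy as np

X = np.random.randn(200, 5)                    # hypothetical features
y = 2.0 * X[:, 0] + np.random.randn(200) * 0.1

model = GradientBoosting(n_estimators=50, learning_rate=0.1,
                         min_samples_split=2, min_impurity=1e-7,
                         max_depth=4, regression=True)
model.fit(X, y)
y_hat = model.predict(X)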
Example #4
class GradientBoosting(object):
    """
	"""
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)
        self.n_classes = None

        if self.regression:
            self.train_loss = SquareLoss()
        else:
            self.train_loss = CrossEntropy()

        self.trees = []
        for _ in range(self.n_estimators):

            tree = RegressionTree(min_samples_split=self.min_samples_split,
                                  min_impurity=self.min_impurity,
                                  max_depth=self.max_depth)

            self.trees.append(tree)

    def fit(self, X, y):
        X = np.array(X)
        y = np.array(y).reshape(len(y), -1)
        y_pred = np.full(y.shape, np.mean(y, axis=0))
        for tree in self.bar(self.trees):
            grad = self.train_loss.backward(y, y_pred)
            tree.fit(X, grad)
            update = tree.predict(X)
            y_pred = y_pred - self.learning_rate * update.reshape(
                len(update), -1)

    def predict(self, X_test):
        X_test = np.array(X_test)
        y_pred = np.array([])
        for tree in self.trees:
            update = tree.predict(X_test)
            update = self.learning_rate * update.reshape(len(update), -1)
            y_pred = -update if not y_pred.any() else y_pred - update
        if not self.regression:
            y_pred = softmax(y_pred)
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
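
For classification this variant expects y as a one-hot matrix: fit() reshapes y to 2-D, and predict() takes an argmax over the softmax columns. A hypothetical sketch:

import numpy as np

X = np.random.randn(150, 4)
labels = np.random.randint(0, 3, size=150)  # hypothetical class labels
y_onehot = np.eye(3)[labels]                # shape (150, 3)

clf = GradientBoosting(n_estimators=100, learning_rate=0.1,
                       min_samples_split=2, min_impurity=1e-7,
                       max_depth=3, regression=False)
clf.fit(X, y_onehot)
pred_labels = clf.predict(X)                # integer labels via argmax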
Example #5
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)
        self.n_classes = None

        if self.regression:
            self.train_loss = SquareLoss()
        else:
            self.train_loss = CrossEntropy()

        self.trees = []
        for _ in range(self.n_estimators):

            tree = RegressionTree(min_samples_split=self.min_samples_split,
                                  min_impurity=self.min_impurity,
                                  max_depth=self.max_depth)

            self.trees.append(tree)
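
Every example constructs RegressionTree with these three keyword arguments, but the class itself is never shown. A degenerate stand-in that illustrates the assumed interface (the name and body here are placeholders; the real tree performs CART-style splitting):

import numpy as np

class MeanRegressionTree:
    # Single-leaf stand-in: predicts the training mean for every sample
    def __init__(self, min_samples_split=2, min_impurity=1e-7,
                 max_depth=float("inf")):
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth

    def fit(self, X, y):
        self.leaf_value = np.mean(y, axis=0)

    def predict(self, X):
        shape = (len(X),) + np.shape(self.leaf_value)
        return np.broadcast_to(self.leaf_value, shape).copy()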
Example #6
class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with one hidden layer.
    Unrolled to display the whole forward and backward pass.
    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.001):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))

        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):
        self._initialize_weights(X, y)

        for i in range(self.n_iterations):
            # ............
            # Forward Pass
            # ............

            # Hidden layer
            hidden_input = X.dot(self.W) + self.w0
            hidden_output = self.hidden_activation(hidden_input)

            # Output layer
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # .............
            # Backward Pass
            # .............

            # Output layer
            # Gradient w.r.t. the input of the output layer
            grad_wrt_out_l_input = self.loss.gradient(
                y,
                y_pred) * self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)

            # Hidden layer
            # Gradient w.r.t. the input of the hidden layer
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(
                self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update the weights by gradient descent:
            # move against the gradient to minimize the loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict outputs (class probabilities) for X
    def predict(self, X):
        # Forward pass
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
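
A hypothetical usage sketch for the classifier above. The targets must be one-hot encoded, since the loss and the softmax operate on an (n_samples, n_classes) matrix:

import numpy as np

X = np.random.randn(200, 10)                # hypothetical features
labels = np.random.randint(0, 3, size=200)
y = np.eye(3)[labels]                       # one-hot targets, shape (200, 3)

clf = MultilayerPerceptron(n_hidden=16, n_iterations=3000, learning_rate=0.01)
clf.fit(X, y)
probs = clf.predict(X)                      # softmax probabilities
pred_labels = np.argmax(probs, axis=1)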
Example #7
class GradientBoosting(object):
    """
    Super class of GradientBoostingClassifier and GradientBoostingRegressor.
    Uses a collection of regression trees that train on predicting the gradient
    of the loss function.

    Parameters:
    -----------
    n_estimators: int
        The number of regression trees that are used.
    learning_rate: float
        The step length that will be taken when following the negative gradient during training.
    min_samples_split: int
        The minimum number of samples needed to make a split when building a tree.
    min_impurity: float
        The minimum impurity required to split the tree further.
    max_depth: int
        The maximum depth of a tree.
    regression: boolean
        True or False depending on whether we are doing regression or classification.
    """
    def __init__(self, n_estimators, learning_rate, min_samples_split, min_impurity,
                 max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Square loss for regression
        # Log loss for classification
        self.loss = SquareLoss()
        if not self.regression:
            self.loss = CrossEntropy()

        # Initialize the regression trees
        self.trees = []
        for _ in range(n_estimators):
            tree = RegressionTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_impurity,
                max_depth=self.max_depth
            )
            self.trees.append(tree)


    def fit(self, X, y):
        y_pred = np.full(np.shape(y), np.mean(y, axis=0))
        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, y_pred)
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)
            # Update the running prediction
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        y_pred = np.array([])
        # Accumulate each tree's contribution
        for tree in self.trees:
            update = tree.predict(X)
            update = np.multiply(self.learning_rate, update)
            y_pred = -update if not y_pred.any() else y_pred - update

        if not self.regression:
            # Turn the scores into a probability distribution (softmax)
            y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1)
            # Set the label to the value that maximizes probability
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
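
With square loss the scheme above reduces to residual fitting: gradient(y, y_pred) = -(y - y_pred), so each tree is trained on the negative residual, and y_pred -= learning_rate * update shrinks the error. A small worked check, assuming a tree that predicts its target exactly:

import numpy as np

y = np.array([3.0, 5.0, 7.0])
y_pred = np.full(3, y.mean())        # initial prediction: [5.0, 5.0, 5.0]
grad = -(y - y_pred)                 # [2.0, 0.0, -2.0]
y_pred -= 0.5 * grad                 # learning_rate = 0.5 -> [4.0, 5.0, 6.0]
print(np.abs(y - y_pred).mean())     # mean error drops from 1.33 to 0.67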