Example #1
    def train(self, x_all, y_all, epochs=25):
        '''trains the neural net using stochastic gradient descent
        inputs: x_all - 2D list-like object of all x inputs
                y_all - 2D array of correct classifications
                epochs - number of passes through the data
        outputs: none'''

        # convert x and y to 2D np arrays
        x_all = sh.make_2d(x_all)
        y_all = sh.make_2d(y_all)

        # total number of points to train on
        total_train = len(x_all) * epochs
        # number of points trained on so far
        current_train = 0

        for i in range(epochs):
            # perform multiple passes through the data
            order = np.arange(len(x_all))
            # array of indexes in random order - used for SGD
            np.random.shuffle(order)
            # train on the points in a random order
            for n in order:
                # generate the prediction; pass 2D array pull off first
                # element to get 1D array
                y_pre = self.predict([x_all[n]])[0]
                # calculate the delta of the last layer (layer L):
                # delta_L = dx/ds * de/dx
                self.delta_l[self.total_layers] = \
                    self.dx_ds(y_pre) * self.de_dx(y_pre, y_all[n])

                # go backwards in L and calculate the deltas
                for l in reversed(range(2, self.total_layers + 1)):
                    # back-propagate the deltas
                    self.delta_l[l-1] = self.dx_ds(self.x_l[l-1]) * \
                                        np.dot(self.weights[l], self.delta_l[l])
                # now update the weights
                for l in range(1, self.total_layers + 1):
                    # update weights of each layer
                    self.weights[l] -=\
                        self.lr * np.dot(self.x_l[l-1], np.transpose(self.delta_l[l])) \
                        + self.reg * self.weights[l]  # regularization term
                current_train += 1
                # print progress roughly every 5% of training; change the
                # divisor (20) to tune how often this prints. Integer division
                # keeps the modulus an int; max(1, ...) guards tiny datasets
                if current_train % max(1, total_train // 20) == 0:
                    print("%.1f%% of training complete" %
                          (current_train / total_train * 100))

        return
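
For orientation, the sketch below reproduces the same per-point update (forward pass, delta back-propagation, weight update with the regularization shrinkage) as a self-contained NumPy snippet with a fixed tanh activation, squared loss, and no bias units. It is an illustration of the technique, not the author's class; the layer sizes, learning rate, and regularization constant are arbitrary.

import numpy as np

rng = np.random.default_rng(0)

# network shape and hyper-parameters (arbitrary, for illustration only)
layer_sizes = [2, 4, 1]                 # input, hidden, output widths
total_layers = len(layer_sizes) - 1
weights = {l: rng.normal(scale=0.5, size=(layer_sizes[l - 1], layer_sizes[l]))
           for l in range(1, total_layers + 1)}
lr, reg = 0.05, 1e-4

x_n = np.array([[0.3], [0.7]])          # one training point as a column vector
y_n = np.array([[1.0]])                 # its target

# forward pass: x_l = tanh(W^T x of the previous layer)
x_l = {0: x_n}
for l in range(1, total_layers + 1):
    s_j = np.dot(np.transpose(weights[l]), x_l[l - 1])
    x_l[l] = np.tanh(s_j)

# backward pass: delta_L = dx/ds * de/dx, then delta_{l-1} = dx/ds * (W_l delta_l)
delta_l = {total_layers: (1 - x_l[total_layers] ** 2)
           * 2 * (x_l[total_layers] - y_n)}
for l in reversed(range(2, total_layers + 1)):
    delta_l[l - 1] = (1 - x_l[l - 1] ** 2) * np.dot(weights[l], delta_l[l])

# weight update with the same shrinkage term as the train() example above
for l in range(1, total_layers + 1):
    weights[l] -= lr * np.dot(x_l[l - 1], np.transpose(delta_l[l])) \
                  + reg * weights[l]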
Example #2
    def predict(self, x_all):
        '''generates a prediction based on the current weights of the net.
        the result of each layer for the most recent point is stored in self.x_l
        inputs: x_all - 2D array of x points to generate predictions on
        outputs: y_pre - 2D array of predicted y values'''

        # set up array of result of predictions
        y_pre = np.zeros((len(x_all), self.n_outputs))

        for n in range(len(x_all)):
            # reshape the input into a 2D n x 1 array
            self.x_l[0][1:] = sh.make_2d(x_all[n])
            for l in range(1, self.total_layers + 1):
                # s = W^T x: multiply the layer's weights by the previous
                # layer's x to get the intermediate signal s
                s_j = np.dot(np.transpose(self.weights[l]), self.x_l[l - 1])
                # apply activation to get x_l
                self.x_l[l] = self.activation(s_j)
            # prediction is the output of the final layer
            # reshape to be one dimensional array output
            y_pre[n] = (self.x_l[self.total_layers]).reshape(self.n_outputs, )
            # apply softmax if set to do so
            if self.softmax:
                y_pre[n] = sh.apply_softmax(y_pre[n])

        # return array of predictions
        return y_pre
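
The sh helper module used throughout these examples is not shown. Based on how its functions are called here, plausible stand-ins might look like the sketch below; these are assumptions about its behaviour, not the author's implementation.

import numpy as np

def make_2d(arr):
    """Return arr as a 2D float array; 1D input becomes an (n x 1) column."""
    arr = np.asarray(arr, dtype=float)
    if arr.ndim == 1:
        arr = arr.reshape(-1, 1)
    return arr

def apply_softmax(y):
    """Numerically stable softmax over a 1D vector of raw outputs."""
    shifted = y - np.max(y)
    exp_y = np.exp(shifted)
    return exp_y / np.sum(exp_y)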
Example #3
    def dx_ds(self, x_l):
        '''calculates the derivative dx over ds, equivalent to the
        derivative of the activation function w/ respect to the
        input s
        inputs: x_l - output of activation for a given layer'''

        # dx over ds for different activation functions
        if self.act == "tanh":
            # derivative of tanh: d/ds tanh(s) = 1 - tanh(s)^2 = 1 - x_l^2
            dx_ds = 1 - x_l ** 2
        else:
            raise ValueError("unsupported activation: %s" % self.act)

        return sh.make_2d(dx_ds)
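
The tanh branch relies on the identity d/ds tanh(s) = 1 - tanh(s)^2, which is why the derivative can be computed from the layer's output x_l alone, without keeping s around. A quick finite-difference check of that identity:

import numpy as np

s = np.linspace(-3, 3, 7)
eps = 1e-6
numeric = (np.tanh(s + eps) - np.tanh(s - eps)) / (2 * eps)
analytic = 1 - np.tanh(s) ** 2
print(np.max(np.abs(numeric - analytic)))   # on the order of 1e-10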
Example #4
    def de_dx(self, y_pre, y_n):
        '''returns the derivative de over dx, the derivative of the 
        error function with respect to x.
        inputs: y_pre - predicted y value
                y_n - true y value'''

        # de over dx for different loss functions
        if self.loss == "squared":
            # de/dx of squared loss: e = (y - y_n)^2
            de_dx = 2 * (y_pre - y_n)
        elif self.loss == "cce":
            # de/dx of binary cross-entropy: e = -(y_n*log(y) + (1-y_n)*log(1-y))
            de_dx = -y_n / y_pre + (1 - y_n) / (1 - y_pre)
        else:
            raise ValueError("unsupported loss: %s" % self.loss)

        return sh.make_2d(de_dx)
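
Both branches can be checked numerically. The sketch below verifies the cross-entropy branch against a central finite difference; the values of y and y_n are arbitrary test inputs, not taken from the example above.

import numpy as np

def cce(y, y_n):
    # binary cross-entropy: e = -(y_n*log(y) + (1 - y_n)*log(1 - y))
    return -(y_n * np.log(y) + (1 - y_n) * np.log(1 - y))

y, y_n, eps = 0.3, 1.0, 1e-7
numeric = (cce(y + eps, y_n) - cce(y - eps, y_n)) / (2 * eps)
analytic = -y_n / y + (1 - y_n) / (1 - y)
print(numeric, analytic)   # both approximately -3.3333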