Example #1
    def back_propagation(self, x, y):
        """
        根据单个训练样本计算梯度
        :param x: 样本的输入
        :param y: 样本的标签
        :return: 一个tuple,包括weights和biases的梯度
        """
        zs, activations = self.feed_forward(x)
        delta_weights = [np.zeros(e.shape) for e in self.weights]
        delta_biases = [np.zeros(e.shape) for e in self.biases]
        # Compute the error at the output layer
        delta = utils.cross_entropy_derivative(
            activations[-1], y) * utils.sigmoid_derivative(zs[-1])
        # Propagate the error backwards
        delta_weights[-1] = np.dot(delta, activations[-2].transpose())
        delta_biases[-1] = delta
        for last in range(2, self.num_layers):
            z = zs[-last]
            delta = np.dot(self.weights[-last + 1].transpose(),
                           delta) * utils.sigmoid_derivative(z)
            delta_weights[-last] = np.dot(delta,
                                          activations[-last - 1].transpose())
            delta_biases[-last] = delta

        return delta_weights, delta_biases
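Example #1 assumes a `utils` module that provides `sigmoid_derivative` (applied here to the pre-activations in `zs`) and `cross_entropy_derivative`. The definitions below are only a plausible sketch of those helpers, not the original module; note that their product simplifies to `activations[-1] - y`, the usual output error for a sigmoid output layer with a cross-entropy cost.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_derivative(z):
    # derivative of the sigmoid with respect to its pre-activation input z
    s = sigmoid(z)
    return s * (1.0 - s)

def cross_entropy_derivative(a, y):
    # derivative of the binary cross-entropy cost with respect to the activation a
    return (a - y) / (a * (1.0 - a))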
Example #2
    def back_propagate(self, output, label):
        # normalize_output()
        error = label - output
        delta = np.multiply(error, sigmoid_derivative(self.layers[-1].input))
        delta_weights = []
        delta_bias = []
        for i in range(len(self.weights), 0, -1):
            delta_w = self.layers[i - 1].output * delta.transpose() * self.learning_rate
            delta_b = self.learning_rate * delta
            delta_weights.append(delta_w)
            delta_bias.append(delta_b)
            error = self.weights[i - 1] * delta
            delta = np.multiply(error, sigmoid_derivative(self.layers[i - 1].input))
        self.update_weights(delta_weights, delta_bias)
Example #3
    def compute_gradient_back_propagation(self, inputs, expected_outputs):
        """
        Computes the gradient with respect to the NN's parameters using back propagation.

        :param inputs: inputs to the network.
        :type inputs: list of numpy matrices.
        :param expected_outputs: expected outputs of the network.
        :type expected_outputs: list of numpy matrices.
        :return weights_gradient: gradients of the weights at each layer.
        :rtype weights_gradient: L-dimensional list of numpy matrices.
        :return biases_gradient: gradients of the biases at each layer.
        :rtype biases_gradient: L-dimensional list of numpy matrices.
        """
        weights_gradient = [None] * 3
        biases_gradient = [None] * 3
        weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
        weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
        biases_gradient[1] = np.zeros((self.num_hiddens, 1))
        biases_gradient[2] = np.zeros((self.num_outputs, 1))
        # Add logic to compute the gradients
        num_cases = len(inputs)
        outputs = [None] * num_cases
        for i in range(num_cases):
            z, a = self.forward_propagation(inputs[i])
            delta_2 = a[2] - expected_outputs[i]
            delta_1 = np.multiply(np.dot(self.weights[2].T, delta_2),
                                  sigmoid_derivative(z[1]))
            biases_gradient[2] += (1 / num_cases) * delta_2
            weights_gradient[2] += (1 / num_cases) * np.dot(delta_2, a[1].T)
            biases_gradient[1] += (1 / num_cases) * delta_1
            weights_gradient[1] += (1 / num_cases) * np.dot(delta_1, a[0].T)

        return weights_gradient, biases_gradient
Example #4
    def compute_gradient_back_propagation(self, inputs, expected_outputs):
        """
        Computes the gradient with respect to the NN's parameters using back propagation.

        :param inputs: inputs to the network.
        :type inputs: list of numpy matrices.
        :param expected_outputs: expected outputs of the network.
        :type expected_outputs: list of numpy matrices.
        :return weights_gradient: gradients of the weights at each layer.
        :rtype weights_gradient: L-dimensional list of numpy matrices.
        :return biases_gradient: gradients of the biases at each layer.
        :rtype biases_gradient: L-dimensional list of numpy matrices.
        """
        weights_gradient = [None] * 3
        biases_gradient = [None] * 3
        weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
        weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
        biases_gradient[1] = np.zeros((self.num_hiddens, 1))
        biases_gradient[2] = np.zeros((self.num_outputs, 1))
        # Add logic to compute the gradients
        num_cases = len(inputs)
        outputs = [None] * num_cases
        for i in range(num_cases):
            z, a = self.forward_propagation(inputs[i])
            outputs[i] = a[-1]
            y = expected_outputs
            yhat = outputs
            delta = [None] * 3
            delta[1] = np.zeros((self.num_hiddens, 1))
            delta[2] = np.zeros((self.num_outputs, 1))
            for c in range(self.num_outputs):
                delta[2][c] = yhat[i][c] - y[i][c]
            for k in range(self.num_hiddens):
                aux = 0
                for c in range(self.num_outputs):
                    b = np.array(self.weights[2][c])
                    aux += b[0][k] * delta[2][c] * sigmoid_derivative(z[1][k])
                delta[1][k] = aux
            for c in range(self.num_outputs):
                for k in range(self.num_hiddens):
                    weights_gradient[2][c][k] += (delta[2][c] *
                                                  a[1][k]).item()
                biases_gradient[2][c] += delta[2][c]
            for k in range(self.num_hiddens):
                for j in range(self.num_inputs):
                    weights_gradient[1][k][j] += (delta[1][k] *
                                                  a[0][j]).item()
                biases_gradient[1][k] += delta[1][k]
        weights_gradient[1] /= num_cases
        weights_gradient[2] /= num_cases
        biases_gradient[1] /= num_cases
        biases_gradient[2] /= num_cases
        return weights_gradient, biases_gradient
Example #5
    def compute_gradient_back_propagation(self, inputs, expected_outputs):
        """
        Computes the gradient with respect to the NN's parameters using back propagation.

        :param inputs: inputs to the network.
        :type inputs: list of numpy matrices.
        :param expected_outputs: expected outputs of the network.
        :type expected_outputs: list of numpy matrices.
        :return weights_gradient: gradients of the weights at each layer.
        :rtype weights_gradient: L-dimensional list of numpy matrices.
        :return biases_gradient: gradients of the biases at each layer.
        :rtype biases_gradient: L-dimensional list of numpy matrices.
        """
        weights_gradient = [None] * 3
        biases_gradient = [None] * 3
        weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
        weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
        biases_gradient[1] = np.zeros((self.num_hiddens, 1))
        biases_gradient[2] = np.zeros((self.num_outputs, 1))

        num_cases = len(inputs)
        outputs = [None] * num_cases

        for i in range(num_cases):
            z, a = self.forward_propagation(inputs[i])
            outputs[i] = a[-1]
            delta1 = np.zeros((self.num_hiddens, 1))
            delta2 = np.zeros((self.num_outputs, 1))

            # Get error from the last layer
            delta2 = outputs[i] - expected_outputs[i]
            # Update biases gradient based on mean
            biases_gradient[2] += delta2 / num_cases
            # Get error from the hidden layer
            for c in range(self.num_outputs):
                delta1 = delta1 + np.multiply(
                    np.dot(self.weights[2][c].T, delta2[c]),
                    sigmoid_derivative(z[1]))
            # Update biases gradient based on mean
            biases_gradient[1] += delta1 / num_cases
            # Update weights gradient based on mean
            weights_gradient[2] += np.dot(delta2, a[1].T) / num_cases
            weights_gradient[1] += np.dot(delta1, a[0].T) / num_cases

        return weights_gradient, biases_gradient
Example #6
def LogisticLinearLeaner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.4]
    Linear classifier with logistic regression.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose
    X_col = [dataset.values[i] for i in idx_i]  # vertical columns of X

    # add dummy
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        h = []
        # pass over all examples
        for example in examples:
            x = [1] + example
            y = sigmoid(dot_product(w, x))
            h.append(sigmoid_derivative(y))
            t = example[idx_t]
            err.append(t - y)

        # update weights
        for i in range(len(w)):
            buffer = [x * y for x, y in zip(err, h)]
            w[i] = w[i] + learning_rate * (dot_product(buffer, X_col[i]) /
                                           num_examples)

    def predict(example):
        x = [1] + example
        return sigmoid(dot_product(w, x))

    return predict
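Assuming `sigmoid_derivative(y)` returns y·(1 − y), the weight update in the loop above corresponds to the batch gradient-descent rule for logistic regression, with $h_w(x) = \sigma(w \cdot x)$, learning rate $\alpha$ and $N$ examples:

$$ w_i \leftarrow w_i + \frac{\alpha}{N} \sum_{j=1}^{N} \bigl(t_j - h_w(x_j)\bigr)\, h_w(x_j)\,\bigl(1 - h_w(x_j)\bigr)\, x_{j,i} $$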
Example #7
    def compute_gradient_back_propagation(self, inputs, expected_outputs):
        """
        Computes the gradient with respect to the NN's parameters using back propagation.

        :param inputs: inputs to the network.
        :type inputs: list of numpy matrices.
        :param expected_outputs: expected outputs of the network.
        :type expected_outputs: list of numpy matrices.
        :return weights_gradient: gradients of the weights at each layer.
        :rtype weights_gradient: L-dimensional list of numpy matrices.
        :return biases_gradient: gradients of the biases at each layer.
        :rtype biases_gradient: L-dimensional list of numpy matrices.
        """
        weights_gradient = [None] * 3
        biases_gradient = [None] * 3
        weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
        weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
        biases_gradient[1] = np.zeros((self.num_hiddens, 1))
        biases_gradient[2] = np.zeros((self.num_outputs, 1))

        # Add logic to compute the gradients
        for i in range(len(inputs)):
            z, a = self.forward_propagation(inputs[i])

            # dz[2] = a[2] - y
            # dW[2] = dz[2] a[1].T
            # db[2] = dz[2]
            # dz[1] = W[2].T dz[2] * g[1]'(z[1])
            # dW[1] = dz[1] x.T
            # db[1] = dz[1]

            dz2 = np.matrix(a[2] - expected_outputs[i])
            weights_gradient[2] += dz2 * a[1].T
            biases_gradient[2] += dz2

            dz1 = np.matrix(np.multiply(self.weights[2].T * dz2, sigmoid_derivative(z[1])))
            weights_gradient[1] += dz1 * inputs[i].T
            biases_gradient[1] += dz1

        return weights_gradient, biases_gradient
Example #8
    def compute_gradient_back_propagation(self, inputs, expected_outputs):
        """
        Computes the gradient with respect to the NN's parameters using back propagation.

        :param inputs: inputs to the network.
        :type inputs: (num_inputs, num_samples) numpy array.
        :param expected_outputs: expected outputs of the network.
        :type expected_outputs: (num_outputs, num_samples) numpy array.
        :return weights_gradient: gradients of the weights at each layer.
        :rtype weights_gradient: 3-dimensional list of numpy arrays.
        :return biases_gradient: gradients of the biases at each layer.
        :rtype biases_gradient: 3-dimensional list of numpy arrays.
        """
        weights_gradient = [None] * 3
        biases_gradient = [None] * 3

        # Add logic to compute the gradients

        delta = [None] * 3
        z, a = self.forward_propagation(inputs)
        y = expected_outputs
        y_hat = a[-1]

        delta[2] = (y_hat - y)
        delta[1] = (self.weights[2].T @ delta[2]) * sigmoid_derivative(z[1])

        m = inputs.shape[1]

        weights_gradient[2] = delta[2] @ a[1].T / m
        biases_gradient[2] = np.array(np.mean(delta[2], axis=1))
        biases_gradient[2] = biases_gradient[2].reshape(
            biases_gradient[2].shape[0], 1)
        weights_gradient[1] = delta[1] @ a[0].T / m
        biases_gradient[1] = np.array(np.mean(delta[1], axis=1))
        biases_gradient[1] = biases_gradient[1].reshape(
            biases_gradient[1].shape[0], 1)

        return weights_gradient, biases_gradient
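Example #8 processes a whole batch at once. The stand-alone sketch below reproduces the same vectorized forward and backward pass with explicit weight matrices, just to make the expected array shapes concrete; all names and sizes are illustrative, not taken from the original class.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_derivative(z):
    s = sigmoid(z)
    return s * (1.0 - s)

rng = np.random.default_rng(0)
n_in, n_hid, n_out, m = 2, 3, 1, 5                   # layer sizes and batch size

W1, b1 = rng.standard_normal((n_hid, n_in)), np.zeros((n_hid, 1))
W2, b2 = rng.standard_normal((n_out, n_hid)), np.zeros((n_out, 1))

X = rng.standard_normal((n_in, m))                   # inputs, one column per sample
Y = rng.integers(0, 2, (n_out, m))                   # expected outputs

# forward pass
z1 = W1 @ X + b1
a1 = sigmoid(z1)
z2 = W2 @ a1 + b2
a2 = sigmoid(z2)

# backward pass, averaged over the batch (same formulas as Example #8)
delta2 = a2 - Y
delta1 = (W2.T @ delta2) * sigmoid_derivative(z1)
dW2 = delta2 @ a1.T / m                              # (n_out, n_hid)
db2 = delta2.mean(axis=1, keepdims=True)             # (n_out, 1)
dW1 = delta1 @ X.T / m                               # (n_hid, n_in)
db1 = delta1.mean(axis=1, keepdims=True)             # (n_hid, 1)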
Example #9
def train(X, y, alpha=1, epochs=10000, classes=[]):

    print("Training with alpha:%s" % (str(alpha)))
    print("Input matrix: %sx%s    Output matrix: %sx%s" %
          (len(X), len(X[0]), len(X[0]), len(classes)))
    np.random.seed(1)

    last_mean_error = 1

    synapse_0 = 2 * np.random.random((len(X[0]), len(classes))) - 1

    layer_0 = X
    for j in range(epochs + 1):

        layer_1 = sigmoid(np.dot(layer_0, synapse_0))

        layer_1_error = y - layer_1
        if (j % 1000) == 0:
            error = np.mean(np.abs(layer_1_error))
            if error >= last_mean_error or error < 1e-2:
                print('break:', error, ', ', last_mean_error)
                break
            print('delta after ', j, ' iters:', error)
            last_mean_error = error

        layer_1_delta = layer_1_error * sigmoid_derivative(layer_1)

        synapse_0_weight_update = layer_0.T.dot(layer_1_delta)

        synapse_0 += alpha * synapse_0_weight_update

    now = datetime.datetime.now()
    synapse = {'synapse0': synapse_0.tolist()}
    with open('synapses.json', 'w') as outfile:
        json.dump(synapse, outfile, indent=4)
    print('Train done.')
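Unlike Example #1, this example and the `BackPropagationLearner` variants that follow pass values that have already gone through the sigmoid to `sigmoid_derivative`. A minimal sketch of helpers using that convention (an assumption, not taken from the original sources):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(value):
    # derivative of the sigmoid expressed in terms of its output value
    return value * (1.0 - value)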
Example #10
def BackPropagationLearner(dataset, net, learning_rate, epochs):
    """[Figure 18.23] The back-propagation algorithm for multilayer network"""
    # Initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5,
                                          max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    '''
    As of now dataset.target gives an int instead of list,
    Changing dataset class will have effect on all the learners.
    Will be taken care of later
    '''
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # Iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # Activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # Forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # Initialize delta
            delta = [[] for i in range(n_layers)]

            # Compute outer layer delta

            # Error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
            # The activation function used is the sigmoid function
            delta[-1] = [
                sigmoid_derivative(o_nodes[i].value) * err[i]
                for i in range(o_units)
            ]

            # Backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]
                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer]
                     for k in range(h_units)]

                delta[i] = [
                    sigmoid_derivative(layer[j].value) *
                    dotproduct(w[j], delta[i + 1]) for j in range(h_units)
                ]

            #  Update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(
                        layer[j].weights,
                        scalar_vector_product(learning_rate * delta[i][j],
                                              inc))

    return net
Example #11
    [1],
    [0],
    [0],
    [1],
])

layer0 = X
hidden_width = 4
synapse0 = generate_random_synapse(get_layer_width(X), hidden_width)
synapse1 = generate_random_synapse(hidden_width, get_layer_width(y))

iterations = int(os.getenv("ITERATIONS", "100000"))
for i in range(iterations):
    layer1 = forward_propagate(layer0, synapse0, sigmoid)
    layer2 = forward_propagate(layer1, synapse1, sigmoid)

    layer2_error = y - layer2
    layer2_delta = layer2_error * sigmoid_derivative(layer2)

    layer1_error = np.dot(layer2_delta, synapse1.T)
    layer1_delta = layer1_error * sigmoid_derivative(layer1)

    synapse1 += np.dot(layer1.T, layer2_delta)
    synapse0 += np.dot(layer0.T, layer1_delta)

    if DEBUG and (i % (iterations // 10) == 0):
        print("Error: ", layer2_error)

print("Output after training:")
print(layer2)
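Example #11 relies on a few helpers whose definitions are not shown. The versions below are assumptions inferred from how the script uses them (with `sigmoid` as sketched after Example #9), not the original code.

import numpy as np

def get_layer_width(layer):
    # number of units, i.e. columns of a (samples, units) matrix
    return layer.shape[1]

def generate_random_synapse(n_in, n_out):
    # weights drawn uniformly from [-1, 1)
    return 2 * np.random.random((n_in, n_out)) - 1

def forward_propagate(layer, synapse, activation):
    # affine transform followed by the activation function
    return activation(np.dot(layer, synapse))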
Example #12
def BackPropagationLearner(dataset, net, learning_rate, epochs):
    """[Figure 18.23] The back-propagation algorithm for multilayer network"""
    # Initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    '''
    As of now dataset.target gives an int instead of list,
    Changing dataset class will have effect on all the learners.
    Will be taken care of later
    '''
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # Iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # Activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # Forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # Initialize delta
            delta = [[] for i in range(n_layers)]

            # Compute outer layer delta

            # Error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
            # The activation function used is the sigmoid function
            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]

            # Backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i+1]
                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                            for j in range(h_units)]

            #  Update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i-1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(layer[j].weights,
                                                  scalar_vector_product(
                                                  learning_rate * delta[i][j], inc))

    return net
Example #13
    def compute_gradient_back_propagation(self, inputs, expected_outputs):
        """
        Computes the gradient with respect to the NN's parameters using back propagation

        :param inputs: inputs to the network.
        :type inputs: list of numpy matrices.
        :param expected_outputs: expected outputs of the network.
        :type expected_outputs: list of numpy matrices.
        :return weights_gradient: gradients of the weights at each layer.
        :rtype weights_gradient: L-dimensional list of numpy matrices.
        :return biases_gradient: gradients of the biases at each layer.
        :rtype biases_gradient: L-dimensional list of numpy matrices.
        """
        weights_gradient = [None] * 3
        biases_gradient = [None] * 3

        weights_gradient[1] = np.zeros((self.num_hiddens, self.num_inputs))
        weights_gradient[2] = np.zeros((self.num_outputs, self.num_hiddens))
        biases_gradient[1] = np.zeros((self.num_hiddens, 1))
        biases_gradient[2] = np.zeros((self.num_outputs, 1))

        # number of training cases
        num_cases = len(inputs)
        outputs = [None] * num_cases

        for i in range(num_cases):
            delta = [None] * 3
            delta[1] = np.zeros((self.num_hiddens, 1))
            delta[2] = np.zeros((self.num_outputs, 1))
            # forward pass through the network
            z, a = self.forward_propagation(inputs[i])
            outputs[i] = a[-1]
            # compute the output-layer delta
            delta[2] = outputs[i] - expected_outputs[i]

            # compute the hidden-layer delta
            for k in range(self.num_hiddens):
                deltak = 0
                for c in range(self.num_outputs):
                    weight = self.weights[2][c, k]
                    delta_1 = delta[2][c, 0]
                    function = sigmoid_derivative(z[1][k, 0])
                    deltak = deltak + weight * delta_1 * function

                delta[1][k, 0] = deltak

            # accumulate the output-layer weights gradient
            for c in range(self.num_outputs):
                for k in range(self.num_hiddens):
                    delta_1 = delta[2][c, 0]
                    output_1 = a[1][k, 0]
                    weights_gradient[2][c, k] += delta_1 * output_1

            # accumulate the hidden-layer weights gradient
            for k in range(self.num_hiddens):
                for j in range(self.num_inputs):
                    delta_1 = delta[1][k, 0]
                    output_1 = a[0][j, 0]
                    weights_gradient[1][k, j] += delta_1 * output_1

            biases_gradient[1] += delta[1]
            biases_gradient[2] += delta[2]

        weights_gradient[1] /= num_cases
        weights_gradient[2] /= num_cases
        biases_gradient[1] /= num_cases
        biases_gradient[2] /= num_cases
        return weights_gradient, biases_gradient
Example #14
def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid, momentum=False, beta=0.903):
    """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
    # Initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    '''
    As of now dataset.target gives an int instead of list,
    Changing dataset class will have effect on all the learners.
    Will be taken care of later.
    '''
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # Iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # Activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # Finding the values of the nodes through forward propagation
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # Initialize delta, which stores the gradient for each activation unit
            delta = [[] for _ in range(n_layers)]

            # initialize the velocity gradient used by momentum
            if momentum:
                v_dw = [[0 for _ in layer] for layer in net]

            # Compute outer layer delta

            # Error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # Compute the output-layer delta for the activation function in use
            if node.activation == sigmoid:
                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == relu:
                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == tanh:
                delta[-1] = [tanh_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            elif node.activation == elu:
                delta[-1] = [elu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
            else:
                delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]


            # Propagating backward and finding gradients of nodes for each hidden layer
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i+1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                            for j in range(h_units)]
                elif activation == relu:
                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                            for j in range(h_units)]
                elif activation == tanh:
                    delta[i] = [tanh_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                            for j in range(h_units)]
                elif activation == elu:
                    delta[i] = [elu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                            for j in range(h_units)]
                else:
                    delta[i] = [leaky_relu_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                            for j in range(h_units)]

            # momentum: bias-corrected exponential moving average of the deltas
            t_ = epoch + 1

            if momentum:
                if epoch == 0:
                    for i in range(len(delta)):
                        for j in range(len(delta[i])):
                            v_dw[i][j] = ((1 - beta) * delta[i][j]) / (1 - beta ** (t_ + 1))
                else:
                    for i in range(len(delta)):
                        for j in range(len(delta[i])):
                            v_dw[i][j] = (beta * v_dw[i][j] + (1 - beta) * delta[i][j]) / (1 - beta ** (t_ + 1))

            # Update weights with plain gradient descent
            if not momentum:
                for i in range(1, n_layers):
                    layer = net[i]
                    inc = [node.value for node in net[i - 1]]
                    units = len(layer)
                    for j in range(units):
                        layer[j].weights = vector_add(
                            layer[j].weights,
                            scalar_vector_product(learning_rate * delta[i][j], inc))
            # Update weights using the velocity gradient (momentum)
            else:
                for i in range(1, n_layers):
                    layer = net[i]
                    inc = [node.value for node in net[i - 1]]
                    units = len(layer)
                    for j in range(units):
                        layer[j].weights = vector_add(
                            layer[j].weights,
                            scalar_vector_product(learning_rate * v_dw[i][j], inc))

    return net
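For reference, the momentum branch above maintains a bias-corrected exponential moving average of the deltas; in the code's own notation, with $t\_ = \text{epoch} + 1$, $\alpha$ the learning rate, and $a_{\text{in}}$ the values of the incoming nodes:

$$ v_{dw} \leftarrow \frac{\beta\, v_{dw} + (1-\beta)\, \delta}{1 - \beta^{\,t\_ + 1}}, \qquad w \leftarrow w + \alpha\, v_{dw}\, a_{\text{in}} $$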
Example #15
def BackPropagationLearner(dataset,
                           net,
                           learning_rate,
                           epochs,
                           activation=sigmoid):
    """
    [Figure 18.23]
    The back-propagation algorithm for multilayer networks.
    """
    # initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5,
                                          max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    # As of now dataset.target gives an int instead of list,
    # Changing dataset class will have effect on all the learners.
    # Will be taken care of later.
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dot_product(inc, node.weights)
                    node.value = node.activation(in_val)

            # initialize delta
            delta = [[] for _ in range(n_layers)]

            # compute outer layer delta

            # error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # calculate delta at output
            if node.activation == sigmoid:
                delta[-1] = [
                    sigmoid_derivative(o_nodes[i].value) * err[i]
                    for i in range(o_units)
                ]
            elif node.activation == relu:
                delta[-1] = [
                    relu_derivative(o_nodes[i].value) * err[i]
                    for i in range(o_units)
                ]
            elif node.activation == tanh:
                delta[-1] = [
                    tanh_derivative(o_nodes[i].value) * err[i]
                    for i in range(o_units)
                ]
            elif node.activation == elu:
                delta[-1] = [
                    elu_derivative(o_nodes[i].value) * err[i]
                    for i in range(o_units)
                ]
            elif node.activation == leaky_relu:
                delta[-1] = [
                    leaky_relu_derivative(o_nodes[i].value) * err[i]
                    for i in range(o_units)
                ]
            else:
                raise ValueError("Activation function unknown.")

            # backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer]
                     for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [
                        sigmoid_derivative(layer[j].value) *
                        dot_product(w[j], delta[i + 1]) for j in range(h_units)
                    ]
                elif activation == relu:
                    delta[i] = [
                        relu_derivative(layer[j].value) *
                        dot_product(w[j], delta[i + 1]) for j in range(h_units)
                    ]
                elif activation == tanh:
                    delta[i] = [
                        tanh_derivative(layer[j].value) *
                        dot_product(w[j], delta[i + 1]) for j in range(h_units)
                    ]
                elif activation == elu:
                    delta[i] = [
                        elu_derivative(layer[j].value) *
                        dot_product(w[j], delta[i + 1]) for j in range(h_units)
                    ]
                elif activation == leaky_relu:
                    delta[i] = [
                        leaky_relu_derivative(layer[j].value) *
                        dot_product(w[j], delta[i + 1]) for j in range(h_units)
                    ]
                else:
                    raise ValueError("Activation function unknown.")

            # update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(
                        layer[j].weights,
                        scalar_vector_product(learning_rate * delta[i][j],
                                              inc))

    return net