Example #1
    def _backpropagate(self, output_word_index):

        dE_dz_y = self.y.copy() # don't remove the copy() part
        dE_dz_y[range(len(output_word_index)), output_word_index] -= 1.
        self.dE_dWy = np.dot(self.h.T, dE_dz_y)

        dE_dh = np.dot(dE_dz_y, self.Wy.T)

        self.dE_dWe = {}
        self.dE_dW = np.zeros_like(self.W)
        self.dE_dWr = np.zeros_like(self.Wr)
        self.dE_dWip = np.zeros_like(self.Wip)
        self.dE_dWfp = np.zeros_like(self.Wfp)
        self.dE_dWop = np.zeros_like(self.Wop)
        self.dE_dWp = np.zeros_like(self.Wp)

        dE_dm_tm1 = 0.
        dE_dh_tm1 = 0.

        m = self.m

        pause_history = self.pause_history if self.use_pauses else [None]*len(self.word_history)

        # Walk backwards through the stored time steps (backpropagation through time).
        # list() is needed so the zip iterator can be reversed under Python 3.
        for pauses, words, W, Wr, Wip, Wfp, Wop, x, m_tm1, h_tm1, z, i, ig, fg, og in reversed(list(zip(
                                                pause_history, self.word_history,
                                                self.W_history, self.Wr_history,
                                                self.Wip_history, self.Wfp_history, self.Wop_history,
                                                self.x_history, self.m_tm1_history, self.h_tm1_history, self.z_history,
                                                self.i_history, self.ig_history,
                                                self.fg_history, self.og_history))):

            dE_dh = dE_dh + dE_dh_tm1
            dE_dog = dE_dh * z * Sigmoid.dy_dz(y=og)
            dE_dz = dE_dh * og * self.hidden_activation.dy_dz(y=z)
            dE_dm = dE_dz + dE_dm_tm1 + dE_dog * Wop
            dE_dfg = dE_dm * m_tm1 * Sigmoid.dy_dz(y=fg)
            dE_di = dE_dm * ig * self.hidden_activation.dy_dz(y=i)
            dE_dig = dE_dm * i * Sigmoid.dy_dz(y=ig)
            dE_dm_tm1 = dE_dm * fg + dE_dig * Wip + dE_dfg * Wfp

            self.dE_dWip += (dE_dig * m_tm1).sum(0)
            self.dE_dWfp += (dE_dfg * m_tm1).sum(0)
            self.dE_dWop += (dE_dog * m).sum(0)

            d = np.hstack((dE_di, dE_dig, dE_dfg, dE_dog))
            
            dE_dx = np.dot(d, W.T) * self.hidden_activation.dy_dz(y=x)
            dE_dh_tm1 = np.dot(d, Wr.T)

            self.dE_dW += np.dot(x.T, d)
            self.dE_dWr += np.dot(h_tm1.T, d)

            for word, dE_dx_word in zip(words, dE_dx):
                self.dE_dWe[word] = self.dE_dWe.get(word, 0.) + dE_dx_word

            if self.use_pauses:
                self.dE_dWp += np.dot(pauses.T, dE_dx)

            dE_dh = 0.
            m = m_tm1
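The first two lines of _backpropagate rely on the standard softmax-plus-cross-entropy identity: the gradient of the loss with respect to the pre-softmax scores is the predicted distribution with 1 subtracted at the target index, which is exactly what the dE_dz_y[...] -= 1. line computes. A small self-contained sketch checking that identity numerically; the toy numbers are made up and not part of the original code:

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

rng = np.random.default_rng(0)
z = rng.normal(size=(3, 5))            # 3 samples, 5 classes
targets = np.array([1, 4, 0])          # target class index per sample

y = softmax(z)
analytic = y.copy()
analytic[np.arange(len(targets)), targets] -= 1.0   # same trick as dE_dz_y above

# central finite-difference of the per-sample loss -log softmax(z)[target]
eps = 1e-6
numeric = np.zeros_like(z)
for r in range(z.shape[0]):
    for c in range(z.shape[1]):
        zp = z.copy(); zp[r, c] += eps
        zm = z.copy(); zm[r, c] -= eps
        lp = -np.log(softmax(zp)[r, targets[r]])
        lm = -np.log(softmax(zm)[r, targets[r]])
        numeric[r, c] = (lp - lm) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-5)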
Example #2
    def predict(self, input_word_index, pause_duration=None):
        assert self.initialized, "initialize or load before using"

        self.t_lstm.predict(input_word_index, pause_duration, compute_only_features=True)

        self.m_tm1 = self.m
        self.h_tm1 = self.h

        r = np.dot(self.h_tm1, self.Wr)
        z1 = np.dot(self.t_lstm.h, self.W)
        if self.use_pauses:
            z1 += np.dot(pause_duration[:,np.newaxis], self.Wp)

        z = self.slice(r, self.hidden_size, 0) + self.slice(z1, self.hidden_size, 0)
        self.i  = self.hidden_activation.y(z)

        z = self.slice(r, self.hidden_size, 1) + self.slice(z1, self.hidden_size, 1) + self.m_tm1 * self.Wip
        self.ig = Sigmoid.y(z)
        
        z = self.slice(r, self.hidden_size, 2) + self.slice(z1, self.hidden_size, 2) + self.m_tm1 * self.Wfp
        self.fg = Sigmoid.y(z)
        
        self.m = self.i * self.ig + self.m_tm1 * self.fg
        
        z = self.slice(r, self.hidden_size, 3) + self.slice(z1, self.hidden_size, 3) + self.m * self.Wop
        self.og = Sigmoid.y(z)
        
        self.z = self.hidden_activation.y(self.m)
        self.h = self.z * self.og

        z_y = np.dot(self.h, self.Wy)
        self.y = Softmax.y(z=z_y)

        self._remember_state(pause_duration)
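The slice helper used above is not part of this excerpt. Judging from how the backward pass stacks the four gate gradients with np.hstack, it presumably extracts the k-th hidden_size-wide block from the concatenated gate pre-activations. A hypothetical stand-in (the name slice_block and the layout are my assumption, not the original implementation):

def slice_block(a, size, k):
    # hypothetical: return the k-th block of width `size` along the feature axis
    # (k = 0..3 for the cell input, input gate, forget gate and output gate)
    return a[:, k * size:(k + 1) * size]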
Example #3
    def __init__(self, learning_rate=.1, momentum=0.3, gradient_descent=True):
        self.param = None
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.gradient_descent = gradient_descent
        self.sigmoid = Sigmoid()
        self.log_loss = LogisticLoss()
    def test_sigma(self):
        self.assertAlmostEqual(Sigmoid.activation(0), 0.5, places=2)
        self.assertAlmostEqual(Sigmoid.activation(50), 1, places=2)
        self.assertAlmostEqual(Sigmoid.activation(-50), 0, places=2)

        self.assertAlmostEqual(Sigmoid.activation(1), 0.731, places=2)
        self.assertAlmostEqual(Sigmoid.activation(-1), 0.2689, places=2)
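This test (and test_sigma_prime further down) assumes a Sigmoid class that exposes activation and gradient as static methods. A minimal sketch consistent with those tests, written as an assumption rather than the original implementation:

import numpy as np

class Sigmoid:
    @staticmethod
    def activation(z):
        # logistic function: 1 / (1 + exp(-z)); activation(0) == 0.5
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def gradient(z):
        # derivative of the logistic function: sigma(z) * (1 - sigma(z))
        s = Sigmoid.activation(z)
        return s * (1.0 - s)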
Example #5
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()
Example #6
    def __init__(self, activation_function):
        Layer.__init__(self)

        # Instantiate the chosen activation function
        if activation_function == "relu":
            self.activation_function = Relu()
        elif activation_function == "sigmoid":
            self.activation_function = Sigmoid()
Example #7
    def _backpropagate(self, output_word_index):

        dE_dz_y = self.y.copy()  # don't remove the copy() part
        dE_dz_y[range(len(output_word_index)), output_word_index] -= 1.
        self.dE_dWy = np.dot(self.h.T, dE_dz_y)

        dE_dh = np.dot(dE_dz_y,
                       self.Wy.T) * self.hidden_activation.dy_dz(y=self.h)

        self.dE_dWr = np.zeros_like(self.Wr)
        self.dE_dW = np.zeros_like(self.W)
        self.dE_dWip = np.zeros_like(self.Wip)
        self.dE_dWfp = np.zeros_like(self.Wfp)
        self.dE_dWop = np.zeros_like(self.Wop)
        self.dE_dWp = np.zeros_like(self.Wp)

        dE_dm_tm1 = 0.
        dE_dh_tm1 = 0.

        m = self.m

        pause_history = self.pause_history if self.use_pauses else [
            None
        ] * len(self.h_tm1_history)

        for pauses, Wr, Wip, Wfp, Wop, t_lstm_h, m_tm1, h_tm1, z, i, ig, fg, og in reversed(
                list(
                    zip(pause_history, self.Wr_history, self.Wip_history,
                        self.Wfp_history, self.Wop_history,
                        self.t_lstm_h_history, self.m_tm1_history,
                        self.h_tm1_history, self.z_history, self.i_history,
                        self.ig_history, self.fg_history, self.og_history))):

            dE_dh = dE_dh + dE_dh_tm1
            dE_dog = dE_dh * z * Sigmoid.dy_dz(y=og)
            dE_dz = dE_dh * og * self.hidden_activation.dy_dz(y=z)
            dE_dm = dE_dz + dE_dm_tm1 + dE_dog * Wop
            dE_dfg = dE_dm * m_tm1 * Sigmoid.dy_dz(y=fg)
            dE_di = dE_dm * ig * self.hidden_activation.dy_dz(y=i)
            dE_dig = dE_dm * i * Sigmoid.dy_dz(y=ig)
            dE_dm_tm1 = dE_dm * fg + dE_dig * Wip + dE_dfg * Wfp

            self.dE_dWip += (dE_dig * m_tm1).sum(0)
            self.dE_dWfp += (dE_dfg * m_tm1).sum(0)
            self.dE_dWop += (dE_dog * m).sum(0)

            d = np.hstack((dE_di, dE_dig, dE_dfg, dE_dog))

            dE_dh_tm1 = np.dot(d, Wr.T)
            if self.use_pauses:
                self.dE_dWp += np.dot(pauses.T, d)
            self.dE_dW += np.dot(t_lstm_h.T, d)
            self.dE_dWr += np.dot(h_tm1.T, d)

            dE_dh = 0.
            m = m_tm1
Example #8
class LogisticRegression():
    """The Logistic Regression classifier. 
    Parameters:
    -----------
    learning_rate: float
        The step length that will be taken when following the negative gradient during
        training.
    gradient_descent: boolean
        Whether gradient descent should be used during training. If false,
        batch optimization by least squares is used instead.
    """
    def __init__(self, learning_rate=.1, gradient_descent=True):
        self.param = None
        self.learning_rate = learning_rate
        self.gradient_descent = gradient_descent
        self.sigmoid = Sigmoid()
        self.log_loss = LogisticLoss()

    def fit(self, X, y, n_iterations=4000):
        # Add dummy ones for bias weights
        X = np.insert(X, 0, 1, axis=1)

        n_samples, n_features = np.shape(X)

        # Initial parameters between [-1/sqrt(N), 1/sqrt(N)]
        a = -1 / math.sqrt(n_features)
        b = -a
        self.param = (b - a) * np.random.random((n_features, )) + a

        # Tune parameters for n iterations
        for i in range(n_iterations):
            # Make a new prediction
            y_pred = self.sigmoid.function(X.dot(self.param))
            if self.gradient_descent:
                # Move against the gradient of the loss function with
                # respect to the parameters to minimize the loss
                self.param -= self.learning_rate * self.log_loss.gradient(
                    y, X, self.param)
            else:
                # Make a diagonal matrix of the sigmoid gradient column vector
                diag_gradient = make_diagonal(
                    self.sigmoid.gradient(X.dot(self.param)))
                # Batch opt:
                self.param = np.linalg.pinv(X.T.dot(diag_gradient).dot(X)).dot(
                    X.T).dot(
                        diag_gradient.dot(X).dot(self.param) + y - y_pred)

    def predict(self, X):
        # Add dummy ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        # Return the final class prediction
        dot = X.dot(self.param)
        y_pred = np.round(self.sigmoid.function(dot)).astype(int)
        return y_pred
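A hypothetical usage sketch for the LogisticRegression class above. It assumes Sigmoid and LogisticLoss from the same codebase are importable; the toy blob data is made up for illustration:

import numpy as np

# two well-separated 2-D blobs as a toy binary classification problem
rng = np.random.default_rng(1)
X = np.vstack((rng.normal(loc=-2.0, size=(50, 2)),
               rng.normal(loc=+2.0, size=(50, 2))))
y = np.hstack((np.zeros(50), np.ones(50)))

clf = LogisticRegression(learning_rate=0.1, gradient_descent=True)
clf.fit(X, y, n_iterations=4000)
print("training accuracy:", np.mean(clf.predict(X) == y))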
    def test_get_final_layer_error_for_arrays(self):
        quadratic = cost_functions.QuadraticCost(neural_net=self.net)

        z_last = np.array([3, -1], float)
        z_last_prime = Sigmoid.gradient(z_last)
        y = np.array([0, 0.5], float)
        a_last = Sigmoid.activation(z_last)
        nabla = quadratic.get_final_layer_error(a_last, y, z_last_prime)

        self.assertAlmostEqual(nabla[0], (a_last[0] - y[0]) * z_last_prime[0], places=2)
        self.assertAlmostEqual(nabla[1], (a_last[1] - y[1]) * Sigmoid.gradient(z_last[1]),
                               places=2)
Example #10
    def predict(self,
                input_word_index,
                pause_duration=None,
                compute_only_features=False):
        assert self.initialized, "initialize or load before using"

        self.m_tm1 = self.m
        self.h_tm1 = self.h

        r = np.dot(self.h_tm1, self.Wr)

        z = self.We[input_word_index]
        if self.use_pauses:
            z += np.dot(pause_duration[:, np.newaxis], self.Wp)
        self.x = self.hidden_activation.y(z)

        z1 = np.dot(self.x, self.W)

        z = self.slice(r, self.hidden_size, 0) + self.slice(
            z1, self.hidden_size, 0)
        self.i = self.hidden_activation.y(z)

        z = self.slice(r, self.hidden_size, 1) + self.slice(
            z1, self.hidden_size, 1) + self.m_tm1 * self.Wip
        self.ig = Sigmoid.y(z)

        z = self.slice(r, self.hidden_size, 2) + self.slice(
            z1, self.hidden_size, 2) + self.m_tm1 * self.Wfp
        self.fg = Sigmoid.y(z)

        self.m = self.i * self.ig + self.m_tm1 * self.fg

        z = self.slice(r, self.hidden_size, 3) + self.slice(
            z1, self.hidden_size, 3) + self.m * self.Wop
        self.og = Sigmoid.y(z)

        self.z = self.hidden_activation.y(self.m)
        self.h = self.z * self.og

        if not compute_only_features:
            z_y = np.dot(self.h, self.Wy)
            self.y = Softmax.y(z=z_y)

        if self.use_pauses:
            self._remember_state(input_word_index, pause_duration[:,
                                                                  np.newaxis])
        else:
            self._remember_state(input_word_index)
Example #11
class Activation(Layer):
    def __init__(self, activation_function):
        Layer.__init__(self)

        # Instantiate the chosen activation function
        if activation_function == "relu":
            self.activation_function = Relu()
        elif activation_function == "sigmoid":
            self.activation_function = Sigmoid()

    def forward_propagation(self, X):
        # Save the input for the layer
        self.X = X
        A = self.activation_function(self.X)

        # If there is a subsequent layer then return its output, else return the output of this layer
        if self.next_layer is not None:
            return self.next_layer.forward_propagation(A)
        else:
            return A

    def backward_propogation(self, error_signal):
        # If there is a preceding layer then pass the error_signal
        if self.previous_layer is not None:
            self.previous_layer.backward_propogation(
                error_signal * self.activation_function.derivative(self.X))

    def initalise(self):
        self.shape = self.previous_layer.shape
Example #12
    def test_get_final_layer_error_for_1_element_vectors(self):
        cross_entropy = cost_functions.CrossEntropyCost(self.net)
        z_last = np.array([3], float)
        z_last_prime = Sigmoid.gradient(z_last)

        y = np.array([0], float)
        a_last = Sigmoid.activation(z_last)
        nabla = cross_entropy.get_final_layer_error(a_last, y, z_last_prime)
        self.assertAlmostEqual(nabla[0], (a_last - y)[0], places=2)

        z_last = np.array([-1], float)
        z_last_prime = Rectifier.gradient(z_last)
        y = np.array([0.5], float)
        a_last = Sigmoid.activation(z_last)
        nabla = cross_entropy.get_final_layer_error(a_last, y, z_last_prime)
        self.assertAlmostEqual(nabla[0], (a_last - y)[0], places=2)
    def forward(self, inputs):
        self.x = inputs.reshape(1, X_DIM)
        self.y1 = np.matmul(self.x, self.w1) + self.b1
        self.y1 = LeakyReLU(self.y1)
        self.y2 = np.matmul(self.y1, self.w2) + self.b2
        self.y2 = LeakyReLU(self.y2)
        self.y3 = np.matmul(self.y2, self.w3) + self.b3
        self.y = Sigmoid(self.y3)
        return self.y
Example #14
    def __init__(self, grad_wrt_theta=True):
        sigmoid = Sigmoid()
        self.log_func = sigmoid.function
        self.log_grad = sigmoid.gradient

        if grad_wrt_theta:
            self.gradient = self._grad_wrt_theta
        if not grad_wrt_theta:
            self.gradient = self._grad_wrt_pred
            self.hess = self._hess_wrt_pred
Example #15
class LogisticRegression():
    """ Logistic Regression classifier.
    Parameters:
    -----------
    learning_rate: float
        The step length that will be taken when following the negative gradient during
        training.
    gradient_descent: boolean
        Whether gradient descent should be used during training. If false,
        batch optimization by least squares is used instead.
    """
    def __init__(self, learning_rate=.1, gradient_descent=True):
        self.param = None
        self.learning_rate = learning_rate
        self.gradient_descent = gradient_descent
        self.sigmoid = Sigmoid()

    def _initialize_parameters(self, X):
        n_features = np.shape(X)[1]
        # Initialize parameters between [-1/sqrt(N), 1/sqrt(N)]
        limit = 1 / math.sqrt(n_features)
        self.param = np.random.uniform(-limit, limit, (n_features, ))

    def fit(self, X, y, n_iterations=1000):
        self._initialize_parameters(X)
        # Tune parameters for n iterations
        for i in range(n_iterations):
            # Make a new prediction
            y_pred = self.sigmoid(X.dot(self.param))
            if self.gradient_descent:
                # Move against the gradient of the loss function with
                # respect to the parameters to minimize the loss
                self.param -= self.learning_rate * -(y - y_pred).dot(X)
            else:
                # Make a diagonal matrix of the sigmoid gradient column vector
                diag_gradient = make_diagonal(
                    self.sigmoid.gradient(X.dot(self.param)))
                # diag_gradient = np.zeros((len(X), len(X)))
                # Batch opt:
                self.param = np.linalg.pinv(X.T.dot(diag_gradient).dot(X)).dot(
                    X.T).dot(
                        diag_gradient.dot(X).dot(self.param) + y - y_pred)

    def predict(self, X):
        y_pred = np.round(self.sigmoid(X.dot(self.param))).astype(int)
        return y_pred

    #TODO
    def predict_proba(self, X):
        y_prob = self.sigmoid(X.dot(self.param))
        return [y_prob, y_prob]
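The else-branch in fit ("Batch opt") is a single Newton step, i.e. one round of iteratively reweighted least squares: with R = diag(sigmoid'(X.param)), the gradient of the logistic loss is -X^T (y - y_pred) and the Hessian is X^T R X, and param - H^-1 g rearranges into exactly the pinv expression used in the code. A toy-sized numeric check of that rearrangement (made-up data, my own sketch, not from the original repo):

import numpy as np

rng = np.random.default_rng(2)
X = np.insert(rng.normal(size=(8, 3)), 0, 1, axis=1)   # bias column, as in fit()
y = rng.integers(0, 2, size=8).astype(float)
param = rng.normal(size=X.shape[1])

s = 1.0 / (1.0 + np.exp(-X.dot(param)))                # sigmoid(X.param)
y_pred = s
R = np.diag(s * (1.0 - s))                             # diagonal of the sigmoid gradient

# the form used in the code vs. the explicit Newton update param - H^-1 g
code_form = np.linalg.pinv(X.T @ R @ X) @ X.T @ (R @ X @ param + y - y_pred)
newton = param - np.linalg.pinv(X.T @ R @ X) @ (-(X.T @ (y - y_pred)))
assert np.allclose(code_form, newton)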
    def test_sigma_prime(self):
        self.assertAlmostEqual(Sigmoid.gradient(0), 0.25, places=3)
        self.assertAlmostEqual(Sigmoid.gradient(-50), 0, places=3)
        self.assertAlmostEqual(Sigmoid.gradient(50), 0, places=3)

        self.assertAlmostEqual(Sigmoid.gradient(50),
                               Sigmoid.activation(50) *
                               (1 - Sigmoid.activation(50)),
                               places=3)
Example #17
    def __init__(self):
        sigmoid = Sigmoid()
        self.log_func = sigmoid
        self.log_grad = sigmoid.gradient
Example #18
class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with one hidden layer.
    Unrolled to display the whole forward and backward pass.
    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit   = 1 / math.sqrt(n_features)
        self.W  = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit   = 1 / math.sqrt(self.n_hidden)
        self.V  = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):

        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0  # (1079,64)(64,16)+(1,16) -> (1079,16)+(1,16) -> (1079,16)
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input) #(1079,10)(1079,10)->(1079,10)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)    # (16,1079)(1079,10)->(16,10)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)  # (1,10)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer     # (1079,16)
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V  -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W  -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict labels of X
    def predict(self, X):
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
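A hypothetical usage sketch for the MultilayerPerceptron above. It assumes Sigmoid, Softmax and CrossEntropy from the same codebase; note that fit expects one-hot encoded targets, since _initialize_weights reads n_outputs from y.shape. The toy data is made up for illustration:

import numpy as np

# toy 3-class problem: one Gaussian blob per class
rng = np.random.default_rng(3)
X = np.vstack([rng.normal(loc=c, size=(40, 4)) for c in (-3.0, 0.0, 3.0)])
labels = np.repeat(np.arange(3), 40)
y_onehot = np.eye(3)[labels]                       # one-hot targets for fit()

clf = MultilayerPerceptron(n_hidden=16, n_iterations=3000, learning_rate=0.01)
clf.fit(X, y_onehot)
pred = np.argmax(clf.predict(X), axis=1)           # predict() returns class probabilities
print("training accuracy:", np.mean(pred == labels))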
Example #19
    def __init__(self, learning_rate=.1, gradient_descent=True):
        self.param = None
        self.learning_rate = learning_rate
        self.gradient_descent = gradient_descent
        self.sigmoid = Sigmoid()
import random

import numpy as np
from shallow_network import ShallowNetwork

from activation_functions import Sigmoid, LeakyRelu

# Train the network to behave like a binary "AND" function
training_data = [[[0, 0], [0]], [[0, 1], [0]], [[1, 0], [0]], [[1, 1], [1]]]

# network = ShallowNetwork(2, 1, LeakyRelu(), 0.03)
network = ShallowNetwork(2, 1, Sigmoid(), 0.5)

for training_session in range(10000):
    training_set = random.choice(training_data)
    inputs = training_set[0]
    target_output = training_set[1]
    outputs = network.feed_forward(inputs)
    network.back_propagate(inputs, outputs, target_output)
    error = np.subtract(outputs, target_output)
    print('error:', '{:.4f}'.format(abs(error[0])), 'target_output',
          target_output, 'output:', outputs)
Example #21
import random

import numpy as np
from deep_network import DeepNetwork

from activation_functions import Sigmoid, LeakyRelu

# Train the network to give us the XOR on neuron 0 and the OR on neuron 1
training_data = [[[0, 0], [0, 0]], [[0, 1], [1, 1]], [[1, 0], [1, 1]],
                 [[1, 1], [0, 1]]]

# network = DeepNetwork(2, 4, 1, LeakyRelu(), 0.03)
network = DeepNetwork(2, 4, 2, Sigmoid(), 0.5)

for training_session in range(20000):
    training_set = random.choice(training_data)
    inputs = training_set[0]
    target_output = training_set[1]
    outputs = network.feed_forward(inputs)
    network.back_propagate(inputs, outputs, target_output)
    error = np.subtract(outputs, target_output)
    print('error:',
          ['{:.4f}'.format(abs(error[0])), '{:.4f}'.format(abs(error[1]))],
          'target_output', target_output, 'output:', outputs)
Example #22
        score_sum = sum(np.array(self.game.gamegrid.matrix).flatten().tolist())
        penalty = self.fitness_penalty
        return score_max + score_sum + penalty


GENERATION_SIZE = 4
GENERATION_COUNT = 2
PRINT_STEPS = True
WEIGHTS_METHOD = 'random'

nn_parameters = {
    'neurons_per_hidden_layer': [17, 17, 17],
    'input_layer_size': 17,
    'output_layer_size': 4,
    'input_af': Log2(),
    'hidden_af': [TanH(), ReLU(), Sigmoid()],
    'output_af': TanH()
}

game_parameters = {
    'manual_input': True,
    'random': False,
    'steps': 0,
    'sleep': 0
}

ga = GeneticAlgorithm(generation_size=GENERATION_SIZE, **nn_parameters)
ga.add_new_generation(weights_method=WEIGHTS_METHOD)
ga.populate_new_generation(ga[0], ga[0], weights_method=WEIGHTS_METHOD)

for k in range(GENERATION_COUNT):