def train(self,
              X,
              y,
              num_passes=1000,
              lr=0.01,
              regularization=0.01,
              to_print=True):
        """Train the network by gradient descent over the whole of ``X``.

        Each pass runs a forward sweep through every layer, back-propagates
        the output error and updates each layer's parameters as soon as its
        gradients are known (last layer first).

        Args:
            X: Input fed to the first layer.
            y: Targets passed to the output function's ``calc_diff``.
            num_passes: Number of passes over ``X``.
            lr: Step size of the parameter update.
            regularization: Factor applied to the weights and added to the
                weight gradient; biases are not regularized.
            to_print: When true, print the loss every 100 passes.

        Raises:
            ValueError: If ``self.activation_func`` or ``self.output_func``
                is not one of the supported names.
        """
        # Validate the configuration up front: previously an unknown name
        # left ``layer``/``output`` unbound and only failed (with an
        # UnboundLocalError) once the training loop first touched them.
        if self.activation_func not in ('sigmoid', 'tanh'):
            raise ValueError(
                "unsupported activation_func: %r" % (self.activation_func,))
        if self.output_func not in ('softmax', 'lse'):
            raise ValueError(
                "unsupported output_func: %r" % (self.output_func,))

        # add gates
        m_gate = MultiplyGate()
        a_gate = AddGate()

        # nonlinear layer and output layer
        layer = Sigmoid() if self.activation_func == 'sigmoid' else Tanh()
        output = Softmax() if self.output_func == 'softmax' else LSE()

        for epoch in range(num_passes):
            # Forward propagation. Entry 0 only carries the raw input; every
            # later entry is the (mul, add, activation) triple of one layer.
            signal = X
            forward = [(None, None, signal)]
            for i, weights in enumerate(self.W):
                mul = m_gate.forward(weights, signal)
                add = a_gate.forward(mul, self.b[i])
                signal = layer.forward(add)
                forward.append((mul, add, signal))

            # last output of forward propagation is an array: num_samples * num_neurons_last_layer

            # Back propagation
            # derivative of cumulative error from output layer
            dfunc = output.calc_diff(forward[-1][2], y)
            for i in range(len(forward) - 1, 0, -1):
                # forward[i] belongs to the parameters stored at index i - 1
                mul, add, _ = forward[i]
                layer_input = forward[i - 1][2]

                dadd = layer.backward(add, dfunc)

                # dLdb and dLdmul are functions of dLdadd
                db, dmul = a_gate.backward(mul, self.b[i - 1], dadd)
                # dfunc becomes the gradient handed to the previous layer;
                # it is computed before this layer's weights are updated.
                dW, dfunc = m_gate.backward(self.W[i - 1], layer_input, dmul)

                # Add regularization terms (biases don't have regularization terms)
                dW += regularization * self.W[i - 1]

                # Gradient descent parameter update
                self.b[i - 1] += -lr * db
                self.W[i - 1] += -lr * dW

            if to_print and epoch % 100 == 0:
                print("Loss after iteration %i: %f" %
                      (epoch, self.calculate_loss(X, y)))
    def calculate_loss(self, X, y):
        """Run a forward pass over ``X`` and return the output function's error.

        Args:
            X: Input fed to the first layer.
            y: Targets passed to the output function's ``eval_error``.

        Returns:
            Whatever ``eval_error`` of the configured output function returns
            for the last layer's activations and ``y``.

        Raises:
            ValueError: If ``self.activation_func`` or ``self.output_func``
                is not one of the supported names.
        """
        # Fail early on a bad configuration instead of hitting an unbound
        # local further down.
        if self.activation_func not in ('sigmoid', 'tanh'):
            raise ValueError(
                "unsupported activation_func: %r" % (self.activation_func,))
        if self.output_func not in ('softmax', 'lse'):
            raise ValueError(
                "unsupported output_func: %r" % (self.output_func,))

        m_gate = MultiplyGate()
        a_gate = AddGate()
        layer = Sigmoid() if self.activation_func == 'sigmoid' else Tanh()
        output = Softmax() if self.output_func == 'softmax' else LSE()

        signal = X
        # loop through each layer: multiply gate, add gate, nonlinear activation
        for i, weights in enumerate(self.W):
            mul = m_gate.forward(weights, signal)
            add = a_gate.forward(mul, self.b[i])
            signal = layer.forward(add)

        return output.eval_error(signal, y)
    def predict(self, X):
        """Run a forward pass over ``X`` and turn the result into labels.

        Args:
            X: Input fed to the first layer.

        Returns:
            For ``'softmax'``: the ``argmax`` along axis 1 of the output
            function's ``eval`` result. For ``'lse'``: the last layer's
            activations thresholded at 0.5 into 0/1 values.

        Raises:
            ValueError: If ``self.activation_func`` or ``self.output_func``
                is not one of the supported names.
        """
        # An unknown output function used to fall through and return None;
        # an unknown activation used to surface as an unbound local.
        if self.activation_func not in ('sigmoid', 'tanh'):
            raise ValueError(
                "unsupported activation_func: %r" % (self.activation_func,))
        if self.output_func not in ('softmax', 'lse'):
            raise ValueError(
                "unsupported output_func: %r" % (self.output_func,))

        m_gate = MultiplyGate()
        a_gate = AddGate()
        layer = Sigmoid() if self.activation_func == 'sigmoid' else Tanh()

        signal = X
        for i, weights in enumerate(self.W):
            mul = m_gate.forward(weights, signal)
            add = a_gate.forward(mul, self.b[i])
            signal = layer.forward(add)

        if self.output_func == 'softmax':
            probs = Softmax().eval(signal)
            return np.argmax(probs, axis=1)

        # 'lse': multiplying the boolean mask by 1 yields integer 0/1 values.
        return np.greater(signal, 0.5) * 1
Exemplo n.º 4
0
    def calculate_loss(self, X, y):
        """Forward ``X`` through every tanh layer and return the softmax loss.

        Args:
            X: Input fed to the first layer.
            y: Targets passed to ``Softmax.loss``.

        Returns:
            The value of ``Softmax.loss`` for the last layer's activations
            and ``y``.
        """
        multiply = MultiplyGate()
        add_bias = AddGate()
        tanh_layer = Tanh()
        softmax = Softmax()

        signal = X
        for idx, weights in enumerate(self.W):
            # multiply gate -> add gate -> tanh, one layer at a time
            weighted = multiply.forward(weights, signal)
            shifted = add_bias.forward(weighted, self.b[idx])
            signal = tanh_layer.forward(shifted)

        return softmax.loss(signal, y)
Exemplo n.º 5
0
    def predict(self, X):
        """Forward ``X`` through every tanh layer and pick a class per row.

        Args:
            X: Input fed to the first layer.

        Returns:
            ``np.argmax`` along axis 1 of ``Softmax.predict`` applied to the
            last layer's activations.
        """
        multiply = MultiplyGate()
        add_bias = AddGate()
        tanh_layer = Tanh()
        softmax = Softmax()

        signal = X
        for idx, weights in enumerate(self.W):
            # multiply gate -> add gate -> tanh, one layer at a time
            weighted = multiply.forward(weights, signal)
            shifted = add_bias.forward(weighted, self.b[idx])
            signal = tanh_layer.forward(shifted)

        class_scores = softmax.predict(signal)
        return np.argmax(class_scores, axis=1)
Exemplo n.º 6
0
    def train(self,
              X,
              y,
              num_passes=20000,
              epsilon=0.01,
              reg_lambda=0.01,
              print_loss=False):
        """Train the tanh/softmax network by gradient descent over ``X``.

        Every pass performs a forward sweep, back-propagates the softmax
        error with immediate per-layer updates (last layer first), and then
        hands the current weights, biases and forward cache to
        ``nn_log_instance`` before calling its ``write_log``.

        Args:
            X: Input fed to the first layer.
            y: Targets passed to ``Softmax.diff``.
            num_passes: Number of passes over ``X``.
            epsilon: Step size of the parameter update.
            reg_lambda: Factor applied to the weights and added to the weight
                gradient; biases are not regularized.
            print_loss: When true, print the loss every 1000 passes.
        """
        multiply = MultiplyGate()
        add_bias = AddGate()
        tanh_layer = Tanh()
        softmax = Softmax()

        for epoch in range(num_passes):
            # Forward sweep. Slot 0 only carries the raw input; each later
            # slot is the (mul, add, activation) triple of one layer.
            signal = X
            cache = [(None, None, signal)]
            for idx, weights in enumerate(self.W):
                weighted = multiply.forward(weights, signal)
                shifted = add_bias.forward(weighted, self.b[idx])
                signal = tanh_layer.forward(shifted)
                cache.append((weighted, shifted, signal))

            # Backward sweep, from the last layer down to the first.
            upstream = softmax.diff(cache[-1][2], y)
            for pos in reversed(range(1, len(cache))):
                param = pos - 1  # cache slot ``pos`` uses parameters ``pos - 1``
                weighted, shifted, _ = cache[pos]
                layer_input = cache[param][2]

                dadd = tanh_layer.backward(shifted, upstream)
                db, dmul = add_bias.backward(weighted, self.b[param], dadd)
                dW, upstream = multiply.backward(self.W[param], layer_input,
                                                 dmul)

                # Only the weights carry a regularization term.
                dW += reg_lambda * self.W[param]

                # Gradient descent step for this layer.
                self.b[param] += -epsilon * db
                self.W[param] += -epsilon * dW

            # Log the state reached after this pass.
            nn_log_instance.w = self.W
            nn_log_instance.b = self.b
            nn_log_instance.forward = cache
            nn_log_instance.write_log()

            if print_loss and not epoch % 1000:
                current_loss = self.calculate_loss(X, y)
                print("Loss after iteration %i: %f" % (epoch, current_loss))
from activation import Tanh
from gate import AddGate, MultiplyGate

# Module-level gate and activation instances used by the RNNLayer methods
# below (one shared instance of each, created at import time).
mulgate = MultiplyGate()
addgate = AddGate()
tanh = Tanh()


class RNNLayer:
    """One time step of a recurrent layer built from the module-level gates.

    ``forward`` caches its intermediate results on the instance so that
    ``backward`` can read them.
    """

    def forward(self, x, prev_a, waa, wax, wya):
        """Compute this step's activation and output product.

        Stores ``mulax``, ``mulaa``, ``add``, ``a`` and ``mulya`` on the
        instance; returns nothing.

        Args:
            x: Input of this time step.
            prev_a: Activation of the previous time step.
            waa: Weights applied to ``prev_a``.
            wax: Weights applied to ``x``.
            wya: Weights applied to the new activation.
        """
        self.mulax = mulgate.forward(wax, x)
        self.mulaa = mulgate.forward(waa, prev_a)
        self.add = addgate.forward(self.mulax, self.mulaa)
        self.a = tanh.forward(self.add)
        # The activation lives on the instance; the bare name ``a`` used here
        # before was undefined and raised NameError.
        self.mulya = mulgate.forward(wya, self.a)

    # The method was originally misspelled ``foward`` while ``backward``
    # called ``self.forward``; keep the old name working for existing callers.
    foward = forward

    def backward(self, x, prev_a, waa, wax, wya, diff_a, dmulya):
        """Re-run ``forward`` and start back-propagating through the step.

        Args:
            x, prev_a, waa, wax, wya: Same as for ``forward``.
            diff_a: Gradient added to the activation gradient coming from
                the output product.
            dmulya: Gradient with respect to ``mulya``.

        NOTE(review): the gradients computed here are neither returned nor
        propagated through the tanh/add/multiply gates, so this method
        looks unfinished. Confirm the intended return value before use.
        """
        # dmulya = y^t - yt
        # dV = (y^t - yt) * at
        self.forward(x, prev_a, waa, wax, wya)
        dV, dav = mulgate.backward(wya, self.a, dmulya)
        da = dav + diff_a
Exemplo n.º 8
0
from activation import Tanh
from gate import AddGate, MultiplyGate
import numpy as np


# Module-level gate and activation instances used by the RNNLayer methods
# below (one shared instance of each, created at import time).
mulGate = MultiplyGate()
addGate = AddGate()
activation = Tanh()

class RNNLayer:
    def forward(self, x, prev_s, U, W, V):
        """Compute one recurrent step and cache every intermediate result.

        Stores ``mulu``, ``mulw``, ``add``, ``s`` and ``mulv`` on the
        instance; returns nothing.

        Args:
            x: Input of this time step.
            prev_s: State of the previous time step.
            U: Weights applied to ``x``.
            W: Weights applied to ``prev_s``.
            V: Weights applied to the new state.
        """
        self.mulu = input_term = mulGate.forward(U, x)
        self.mulw = recurrent_term = mulGate.forward(W, prev_s)
        self.add = pre_activation = addGate.forward(recurrent_term, input_term)
        self.s = state = activation.forward(pre_activation)
        self.mulv = mulGate.forward(V, state)

    def backward(self, x, prev_s, U, W, V, diff_s, dmulv, forward=True):
        """Back-propagate through one recurrent step.

        Args:
            x, prev_s, U, W, V: Same as for ``forward``.
            diff_s: Gradient added to the state gradient coming from the
                output product.
            dmulv: Gradient with respect to ``mulv``.
            forward: When true, ``forward`` is re-run first to refresh the
                cached intermediates; otherwise the cached ones are used.

        Returns:
            Tuple ``(dprev_s, dU, dW, dV)``.
        """
        if forward:
            self.forward(x, prev_s, U, W, V)

        # Output product -> state
        dV, grad_s_from_output = mulGate.backward(V, self.s, dmulv)
        grad_state = grad_s_from_output + diff_s

        # State -> pre-activation -> the two summed products
        grad_add = activation.backward(self.add, grad_state)
        grad_mulw, grad_mulu = addGate.backward(self.mulw, self.mulu, grad_add)

        # Products -> weights and inputs; the gradient w.r.t. x is not used.
        dW, dprev_s = mulGate.backward(W, prev_s, grad_mulw)
        dU, _ = mulGate.backward(U, x, grad_mulu)
        return dprev_s, dU, dW, dV
    
    def backward1(self, x, prev_s, U, W, V, delta1, dmulv, forward=True):
        if forward:
            self.forward(x, prev_s, U, W, V)