Example No. 1
 def forward(self, input_tensor, hidden_state):
     self.tanh = TanH()
     self.sigmoid = Sigmoid()
     # store variables which are needed in backward pass
     self.input_tensor = input_tensor
     self.input_h = hidden_state
     # forward propagation algorithm
     self.output_h = self.tanh.forward(
         np.dot(hidden_state, self.W_hh.T) +
         np.dot(input_tensor, self.W_xh.T) + self.B_h)
     output_tensor = self.sigmoid.forward(
         np.dot(self.output_h, self.W_hy.T) + self.B_y)
     return output_tensor, self.output_h
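The forward pass above implements the Elman recurrence h_t = tanh(h_{t-1}·W_hh^T + x_t·W_xh^T + B_h) followed by y_t = sigmoid(h_t·W_hy^T + B_y). Below is a minimal standalone NumPy sketch of the same two equations, assuming hypothetical sizes J=3, H=4, K=2 and random parameters; the framework's TanH and Sigmoid layers are replaced by np.tanh and an explicit sigmoid expression:

    import numpy as np

    J, H, K = 3, 4, 2                      # hypothetical input/hidden/output sizes
    rng = np.random.default_rng(0)
    W_xh, W_hh, W_hy = rng.normal(size=(H, J)), rng.normal(size=(H, H)), rng.normal(size=(K, H))
    B_h, B_y = np.zeros((1, H)), np.zeros((1, K))

    x_t = rng.normal(size=(1, J))          # current input
    h_prev = np.zeros((1, H))              # previous hidden state
    h_t = np.tanh(h_prev @ W_hh.T + x_t @ W_xh.T + B_h)
    y_t = 1.0 / (1.0 + np.exp(-(h_t @ W_hy.T + B_y)))   # sigmoid output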
Example No. 2
 def __init__(self, input_size, hidden_size, output_size):
     super().__init__()
     self.input_size = input_size      # 13
     self.hidden_size = hidden_size    # 7
     self.output_size = output_size    # 5
     self.len_tbptt = 0    # length of the truncated BPTT window
     self.FC_h = FullyConnected(hidden_size + input_size, hidden_size)
     self.FC_y = FullyConnected(hidden_size, output_size)
     self.tan_h = TanH()
     self.h_t = None
     self.memory = False
     self.last_iter_h_t = None
     self.optimizer = None
     
     self.batch_size = None
     self.hidden_FC_mem = []
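In this constructor the new hidden state is produced by a single FullyConnected layer that receives the concatenation of the previous hidden state and the current input. A hypothetical instantiation with the sizes noted in the comments, assuming the surrounding framework (FullyConnected, TanH) is importable and that the enclosing class is the RNN layer shown in full in Example No. 7:

    layer = RNN(input_size=13, hidden_size=7, output_size=5)
    # FC_h maps the concatenated [h_{t-1}, x_t] vector of length 7 + 13 = 20 to a new hidden state of length 7
    # FC_y maps the hidden state of length 7 to the output of length 5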
Example No. 3
    def __init__(self, weights_xh, bias_xh, weights_y, bias_y):
        self.k, self.H = weights_y.shape
        self.j = weights_xh.shape[1] - self.H
        # initialize trainable parameters
        # The hidden-state weights Wf, Wi, Wc, Wo each have shape (H x H); stacked they form (4H x H)
        # The input weights W1, W2, W3, W4 each have shape (H x j); stacked they form (4H x j)
        # Both stacks are combined into a single matrix of shape (4H x (j + H))
        # input_tensor and hidden_state are concatenated to shape (1 x (j + H))
        # The output weight Wy has shape (k x H)
        self.w_xh = weights_xh  # W_xh = [[W1, Wf], [W2, Wi], [W3, Wc], [W4, Wo]], shape (4H x (j + H))
        self.b_xh = bias_xh  # (1 x 4H)
        self.w_y = weights_y  # (k x H)
        self.b_y = bias_y  # (1 x k)

        self.input_xh = None
        self.hidden_state = None
        # store intermediate results for the backward pass
        self.f_t = None
        self.i_t = None
        self.c_hat_t = None
        self.o_t = None
        self.a_t = None
        self.cell_state = None
        self.con_tensor_xh = None
        self.out_hidden_state = None
        self.tan = [TanH() for _ in range(2)]
        self.sig = [Sigmoid() for _ in range(4)]
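The comments above describe how the four gate weight matrices are stacked into one (4H x (j + H)) matrix. The following NumPy sketch, with hypothetical sizes H=3 and j=2, shows how a single matrix product then yields all four gate pre-activations, which are split back into (1 x H) blocks:

    import numpy as np

    H, j = 3, 2
    rng = np.random.default_rng(0)
    w_xh = rng.normal(size=(4 * H, j + H))      # stacked gate weights [W1|Wf; W2|Wi; W3|Wc; W4|Wo]
    b_xh = np.zeros((1, 4 * H))
    xh = rng.normal(size=(1, j + H))            # concatenated [x_t, h_{t-1}]

    a = xh @ w_xh.T + b_xh                      # (1, 4H) pre-activations from a single product
    f_pre, i_pre, c_pre, o_pre = np.split(a, 4, axis=1)   # one (1, H) block per gate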
Example No. 4
class RNN_cell:
    def __init__(self, W_xh, W_hh, W_hy, B_h, B_y):
        # W_xh:(H, J)   W_hh: (H, H)    W_hy: (K, H)    B_h: (1, H)    B_y: (1, K)
        self.W_xh, self.W_hh, self.W_hy, self.B_h, self.B_y = W_xh, W_hh, W_hy, B_h, B_y

        # Variables which are stored in forward pass for backward pass
        self.sigmoid = None  # store sigmoid activation function
        self.tanh = None  # store tanh activation function
        self.input_tensor = None  # store input_tensor
        self.output_h = None  # hidden state of current cell
        self.input_h = None  # hidden state of last cell

    def forward(self, input_tensor, hidden_state):
        self.tanh = TanH()
        self.sigmoid = Sigmoid()
        # store variables which are needed in backward pass
        self.input_tensor = input_tensor
        self.input_h = hidden_state
        # forward propagation algorithm
        self.output_h = self.tanh.forward(
            np.dot(hidden_state, self.W_hh.T) +
            np.dot(input_tensor, self.W_xh.T) + self.B_h)
        output_tensor = self.sigmoid.forward(
            np.dot(self.output_h, self.W_hy.T) + self.B_y)
        return output_tensor, self.output_h

    def backward(self, error_tensor, hidden_error):
        error_tensor = self.sigmoid.backward(error_tensor)
        e_tmp = self.tanh.backward(
            np.dot(error_tensor, self.W_hy) +
            hidden_error)  # error transferred over tanh
        hidden_error = np.dot(e_tmp, self.W_hh)
        output_error = np.dot(e_tmp, self.W_xh)
        grad_W_hy = np.dot(error_tensor.reshape(-1, 1),
                           self.output_h.reshape(1, -1))  # grad_V (K, H)
        grad_B_y = error_tensor  # (1, K)
        grad_W_hh = np.dot(e_tmp.reshape(-1, 1),
                           self.input_h.reshape(1, -1))  # (H, H)
        grad_W_xh = np.dot(e_tmp.reshape(-1, 1),
                           self.input_tensor.reshape(1, -1))  # (H, J)
        grad_B_h = e_tmp
        return output_error, hidden_error, grad_W_hy, grad_B_y, grad_W_hh, grad_W_xh, grad_B_h
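A hedged usage sketch for RNN_cell, assuming TanH, Sigmoid and NumPy are importable as in the snippet, with random parameters and hypothetical sizes J=3, H=4, K=2; the cell is unrolled over a short sequence by feeding the returned hidden state back in:

    import numpy as np

    J, H, K = 3, 4, 2
    rng = np.random.default_rng(0)
    cell = RNN_cell(W_xh=rng.normal(size=(H, J)), W_hh=rng.normal(size=(H, H)),
                    W_hy=rng.normal(size=(K, H)), B_h=np.zeros((1, H)), B_y=np.zeros((1, K)))

    h = np.zeros((1, H))
    for x in rng.normal(size=(5, 1, J)):      # 5 time steps
        y, h = cell.forward(x, h)             # y: (1, K), h: (1, H)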
Example No. 5
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # hidden layer & output layer
        self.fc_h = FullyConnected(self.input_size + self.hidden_size,
                                   self.hidden_size)
        self.fc_y = FullyConnected(self.hidden_size, self.output_size)
        self.sigmoid = Sigmoid()
        self.tanh = TanH()

        # output of hidden layer & output layer
        self.hidden_state = []
        self.hidden_state.append(np.zeros(hidden_size))  # a vector!
        self.output = []

        self._memorize = False
        self._gradient_weights = None
        self._optimizer = None
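A hypothetical usage of this layer, assuming the full class also defines forward() as shown in Example No. 8; the "batch" dimension of the input acts as the time dimension:

    import numpy as np

    rnn = RNN(input_size=13, hidden_size=7, output_size=5)
    sequence = np.random.default_rng(0).normal(size=(9, 13))   # 9 time steps, 13 features each
    prediction = rnn.forward(sequence)                          # expected shape: (9, 5)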
Example No. 6
    def forward(self, input_tensor, hidden_state, cell_state):
        """
        This function realize the forward propagation of one LSTM-cell
        Args:
            input_tensor: x_t, input tensor of current slot.
            hidden_state: h_t-1, hidden state of previous LSTM-cell (time slot)
            cell_state: c_t-1, cell state of previous LSTM-cell (time slot)
        Returns:
            ndarray, output_tensor: y_t, output tensor of current slot.
            ndarray, next_h: h_t, hidden state of previous LSTM-cell, which will be transferred to next cell
            ndarray, next_c: c_t, cell state of previous LSTM-cell, which will be transferred to next cell
        """
        # Data preparation.
        x = input_tensor.reshape(1, -1)  # shape: (1,J)
        prev_h = hidden_state  # shape: (1,H)
        prev_c = cell_state  # shape: (1,H)
        _, H = hidden_state.shape  # hidden size

        # initialize tanh and sigmoid functions
        self.sigmoid = [Sigmoid() for _ in range(4)]
        self.tanh = [TanH() for _ in range(2)]

        # forward propagation in LSTM-cell
        embedding = np.dot(x, self.W_xh.T) + np.dot(
            prev_h, self.W_hh.T) + self.B_h  # (1,4H)
        f = self.sigmoid[0].forward(embedding[:, :H])
        i = self.sigmoid[1].forward(embedding[:, H:2 * H])
        c_hat = self.tanh[0].forward(embedding[:, 2 * H:3 * H])
        o = self.sigmoid[2].forward(embedding[:, 3 * H:])
        # calculation of new cell_state
        next_c = prev_c * f + i * c_hat
        # calculation of new hidden_state
        tanh_output = self.tanh[1].forward(next_c)
        next_h = o * tanh_output
        # calculation of output
        output_tensor = self.sigmoid[3].forward(
            np.dot(next_h, self.W_hy.T) + self.B_y)

        # return the variables which are needed in backward propagation
        self.cache = [
            f, i, c_hat, o, x, prev_h, prev_c, tanh_output, next_h, next_c
        ]

        return output_tensor, next_h, next_c
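The same gate computations as a standalone NumPy sketch, using hypothetical sizes J=3 and H=4 and replacing the framework's Sigmoid and TanH layers with explicit NumPy expressions:

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    J, H = 3, 4
    rng = np.random.default_rng(0)
    W_xh, W_hh, B_h = rng.normal(size=(4 * H, J)), rng.normal(size=(4 * H, H)), np.zeros((1, 4 * H))

    x, prev_h, prev_c = rng.normal(size=(1, J)), np.zeros((1, H)), np.zeros((1, H))
    a = x @ W_xh.T + prev_h @ W_hh.T + B_h                        # (1, 4H) embedding
    f, i = sigmoid(a[:, :H]), sigmoid(a[:, H:2 * H])              # forget and input gates
    c_hat, o = np.tanh(a[:, 2 * H:3 * H]), sigmoid(a[:, 3 * H:])  # candidate cell and output gate
    next_c = prev_c * f + i * c_hat                               # new cell state
    next_h = o * np.tanh(next_c)                                  # new hidden state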
Example No. 7
class RNN(Base.Base_Layer):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size      # 13
        self.hidden_size = hidden_size    # 7
        self.output_size = output_size    # 5
        self.len_tbptt = 0    # length of the truncated BPTT window
        self.FC_h = FullyConnected(hidden_size + input_size, hidden_size)
        self.FC_y = FullyConnected(hidden_size, output_size)
        self.tan_h = TanH()
        self.h_t = None
        self.memory = False
        self.last_iter_h_t = None
        self.optimizer = None
        
        self.batch_size = None
        self.hidden_FC_mem = []

    # whether the RNN regards subsequent sequences as belonging to the same long sequence
    @property
    def memorize(self):
        return self.memory
    
    @memorize.setter
    def memorize(self, value):
        self.memory = value


    """
    Implement a method forward(input tensor) which returns the input tensor for the next layer.
    Consider the ”batch” dimension as the ”time” dimension of a sequence over
    which the recurrence is performed. The first hidden state for this iteration is all zero if
    the boolean member variable is False, otherwise restore the hidden state from the last
    iteration. You can choose to compose parts of the RNN from other layers you already
    implemented.
    """
    # input_tensor = (input_size, batch_size).T
    def forward(self, input_tensor):
        self.batch_size = input_tensor.shape[0]
        # preallocate the hidden-state matrix so no extra saving of vectors is needed
        if self.memory:
            if self.h_t is None:
                self.h_t = np.zeros((self.batch_size + 1, self.hidden_size))    # (9+1, 7)
            else:
                # restore the hidden state from the last iteration
                self.h_t[0] = self.last_iter_h_t
        else:   # start every iteration from an all-zero hidden state
            self.h_t = np.zeros((self.batch_size + 1, self.hidden_size))

        y_t = np.zeros((self.batch_size, self.output_size))
        
        # concatenate h_{t-1} and x_t into x̃_t and forward it through FC_h to obtain the new hidden state h_t
        # 1: for t from 1 to T do:
        # 2:    u_t = W_hh · h_{t-1} + W_xh · x_t + b_h   (equivalently u_t = x̃_t · W_h)
        # 3:    h_t = tanh(u_t)
        # 4:    o_t = W_hy · h_t + b_y
        # 5:    ŷ_t = σ(o_t)
        for batch in range(self.batch_size):    # the batch index plays the role of the time index t
            axis_h_t = self.h_t[batch][np.newaxis, :]           # h_{t-1} as a row vector
            axis_input_t = input_tensor[batch][np.newaxis, :]   # x_t as a row vector
            new_input = np.concatenate((axis_h_t, axis_input_t), axis=1)    # x̃_t = [h_{t-1}, x_t]

            self.hidden_FC_mem.append(new_input)
            wt = self.FC_h.forward(new_input)
            self.h_t[batch + 1] = self.tan_h.forward(wt)    # h_t = tanh(x̃_t · W_h)
            # o_t = W_hy · h_t + b_y --> no sigmoid is applied here; the bias is handled inside FC_y
            # index batch+1 holds h_t, index batch holds h_{t-1}
            y_t[batch] = self.FC_y.forward(self.h_t[batch + 1][np.newaxis, :])

        self.last_iter_h_t = self.h_t[-1]    # remember the last hidden state for the next iteration
        self.input_tensor = input_tensor
        return y_t

    # Remember that optimizers are decoupled from our layers.
    def backward(self, error_tensor):
        self.error_tensor_out = np.zeros((self.batch_size, self.input_size))
        hx_size = self.hidden_size + self.input_size    # 7 + 13 = 20
        steps = 1
        self.gradient_weights_y = np.zeros((self.hidden_size + 1, self.output_size))
        self.gradient_weights_hx = np.zeros((hx_size + 1, self.hidden_size))
        gradient_tanh = 1 - self.h_t[1:] ** 2    # derivative of tanh at every stored hidden state
        error_h = np.zeros((1, self.hidden_size))  # error flowing backwards through the hidden state
        
        # 1: for t from 1 to T do:
        # 2:    Run RNN for one step, computing h_t and y_t
        # 3:    if t mod k_1 == 0:
        # 4:        Run BPTT from t down to t-k_2

        for batch in reversed(range(self.batch_size)):
            one_batch_error = error_tensor[batch]
            error_y_h = self.FC_y.backward(one_batch_error[np.newaxis, :])
            # restore the input that FC_y received in the forward pass (h_t plus the bias column)
            self.FC_y.input_tensor = np.hstack((self.h_t[batch + 1], 1))[np.newaxis, :]

            gra_y_ht = error_h + error_y_h
            gradient_hidden_t = gradient_tanh[batch] * gra_y_ht
            error_hx = self.FC_h.backward(gradient_hidden_t)
            error_h = error_hx[:, 0:self.hidden_size]              # error w.r.t. the hidden part
            error_x = error_hx[:, self.hidden_size:hx_size + 1]    # error w.r.t. the input part
            self.error_tensor_out[batch] = error_x
            # restore the input that FC_h received in the forward pass (x̃_t plus the bias column)
            concat = np.hstack((self.h_t[batch], self.input_tensor[batch], 1))
            self.FC_h.input_tensor = concat[np.newaxis, :]

            if steps <= self.len_tbptt:
                self.weights_y = self.FC_y.getter()   # get_weights()
                self.weights_h = self.FC_h.getter()   # get_weights()
                # accumulate the gradients inside the truncated BPTT window
                self.gradient_weights_y += self.FC_y.gradient_weights()      # .get_gradient_weights()
                self.gradient_weights_hx += self.FC_h.gradient_weights()     # .get_gradient_weights()

            steps += 1

        if self.optimizer is not None:
            self.weights_y = self.optimizer.calculate_update(self.weights_y, self.gradient_weights_y)
            self.weights_h = self.optimizer.calculate_update(self.weights_h, self.gradient_weights_hx)
            self.FC_y.setter(self.weights_y)      # .set_weights(self.weights_y)
            self.FC_h.setter(self.weights_h)      # .set_weights(self.weights_h)

        return self.error_tensor_out
    
    """
    if the hidden state is computed with a single Fully Connected layer, which receives a
stack of the hidden state and the input tensor, the weights of this particular Fully
Connected Layer, are the weights considered to be weights for the whole class. In order
to provide access to the weights of the RNN layer, implement a getter and a setter with
a property for the weights member.
"""
    @property
    def gradient_weights(self):
        return self.gradient_weights_hx

    """@property
    def weights(self):
        weights = self.FC_h.getter()   # .get_weights()
        return weights

    @weights.setter
    def weights(self, weights):
        self.FC_h.setter(weights)"""

    def setter(self, optimizer):
        self._optimizer = copy.deepcopy(optimizer)

    def getter(self):
        return self._optimizer

    optimizer = property(getter, setter)

    def initialize(self, weights_initializer, bias_initializer):
        self.FC_y.initialize(weights_initializer, bias_initializer)
        self.FC_h.initialize(weights_initializer, bias_initializer)
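A hedged usage sketch for this layer, assuming Base.Base_Layer, FullyConnected and TanH come from the surrounding framework; len_tbptt is assumed to be set externally to the desired truncation window:

    import numpy as np

    rnn = RNN(input_size=13, hidden_size=7, output_size=5)
    rnn.memorize = True                                   # carry the hidden state over to the next iteration
    rnn.len_tbptt = 9                                     # truncate BPTT after 9 steps (assumed setting)

    x = np.random.default_rng(0).normal(size=(9, 13))     # 9 time steps of 13 features
    y = rnn.forward(x)                                    # shape (9, 5)
    grad_in = rnn.backward(np.ones_like(y))               # shape (9, 13)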
Example No. 8
class RNN(BaseLayer):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # hidden layer & output layer
        self.fc_h = FullyConnected(self.input_size + self.hidden_size,
                                   self.hidden_size)
        self.fc_y = FullyConnected(self.hidden_size, self.output_size)
        self.sigmoid = Sigmoid()
        self.tanh = TanH()

        # output of hidden layer & output layer
        self.hidden_state = []
        self.hidden_state.append(np.zeros(hidden_size))  # a vector!
        self.output = []

        self._memorize = False
        self._gradient_weights = None
        self._optimizer = None

    @property
    def memorize(self):
        return self._memorize

    @memorize.setter
    def memorize(self, new_memorize):
        self._memorize = new_memorize

    @property
    def weights(self):
        return self.fc_h.weights

    @weights.setter
    def weights(self, new_weights):
        self.fc_h.weights = new_weights

    @property
    def gradient_weights(self):
        return self._gradient_weights

    @gradient_weights.setter
    def gradient_weights(self, new_grad_w):
        self._gradient_weights = new_grad_w

    @property
    def optimizer(self):
        return self._optimizer

    @optimizer.setter
    def optimizer(self, new_optimizer):
        self._optimizer = new_optimizer

    def forward(self, input_tensor):
        self.t = input_tensor.shape[0]

        for t in range(input_tensor.shape[0]):
            if not self.memorize and t > 0:
                self.hidden_state[t - 1] = np.zeros(self.hidden_size)
            # if memorize, last hidden state for t=0 of the second batch = last hidden state of the first batch
            if self.memorize and t == 0:
                self.hidden_state[0] = self.hidden_state[self.t - 1]
            # ht
            x_composed = np.hstack(
                (input_tensor[t], self.hidden_state[t - 1 if t > 0 else 0]))
            x_composed = np.atleast_2d(x_composed)  # 2D
            if t == 0:
                self.hidden_state[t] = self.tanh.forward(
                    self.fc_h.forward(x_composed))  # 2D
            else:
                self.hidden_state.append(
                    self.tanh.forward(self.fc_h.forward(x_composed)))  # 2D
            self.hidden_state[t] = self.hidden_state[t].reshape(
                self.hidden_state[t].shape[1])  # 1D
            # yt
            self.output.append(
                self.fc_y.forward(np.atleast_2d(self.hidden_state[t])))  # 2D
            self.output[t] = self.output[t].reshape(
                self.output[t].shape[1])  # 1D
            self.output[t] = self.sigmoid.forward(self.output[t])

        result = np.array(self.output)
        # store activations for sigmoid & tanh layer
        self.sigmoid_activations = result
        self.tanh_activations = np.array(self.hidden_state)

        return result

    def backward(self, error_tensor):
        gradient_wy = 0    # start from 0 so that the += accumulation below works with ndarrays
        gradient_wh = 0
        gradient_ht = np.zeros((self.t, self.hidden_size))
        for t in range(error_tensor.shape[0] - 1, -1, -1):
            # gradient w.r.t. W_y
            # set activations for sigmoid layer
            self.sigmoid.activations = self.sigmoid_activations[t]
            gradient_ot = self.sigmoid.backward(error_tensor[t])

            # gradient w.r.t. ht
            # t = 0 / T: only one part of sum
            if t == error_tensor.shape[0] - 1:
                gradient_ht[t] = self.fc_y.backward(gradient_ot)

            else:
                # set activations for tanh layer
                self.tanh.activations = self.tanh_activations[t + 1]
                gradient_ut = self.tanh.backward(gradient_ht[t + 1])
                # decompose Wh: W_xh, W_hh
                wh = self.fc_h.backward(gradient_ut)
                if t == 0:
                    gradient_ht[t] = wh[:, self.input_size:self.input_size +
                                        self.hidden_size]
                else:
                    gradient_ht[t] = wh[:, self.input_size:self.input_size +
                                        self.hidden_size] + self.fc_y.backward(
                                            gradient_ot)

            # gradient w.r.t. W_y
            gradient_wy += self.fc_y.gradient_weights

            # gradient w.r.t W_h
            self.tanh.activations = self.tanh_activations[t]
            error = self.fc_h.backward(self.tanh.backward(gradient_ht[t]))
            gradient_wh += self.fc_h.gradient_weights

            # decompose Wh: W_xh, W_hh
            # gradient_whh = gradient_wh[self.input_size:self.input_size+self.hidden_size, :]
            # gradient_wxh = gradient_wh[0:self.input_size, :]

        return error

    def initialize(self, weights_initializer, bias_initializer):
        # the weights property delegates to fc_h, so the shapes have to match fc_h:
        # (input_size + hidden_size) inputs mapped to hidden_size outputs, plus one bias row
        self.weights = weights_initializer.initialize(
            (self.input_size + self.hidden_size, self.hidden_size),
            self.input_size + self.hidden_size, self.hidden_size)
        self.bias = bias_initializer.initialize(
            self.hidden_size, self.input_size + self.hidden_size,
            self.hidden_size)

        self.weights = np.vstack((self.weights, self.bias))
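initialize() follows the convention, assumed from the FullyConnected usage in these examples, of storing the bias as an extra row stacked under the weight matrix. A small standalone shape check of that stacking:

    import numpy as np

    n_in, n_out = 13 + 7, 7                    # hypothetical fan-in (inputs + hidden) and hidden size
    weights = np.ones((n_in, n_out))           # weight block
    bias = np.ones((1, n_out))                 # bias row
    stacked = np.vstack((weights, bias))       # shape (n_in + 1, n_out)
    assert stacked.shape == (n_in + 1, n_out)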