Example #1
    def pass_forward(self, inputs, train_mode=True):
        self.inputs = inputs
        batch_size, time_steps, input_dim = inputs.shape

        self.update = np.zeros((batch_size, time_steps, self.h_units))
        self.reset = np.zeros((batch_size, time_steps, self.h_units))
        self.cell = np.zeros((batch_size, time_steps, self.h_units))
        self.states = np.zeros((batch_size, time_steps, self.h_units))
        self.final = np.zeros((batch_size, time_steps, input_dim))

        self.z = np.concatenate((self.inputs, self.states), axis=2)
        self.z_tilde = np.zeros_like(self.z)

        for t in range(time_steps):
            self.update[:, t] = activate(self.gate_activation).forward(
                np.dot(self.z[:, t], self.W_update) + self.b_update)
            self.reset[:, t] = activate(self.gate_activation).forward(
                np.dot(self.z[:, t], self.W_reset) + self.b_reset)
            self.z_tilde[:, t] = np.concatenate(
                (self.reset[:, t] * self.states[:, t - 1], self.inputs[:, t]),
                axis=1)
            # candidate state, computed from the z_tilde assembled for this timestep
            self.cell[:, t] = activate(self.activation).forward(
                np.dot(self.z_tilde[:, t], self.W_cell) + self.b_cell)
            self.states[:, t] = (
                1. - self.update[:, t]
            ) * self.states[:, t - 1] + self.update[:, t] * self.cell[:, t]

            self.final[:, t] = np.dot(self.states[:, t],
                                      self.W_final) + self.b_final  # logits

        if not train_mode:
            return activate('softmax').forward(
                self.final)  # if mode is not training

        return self.final
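For comparison, here is a minimal, self-contained sketch of a standard GRU step with sigmoid gates and a tanh candidate, mirroring the per-timestep computation above; the helper names (sigmoid, gru_step) and the random shape check are illustrative, not part of the example.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_t, h_prev, W_update, W_reset, W_cell, b_update, b_reset, b_cell):
    """One GRU step: new hidden state from the current input and previous state."""
    z = np.concatenate((x_t, h_prev), axis=1)                # [x_t, h_{t-1}]
    update = sigmoid(z @ W_update + b_update)                # update gate
    reset = sigmoid(z @ W_reset + b_reset)                   # reset gate
    z_tilde = np.concatenate((reset * h_prev, x_t), axis=1)  # [r_t * h_{t-1}, x_t]
    cell = np.tanh(z_tilde @ W_cell + b_cell)                # candidate state
    return (1.0 - update) * h_prev + update * cell           # new hidden state

# Shape check with random weights: (batch, input_dim) -> (batch, h_units).
batch, input_dim, h_units = 4, 3, 5
rng = np.random.default_rng(0)
W_u, W_r, W_c = (rng.normal(size=(input_dim + h_units, h_units)) for _ in range(3))
b = np.zeros(h_units)
h_t = gru_step(rng.normal(size=(batch, input_dim)), np.zeros((batch, h_units)),
               W_u, W_r, W_c, b, b, b)
print(h_t.shape)  # (4, 5)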
Example #2
    def pass_backward(self, grad):
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)

        if self.is_trainable:

            dW_input = np.zeros_like(self.W_input)
            dW_recur = np.zeros_like(self.W_recur)
            dW_output = np.zeros_like(self.W_output)

            db_input = np.zeros_like(self.b_input)
            db_output = np.zeros_like(self.b_output)

            for t in np.arange(time_steps)[::-1]:  # reversed
                dW_output += np.dot(grad[:, t].T, self.states[:, t])
                db_output += np.sum(grad[:, t], axis=0)
                dstate = np.dot(grad[:, t], self.W_output) * activate(
                    self.activation).backward(self.state_inputs[:, t])
                next_grad[:, t] = np.dot(dstate, self.W_input)

                for tt in np.arange(max(0, t - self.bptt_truncate),
                                    t + 1)[::-1]:  # reversed
                    dW_input += np.dot(dstate.T, self.inputs[:, tt])
                    dW_recur += np.dot(dstate.T, self.states[:, tt - 1])
                    db_input += np.sum(dstate, axis=0)
                    dstate = np.dot(dstate, self.W_recur) * activate(
                        self.activation).backward(self.state_inputs[:, tt - 1])

            # optimize weights and bias
            self.W_input = optimizer(self.optimizer_kwargs).update(
                self.W_input, cg(dW_input))
            self.W_output = optimizer(self.optimizer_kwargs).update(
                self.W_output, cg(dW_output))
            self.W_recur = optimizer(self.optimizer_kwargs).update(
                self.W_recur, cg(dW_recur))

            self.b_input = optimizer(self.optimizer_kwargs).update(
                self.b_input, cg(db_input))
            self.b_output = optimizer(self.optimizer_kwargs).update(
                self.b_output, cg(db_output))

        # endif self.is_trainable

        return next_grad
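The inner tt loop is what makes this truncated BPTT: from each timestep t, gradients are carried back at most self.bptt_truncate steps. A tiny standalone sketch of the window it walks, newest first (the function name is illustrative):

import numpy as np

def bptt_window(t, bptt_truncate):
    """Timesteps visited by the inner loop of the example, newest first."""
    return np.arange(max(0, t - bptt_truncate), t + 1)[::-1]

print(bptt_window(t=7, bptt_truncate=3))  # [7 6 5 4]
print(bptt_window(t=1, bptt_truncate=3))  # [1 0]  (clipped at the start of the sequence)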
Example #3
    def pass_forward(self, inputs, train_mode = True):
        self.inputs = inputs
        batch_size, time_steps, input_dim = inputs.shape

        self.forget     = np.zeros((batch_size, time_steps, self.h_units))
        self.input      = np.zeros((batch_size, time_steps, self.h_units))
        self.output     = np.zeros((batch_size, time_steps, self.h_units))
        self.states     = np.zeros((batch_size, time_steps, self.h_units))
        self.cell_tilde = np.zeros((batch_size, time_steps, self.h_units))
        self.cell       = np.zeros((batch_size, time_steps, self.h_units))
        self.final      = np.zeros((batch_size, time_steps, input_dim))

        self.z = np.concatenate((self.inputs, self.states), axis = 2)

        for t in range(time_steps):
            self.forget[:, t]     = activate(self.gate_activation).forward(np.dot(self.z[:, t], self.W_forget) + self.b_forget)
            self.input[:, t]      = activate(self.gate_activation).forward(np.dot(self.z[:, t], self.W_input) + self.b_input)
            self.cell_tilde[:, t] = activate(self.activation).forward(np.dot(self.z[:, t], self.W_cell) + self.b_cell)
            self.cell[:, t]       = self.forget[:, t] * self.cell[:, t - 1] + self.input[:, t] * self.cell_tilde[:, t]
            self.output[:, t]     = activate(self.gate_activation).forward(np.dot(self.z[:, t], self.W_output) + self.b_output)
            self.states[:, t]     = self.output[:, t] * activate(self.activation).forward(self.cell[:, t])

            # logits
            self.final[:, t] = np.dot(self.states[:, t], self.W_final) + self.b_final

        if not train_mode:
            return activate('softmax').forward(self.final) # if mode is not training

        return self.final
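For reference, a minimal, self-contained sketch of a standard LSTM step with sigmoid gate activations and tanh elsewhere, for comparison with the per-timestep computation above; all names here are illustrative.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, W_f, W_i, W_c, W_o, b_f, b_i, b_c, b_o):
    """One LSTM step: returns the new hidden state h_t and cell state c_t."""
    z = np.concatenate((x_t, h_prev), axis=1)  # [x_t, h_{t-1}]
    forget = sigmoid(z @ W_f + b_f)            # forget gate
    inp = sigmoid(z @ W_i + b_i)               # input gate
    c_tilde = np.tanh(z @ W_c + b_c)           # candidate cell state
    c_t = forget * c_prev + inp * c_tilde      # new cell state
    out = sigmoid(z @ W_o + b_o)               # output gate
    h_t = out * np.tanh(c_t)                   # new hidden state
    return h_t, c_t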
Example #4
    def pass_forward(self, inputs, train_mode=True):
        self.inputs = inputs
        batch_size, time_steps, input_dim = inputs.shape

        self.state_inputs = np.zeros((batch_size, time_steps, self.h_units))
        self.states = np.zeros(
            (batch_size, time_steps + 1, self.h_units)
        )  # one extra (+1) slot at the end: states[:, -1] stays zero and serves as the initial state at t == 0
        self.state_outputs = np.zeros((batch_size, time_steps, input_dim))

        for t in range(time_steps):
            self.state_inputs[:, t] = (
                np.dot(inputs[:, t], self.W_input.T) +
                np.dot(self.states[:, t - 1], self.W_recur.T)) + self.b_input
            self.states[:, t] = activate(self.activation).forward(
                self.state_inputs[:, t])
            self.state_outputs[:, t] = np.dot(self.states[:, t],
                                              self.W_output.T) + self.b_output

        if not train_mode:
            return activate('softmax').forward(
                self.state_outputs)  # if mode is not training

        return self.state_outputs
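The per-timestep computation here is the classic Elman recurrence. A minimal standalone sketch, assuming tanh as the activation (the example takes it from self.activation):

import numpy as np

def rnn_step(x_t, h_prev, W_input, W_recur, W_output, b_input, b_output):
    """One Elman-RNN step mirroring the per-timestep computation above."""
    state_input = x_t @ W_input.T + h_prev @ W_recur.T + b_input
    h_t = np.tanh(state_input)          # hidden state (tanh stands in for self.activation)
    y_t = h_t @ W_output.T + b_output   # per-timestep output
    return h_t, y_t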
Example #5
    def __init__(self,
                 epochs,
                 activation     = 'sigmoid',
                 loss           = 'categorical_crossentropy',
                 init_method    = 'he_normal',
                 optimizer      = {},
                 penalty        = 'lasso',
                 penalty_weight = 0,
                 l1_ratio       = 0.5):

        self.epochs         = epochs
        self.activate       = activate(activation)
        self.loss           = objective(loss)
        self.init_method    = init(init_method)
        self.optimizer      = optimizer
        self.regularization = regularize(penalty, penalty_weight, l1_ratio = l1_ratio)
Example #6
    def pass_backward(self, grad, epoch_num, batch_num, batch_size):
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)

        if self.is_trainable:

            dW_update = np.zeros_like(self.W_update)
            dW_reset = np.zeros_like(self.W_reset)
            dW_cell = np.zeros_like(self.W_cell)
            dW_final = np.zeros_like(self.W_final)

            db_update = np.zeros_like(self.b_update)
            db_reset = np.zeros_like(self.b_reset)
            db_cell = np.zeros_like(self.b_cell)
            db_final = np.zeros_like(self.b_final)

            dstates = np.zeros_like(self.states)
            dstate_a = np.zeros_like(self.states)
            dstate_b = np.zeros_like(self.states)
            dstate_c = np.zeros_like(self.states)
            dstates_next = np.zeros_like(self.states)
            dstates_prime = np.zeros_like(self.states)

            dz_cell = np.zeros_like(self.cell)
            dcell = np.zeros_like(self.cell)

            dz_reset = np.zeros_like(self.reset)
            dreset = np.zeros_like(self.reset)

            dz_update = np.zeros_like(self.update)
            dupdate = np.zeros_like(self.update)

            for t in np.arange(time_steps)[::-1]:  # reversed

                dW_final += np.dot(self.states[:, t].T, grad[:, t])
                db_final += np.sum(grad[:, t], axis=0)

                dstates[:, t] = np.dot(grad[:, t], self.W_final.T)
                dstates[:, t] += dstates_next[:, t]
                next_grad = np.dot(dstates, self.W_final)

                dcell[:, t] = self.update[:, t] * dstates[:, t]
                dstate_a[:, t] = (1. - self.update[:, t]) * dstates[:, t]
                dupdate[:, t] = (self.cell[:, t] * dstates[:, t]
                                 - self.states[:, t - 1] * dstates[:, t])

                dcell[:, t] = activate(self.activation).backward(
                    self.cell[:, t]) * dcell[:, t]
                dW_cell += np.dot(self.z_tilde[:, t].T, dcell[:, t])
                db_cell += np.sum(dcell[:, t], axis=0)
                dz_cell = np.dot(dcell[:, t], self.W_cell.T)

                dstates_prime[:, t] = dz_cell[:, :self.h_units]
                dstate_b[:, t] = self.reset[:, t] * dstates_prime[:, t]

                dreset[:, t] = self.states[:, t - 1] * dstates_prime[:, t]
                dreset[:, t] = activate(self.gate_activation).backward(
                    self.reset[:, t]) * dreset[:, t]
                dW_reset += np.dot(self.z[:, t].T, dreset[:, t])
                db_reset += np.sum(dreset[:, t], axis=0)
                dz_reset = np.dot(dreset[:, t], self.W_reset.T)

                dupdate[:, t] = activate(self.gate_activation).backward(
                    self.update[:, t]) * dupdate[:, t]
                dW_update += np.dot(self.z[:, t].T, dupdate[:, t])
                db_update += np.sum(dupdate[:, t], axis=0)
                dz_update = np.dot(dupdate[:, t], self.W_update.T)

                dz = dz_reset + dz_update
                dstate_c[:, t] = dz[:, :self.h_units]

                dstates_next = dstate_a + dstate_b + dstate_c

            # optimize weights and bias
            self.W_final = optimizer(self.optimizer_kwargs).update(
                self.W_final, cg(dW_final), epoch_num, batch_num, batch_size)
            self.b_final = optimizer(self.optimizer_kwargs).update(
                self.b_final, cg(db_final), epoch_num, batch_num, batch_size)

            self.W_cell = optimizer(self.optimizer_kwargs).update(
                self.W_cell, cg(dW_cell), epoch_num, batch_num, batch_size)
            self.b_cell = optimizer(self.optimizer_kwargs).update(
                self.b_cell, cg(db_cell), epoch_num, batch_num, batch_size)

            self.W_reset = optimizer(self.optimizer_kwargs).update(
                self.W_reset, cg(dW_reset), epoch_num, batch_num, batch_size)
            self.b_reset = optimizer(self.optimizer_kwargs).update(
                self.b_reset, cg(db_reset), epoch_num, batch_num, batch_size)

            self.W_update = optimizer(self.optimizer_kwargs).update(
                self.W_update, cg(dW_update), epoch_num, batch_num, batch_size)
            self.b_update = optimizer(self.optimizer_kwargs).update(
                self.b_update, cg(db_update), epoch_num, batch_num, batch_size)

        # endif self.is_trainable

        return next_grad
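Inside the time loop, the first block splits the incoming hidden-state gradient according to the GRU update equation h_t = (1 - z_t) * h_{t-1} + z_t * c_tilde_t. A small standalone sketch of that split, with illustrative variable names and random values standing in for the stored activations:

import numpy as np

rng = np.random.default_rng(0)
h_prev, cell_t, update_t, dh_t = (rng.normal(size=(4, 5)) for _ in range(4))

dcell_t   = update_t * dh_t             # dL/dc_tilde_t          (dcell above)
dh_prev   = (1.0 - update_t) * dh_t     # part of dL/dh_{t-1}    (dstate_a above)
dupdate_t = (cell_t - h_prev) * dh_t    # dL/dz_t before the gate's backward (dupdate above)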
Example #7
    def pass_backward(self, grad):
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)

        if self.is_trainable:

            dW_forget = np.zeros_like(self.W_forget)
            dW_input = np.zeros_like(self.W_input)
            dW_output = np.zeros_like(self.W_output)
            dW_cell = np.zeros_like(self.W_cell)
            dW_final = np.zeros_like(self.W_final)

            db_forget = np.zeros_like(self.b_forget)
            db_input = np.zeros_like(self.b_input)
            db_output = np.zeros_like(self.b_output)
            db_cell = np.zeros_like(self.b_cell)
            db_final = np.zeros_like(self.b_final)

            dstates = np.zeros_like(self.states)
            dcell = np.zeros_like(self.cell)
            dcell_tilde = np.zeros_like(self.cell_tilde)
            dforget = np.zeros_like(self.forget)
            dinput = np.zeros_like(self.input)
            doutput = np.zeros_like(self.output)

            dcell_next = np.zeros_like(self.cell)
            dstates_next = np.zeros_like(self.states)

            for t in np.arange(time_steps)[::-1]:  # reversed

                dW_final += np.dot(self.states[:, t].T, grad[:, t])
                db_final += np.sum(grad[:, t], axis=0)

                dstates[:, t] = np.dot(grad[:, t], self.W_final.T)
                dstates[:, t] += dstates_next[:, t]
                next_grad = np.dot(dstates, self.W_final)

                doutput[:, t] = activate(self.activation).forward(
                    self.cell[:, t]) * dstates[:, t]
                doutput[:, t] = activate(self.gate_activation).backward(
                    self.output[:, t]) * doutput[:, t]
                dW_output += np.dot(self.z[:, t].T, doutput[:, t])
                db_output += np.sum(doutput[:, t], axis=0)

                dcell[:, t] += self.output[:, t] * dstates[:, t] * activate(
                    self.activation).backward(self.cell[:, t])
                dcell[:, t] += dcell_next[:, t]
                dcell_tilde[:, t] = dcell[:, t] * self.input[:, t]
                dcell_tilde[:, t] = dcell_tilde[:, t] * activate(
                    self.activation).backward(dcell_tilde[:, t])
                dW_cell += np.dot(self.z[:, t].T, dcell[:, t])
                db_cell += np.sum(dcell[:, t], axis=0)

                dinput[:, t] = self.cell_tilde[:, t] * dcell[:, t]
                dinput[:, t] = activate(self.gate_activation).backward(
                    self.input[:, t]) * dinput[:, t]
                dW_input += np.dot(self.z[:, t].T, dinput[:, t])
                db_input += np.sum(dinput[:, t], axis=0)

                dforget[:, t] = self.cell[:, t - 1] * dcell[:, t]
                dforget[:, t] = activate(self.gate_activation).backward(
                    self.forget[:, t]) * dforget[:, t]
                dW_forget += np.dot(self.z[:, t].T, dforget[:, t])
                db_forget += np.sum(dforget[:, t], axis=0)

                dz_forget = np.dot(dforget[:, t], self.W_forget.T)
                dz_input = np.dot(dinput[:, t], self.W_input.T)
                dz_output = np.dot(doutput[:, t], self.W_output.T)
                dz_cell = np.dot(dcell[:, t], self.W_cell.T)

                dz = dz_forget + dz_input + dz_output + dz_cell
                dstates_next[:, t] = dz[:, :self.h_units]
                dcell_next = self.forget * dcell

            # optimize weights and bias
            self.W_final = optimizer(self.optimizer_kwargs).update(
                self.W_final, cg(dW_final))
            self.b_final = optimizer(self.optimizer_kwargs).update(
                self.b_final, cg(db_final))

            self.W_forget = optimizer(self.optimizer_kwargs).update(
                self.W_forget, cg(dW_forget))
            self.b_forget = optimizer(self.optimizer_kwargs).update(
                self.b_forget, cg(db_forget))

            self.W_input = optimizer(self.optimizer_kwargs).update(
                self.W_input, cg(dW_input))
            self.b_input = optimizer(self.optimizer_kwargs).update(
                self.b_input, cg(db_input))

            self.W_output = optimizer(self.optimizer_kwargs).update(
                self.W_output, cg(dW_output))
            self.b_output = optimizer(self.optimizer_kwargs).update(
                self.b_output, cg(db_output))

            self.W_cell = optimizer(self.optimizer_kwargs).update(
                self.W_cell, cg(dW_cell))
            self.b_cell = optimizer(self.optimizer_kwargs).update(
                self.b_cell, cg(db_cell))

        # endif self.is_trainable

        return next_grad
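Two of the recurrent carries in this backward pass follow directly from c_t = f_t * c_{t-1} + i_t * c_tilde_t and h_t = o_t * tanh(c_t). A minimal standalone sketch, assuming tanh as self.activation and using random values with illustrative names:

import numpy as np

rng = np.random.default_rng(0)
forget_t, output_t, cell_t, dcell_t, dh_t = (rng.normal(size=(4, 5)) for _ in range(5))

dcell_prev = forget_t * dcell_t       # gradient carried to step t-1 (dcell_next above)
doutput_t  = np.tanh(cell_t) * dh_t   # gradient into the output gate (doutput above)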