Example #1: dense (fully connected) layer
    def pass_backward(self, grad):
        prev_weights = self.weights

        if self.is_trainable:

            dweights = self.inputs.T @ grad
            dbias = np.sum(grad, axis=0, keepdims=True)

            self.weights = optimizer(self.weight_optimizer).update(
                self.weights, dweights)
            self.bias = optimizer(self.weight_optimizer).update(
                self.bias, dbias)

        # endif self.is_trainable

        return grad @ prev_weights.T
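
For reference, the gradients above follow from the usual dense-layer forward pass. A minimal sketch, assuming the layer computes outputs = inputs @ weights + bias (consistent with the shapes used in the snippet, but not taken from the project source):

    def pass_forward(self, inputs):
        # cache the inputs: the backward pass needs them for inputs.T @ grad
        self.inputs = inputs
        return inputs @ self.weights + self.bias

With that forward pass, dweights = inputs.T @ grad and dbias sums grad over the batch axis, while the gradient handed to the previous layer, grad @ prev_weights.T, deliberately uses the weights from before the update.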
Example #2: vanilla RNN with truncated BPTT
    def pass_backward(self, grad):
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)

        if self.is_trainable:

            dW_input = np.zeros_like(self.W_input)
            dW_recur = np.zeros_like(self.W_recur)
            dW_output = np.zeros_like(self.W_output)

            db_input = np.zeros_like(self.b_input)
            db_output = np.zeros_like(self.b_output)

            for t in np.arange(time_steps)[::-1]:  # reversed
                dW_output += np.dot(grad[:, t].T, self.states[:, t])
                db_output += np.sum(grad[:, t], axis=0)
                dstate = np.dot(grad[:, t], self.W_output) * activate(
                    self.activation).backward(self.state_inputs[:, t])
                next_grad[:, t] = np.dot(dstate, self.W_input)

                for tt in np.arange(max(0, t - self.bptt_truncate),
                                    t + 1)[::-1]:  # reversed
                    dW_input += np.dot(dstate.T, self.inputs[:, tt])
                    dW_recur += np.dot(dstate.T, self.states[:, tt - 1])
                    db_input += np.sum(dstate, axis=0)
                    dstate = np.dot(dstate, self.W_recur) * activate(
                        self.activation).backward(self.state_inputs[:, tt - 1])

            # optimize weights and bias
            self.W_input = optimizer(self.optimizer_kwargs).update(
                self.W_input, cg(dW_input))
            self.W_output = optimizer(self.optimizer_kwargs).update(
                self.W_output, cg(dW_output))
            self.W_recur = optimizer(self.optimizer_kwargs).update(
                self.W_recur, cg(dW_recur))

            self.b_input = optimizer(self.optimizer_kwargs).update(
                self.b_input, cg(db_input))
            self.b_output = optimizer(self.optimizer_kwargs).update(
                self.b_output, cg(db_output))

        # endif self.is_trainable

        return next_grad
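
The nested loop is truncated backpropagation through time: for each output step t, the state gradient flows back at most bptt_truncate steps. Assuming the usual state update s_t = f(x_t W_input^T + s_{t-1} W_recur^T + b_input), each inner iteration applies the chain rule

    \delta_{tt-1} = (\delta_{tt} W_recur) \odot f'(z_{tt-1})

which is the dstate = np.dot(dstate, self.W_recur) * activate(...).backward(...) line, with the weight and bias gradients accumulated along the way.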
Example #3: batch normalization
    def pass_backward(self, grad):
        dinput_norm = grad * self.gamma

        if self.is_trainable:

            dbeta = np.sum(grad, axis=0)
            dgamma = np.sum(grad * self.input_norm, axis=0)

            self.gamma = optimizer(self.weight_optimizer).update(
                self.gamma, dgamma)
            self.beta = optimizer(self.weight_optimizer).update(
                self.beta, dbeta)

        # endif self.is_trainable

        dinput = np.divide(1., grad.shape[0]) * self.inv_stddev * (
            grad.shape[0] * dinput_norm - np.sum(dinput_norm, axis=0) -
            self.input_norm * np.sum(dinput_norm * self.input_norm, axis=0))

        return dinput
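
The last three lines are the standard closed-form batch-norm input gradient. With N = grad.shape[0], \hat{x} = input_norm, \hat{g} = dinput_norm = grad * gamma and \sigma^{-1} = inv_stddev, they compute

    \frac{\partial L}{\partial x} = \frac{\sigma^{-1}}{N} \Big( N \hat{g} - \sum_i \hat{g}_i - \hat{x} \odot \sum_i \hat{g}_i \odot \hat{x}_i \Big)

which folds the chain rule through the normalization, the batch mean and the batch variance into a single expression.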
Example #4: dense-style layer with step-aware optimizer updates
    def pass_backward(self, grad, epoch_num, batch_num, batch_size):
        prev_weights = self.weights

        if self.is_trainable:

            dweights = np.sum(grad @ self.weights.T, axis=1)
            self.weights = optimizer(self.weight_optimizer).update(
                self.weights, dweights.T, epoch_num, batch_num, batch_size)

        # endif self.is_trainable

        return grad @ prev_weights.T
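
This variant threads epoch_num, batch_num and batch_size through to the optimizer, so the update rule can depend on the training step. A hypothetical sketch of the update() contract these calls assume (plain SGD with an illustrative step-based decay, not the library's actual implementation):

    class SGD:
        def __init__(self, lr=0.01, decay=0.0):
            self.lr = lr
            self.decay = decay

        def update(self, param, dparam, epoch_num=1, batch_num=1, batch_size=1):
            # illustrative schedule: decay the learning rate with the global step
            step = (epoch_num - 1) * batch_size + batch_num
            lr = self.lr / (1. + self.decay * step)
            return param - lr * dparam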
Example #5: convolution (im2col-based)
    def pass_backward(self, grad):
        input_num, input_depth, input_height, input_width = self.input_shape
        doutput_reshaped = grad.transpose(1, 2, 3, 0).reshape(
            self.filter_num, -1)

        if self.is_trainable:

            dbias = np.sum(grad, axis=(0, 2, 3))
            dbias = dbias.reshape(self.filter_num, -1)

            dweights = doutput_reshaped @ self.input_col.T
            dweights = dweights.reshape(self.weights.shape)

            # optimize the weights and bias
            self.weights = optimizer(self.weight_optimizer).update(
                self.weights, dweights)
            self.bias = optimizer(self.weight_optimizer).update(
                self.bias, dbias)

        # endif self.is_trainable

        weight_reshape = self.weights.reshape(self.filter_num, -1)
        dinput_col = weight_reshape.T @ doutput_reshaped

        pad_height, pad_width = get_pad(self.padding, input_height,
                                        input_width, self.strides[0],
                                        self.strides[1], self.kernel_size[0],
                                        self.kernel_size[1])

        dinputs = col2im_indices(dinput_col,
                                 self.input_shape,
                                 self.kernel_size[0],
                                 self.kernel_size[1],
                                 padding=(pad_height, pad_width),
                                 stride=self.strides[0])

        return dinputs
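
Both matrix products here are the matmul gradient rules applied to the im2col form of the forward pass. Assuming the forward pass computed output_col = weight_reshape @ input_col, then from Y = W X it follows that

    dL/dW = (dL/dY) X^T    and    dL/dX = W^T (dL/dY)

which is dweights = doutput_reshaped @ self.input_col.T and dinput_col = weight_reshape.T @ doutput_reshaped above; col2im_indices then scatters the column gradient back into image layout.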
Example #6: GRU
File: gru.py  Project: jefkine/zeta-learn
    def pass_backward(self, grad, epoch_num, batch_num, batch_size):
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)

        if self.is_trainable:

            dW_update = np.zeros_like(self.W_update)
            dW_reset = np.zeros_like(self.W_reset)
            dW_cell = np.zeros_like(self.W_cell)
            dW_final = np.zeros_like(self.W_final)

            db_update = np.zeros_like(self.b_update)
            db_reset = np.zeros_like(self.b_reset)
            db_cell = np.zeros_like(self.b_cell)
            db_final = np.zeros_like(self.b_final)

            dstates = np.zeros_like(self.states)
            dstate_a = np.zeros_like(self.states)
            dstate_b = np.zeros_like(self.states)
            dstate_c = np.zeros_like(self.states)
            dstates_next = np.zeros_like(self.states)
            dstates_prime = np.zeros_like(self.states)

            dz_cell = np.zeros_like(self.cell)
            dcell = np.zeros_like(self.cell)

            dz_reset = np.zeros_like(self.reset)
            dreset = np.zeros_like(self.reset)

            dz_update = np.zeros_like(self.update)
            dupdate = np.zeros_like(self.update)

            for t in np.arange(time_steps)[::-1]:  # reversed

                dW_final += np.dot(self.states[:, t].T, grad[:, t])
                db_final += np.sum(grad[:, t], axis=0)

                dstates[:, t] = np.dot(grad[:, t], self.W_final.T)
                dstates[:, t] += dstates_next[:, t]
                next_grad = np.dot(dstates, self.W_final)

                dcell[:, t] = self.update[:, t] * dstates[:, t]
                dstate_a[:, t] = (1. - self.update[:, t]) * dstates[:, t]
                dupdate[:, t] = (self.cell[:, t] * dstates[:, t] -
                                 self.states[:, t - 1] * dstates[:, t])

                dcell[:, t] = activate(self.activation).backward(
                    self.cell[:, t]) * dcell[:, t]
                dW_cell += np.dot(self.z_tilde[:, t - 1].T, dcell[:, t])
                db_cell += np.sum(dcell[:, t], axis=0)
                dz_cell = np.dot(dcell[:, t], self.W_cell.T)

                dstates_prime[:, t] = dz_cell[:, :self.h_units]
                dstate_b[:, t] = self.reset[:, t] * dstates_prime[:, t]

                dreset[:, t] = self.states[:, t - 1] * dstates_prime[:, t]
                dreset[:, t] = activate(self.gate_activation).backward(
                    self.reset[:, t]) * dreset[:, t]
                dW_reset += np.dot(self.z[:, t].T, dreset[:, t])
                db_reset += np.sum(dreset[:, t], axis=0)
                dz_reset = np.dot(dreset[:, t], self.W_reset.T)

                dupdate[:, t] = activate(self.gate_activation).backward(
                    self.update[:, t]) * dupdate[:, t]
                dW_update += np.dot(self.z[:, t].T, dupdate[:, t])
                db_update += np.sum(dupdate[:, t], axis=0)
                dz_update = np.dot(dupdate[:, t], self.W_update.T)

                dz = dz_reset + dz_update
                dstate_c[:, t] = dz[:, :self.h_units]

                dstates_next = dstate_a + dstate_b + dstate_c

            # optimize weights and bias
            self.W_final = optimizer(self.optimizer_kwargs).update(
                self.W_final, cg(dW_final), epoch_num, batch_num, batch_size)
            self.b_final = optimizer(self.optimizer_kwargs).update(
                self.b_final, cg(db_final), epoch_num, batch_num, batch_size)

            self.W_cell = optimizer(self.optimizer_kwargs).update(
                self.W_cell, cg(dW_cell), epoch_num, batch_num, batch_size)
            self.b_cell = optimizer(self.optimizer_kwargs).update(
                self.b_cell, cg(db_cell), epoch_num, batch_num, batch_size)

            self.W_reset = optimizer(self.optimizer_kwargs).update(
                self.W_reset, cg(dW_reset), epoch_num, batch_num, batch_size)
            self.b_reset = optimizer(self.optimizer_kwargs).update(
                self.b_reset, cg(db_reset), epoch_num, batch_num, batch_size)

            self.W_update = optimizer(self.optimizer_kwargs).update(
                self.W_update, cg(dW_update), epoch_num, batch_num, batch_size)
            self.b_update = optimizer(self.optimizer_kwargs).update(
                self.b_update, cg(db_update), epoch_num, batch_num, batch_size)

        # endif self.is_trainable

        return next_grad
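
The gradient splits at the top of the loop are consistent with the usual GRU state update s_t = u_t \odot \tilde{c}_t + (1 - u_t) \odot s_{t-1}: differentiating it with respect to \tilde{c}_t, s_{t-1} and u_t yields dcell = update * dstates, dstate_a = (1 - update) * dstates and dupdate = (cell - states_{t-1}) * dstates, which is exactly how the three terms are formed above.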
Example #7: LSTM
    def pass_backward(self, grad):
        _, time_steps, _ = grad.shape
        next_grad = np.zeros_like(grad)

        if self.is_trainable:

            dW_forget = np.zeros_like(self.W_forget)
            dW_input = np.zeros_like(self.W_input)
            dW_output = np.zeros_like(self.W_output)
            dW_cell = np.zeros_like(self.W_cell)
            dW_final = np.zeros_like(self.W_final)

            db_forget = np.zeros_like(self.b_forget)
            db_input = np.zeros_like(self.b_input)
            db_output = np.zeros_like(self.b_output)
            db_cell = np.zeros_like(self.b_cell)
            db_final = np.zeros_like(self.b_final)

            dstates = np.zeros_like(self.states)
            dcell = np.zeros_like(self.cell)
            dcell_tilde = np.zeros_like(self.cell_tilde)
            dforget = np.zeros_like(self.forget)
            dinput = np.zeros_like(self.input)
            doutput = np.zeros_like(self.output)

            dcell_next = np.zeros_like(self.cell)
            dstates_next = np.zeros_like(self.states)

            for t in np.arange(time_steps)[::-1]:  # reversed

                dW_final += np.dot(self.states[:, t].T, grad[:, t])
                db_final += np.sum(grad[:, t], axis=0)

                dstates[:, t] = np.dot(grad[:, t], self.W_final.T)
                dstates[:, t] += dstates_next[:, t]
                next_grad = np.dot(dstates, self.W_final)

                doutput[:, t] = activate(self.activation).forward(
                    self.cell[:, t]) * dstates[:, t]
                doutput[:, t] = activate(self.gate_activation).backward(
                    self.output[:, t]) * doutput[:, t]
                dW_output += np.dot(self.z[:, t].T, doutput[:, t])
                db_output += np.sum(doutput[:, t], axis=0)

                dcell[:, t] += self.output[:, t] * dstates[:, t] * activate(
                    self.activation).backward(self.cell[:, t])
                dcell[:, t] += dcell_next[:, t]
                dcell_tilde[:, t] = dcell[:, t] * self.input[:, t]
                dcell_tilde[:, t] = dcell_tilde[:, t] * activate(
                    self.activation).backward(dcell_tilde[:, t])
                dW_cell += np.dot(self.z[:, t].T, dcell[:, t])
                db_cell += np.sum(dcell[:, t], axis=0)

                dinput[:, t] = self.cell_tilde[:, t] * dcell[:, t]
                dinput[:, t] = activate(self.gate_activation).backward(
                    self.input[:, t]) * dinput[:, t]
                dW_input += np.dot(self.z[:, t].T, dinput[:, t])
                db_input += np.sum(dinput[:, t], axis=0)

                dforget[:, t] = self.cell[:, t - 1] * dcell[:, t]
                dforget[:, t] = activate(self.gate_activation).backward(
                    self.forget[:, t]) * dforget[:, t]
                dW_forget += np.dot(self.z[:, t].T, dforget[:, t])
                db_forget += np.sum(dforget[:, t], axis=0)

                dz_forget = np.dot(dforget[:, t], self.W_forget.T)
                dz_input = np.dot(dinput[:, t], self.W_input.T)
                dz_output = np.dot(doutput[:, t], self.W_output.T)
                dz_cell = np.dot(dcell[:, t], self.W_cell.T)

                dz = dz_forget + dz_input + dz_output + dz_cell
                dstates_next[:, t] = dz[:, :self.h_units]
                dcell_next = self.forget * dcell

            # optimize weights and bias
            self.W_final = optimizer(self.optimizer_kwargs).update(
                self.W_final, cg(dW_final))
            self.b_final = optimizer(self.optimizer_kwargs).update(
                self.b_final, cg(db_final))

            self.W_forget = optimizer(self.optimizer_kwargs).update(
                self.W_forget, cg(dW_forget))
            self.b_forget = optimizer(self.optimizer_kwargs).update(
                self.b_forget, cg(db_forget))

            self.W_input = optimizer(self.optimizer_kwargs).update(
                self.W_input, cg(dW_input))
            self.b_input = optimizer(self.optimizer_kwargs).update(
                self.b_input, cg(db_input))

            self.W_output = optimizer(self.optimizer_kwargs).update(
                self.W_output, cg(dW_output))
            self.b_output = optimizer(self.optimizer_kwargs).update(
                self.b_output, cg(db_output))

            self.W_cell = optimizer(self.optimizer_kwargs).update(
                self.W_cell, cg(dW_cell))
            self.b_cell = optimizer(self.optimizer_kwargs).update(
                self.b_cell, cg(db_cell))

        # endif self.is_trainable

        return next_grad
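
These gate gradients follow from the standard LSTM cell equations, assuming activation is tanh and gate_activation is the sigmoid:

    c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t
    h_t = o_t \odot \tanh(c_t)

Differentiating h_t gives doutput = tanh(c_t) * dstates (the forward() call above) and the tanh'(c_t) factor inside dcell; differentiating c_t gives dinput = \tilde{c}_t * dcell, dforget = c_{t-1} * dcell and the carry dcell_next = forget * dcell into the previous time step.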
Example #8: convolution (explicit loops)
    def pass_backward(self, grad):
        input_num, input_depth, input_height, input_width = self.inputs.shape

        # initialize the gradient(s)
        dinputs = np.zeros(self.inputs.shape)

        if self.is_trainable:

            # initialize the gradient(s)
            dweights = np.zeros(self.weights.shape)
            dbias = np.zeros(self.bias.shape)

            pad_height, pad_width = get_pad(self.padding, input_height,
                                            input_width, self.strides[0],
                                            self.strides[1],
                                            self.kernel_size[0],
                                            self.kernel_size[1])

            pad_size = (np.sum(pad_height) / 2).astype(int)
            if pad_size != 0:
                grad = grad[:, :, pad_size:-pad_size, pad_size:-pad_size]

            # dweights
            for f in np.arange(self.filter_num):  # filter number
                for c in np.arange(input_depth):  # input depth (channels)
                    for h in np.arange(self.kernel_size[0]):  # kernel height
                        for w in np.arange(self.kernel_size[1]):  # kernel width
                            h_end = input_height - self.kernel_size[0] + h + 1
                            w_end = input_width - self.kernel_size[1] + w + 1
                            input_patch = self.inputs[
                                :, c,
                                h:h_end:self.strides[0],
                                w:w_end:self.strides[1]]

                            grad_patch = grad[:, f]
                            dweights[f, c, h, w] = np.sum(
                                input_patch * grad_patch) / input_num

            # dbias
            for f in np.arange(self.filter_num):  # filter number
                dbias[f] = np.sum(grad[:, f]) / input_num

            # optimize the weights and bias
            self.weights = optimizer(self.weight_optimizer).update(
                self.weights, dweights)
            self.bias = optimizer(self.weight_optimizer).update(
                self.bias, dbias)

        # endif self.is_trainable

        # dinputs
        for b in np.arange(input_num):  # batch number
            for f in np.arange(self.filter_num):  # filter number
                for c in np.arange(input_depth):  # input depth (channels)
                    for h in np.arange(self.kernel_size[0]):  # kernel height
                        for w in np.arange(self.kernel_size[1]):  # kernel width
                            h_stride = h * self.strides[0]
                            w_stride = w * self.strides[1]
                            dinputs[b, c,
                                    h_stride:h_stride + self.kernel_size[0],
                                    w_stride:w_stride + self.kernel_size[1]] += (
                                        self.weights[f, c] * grad[b, f, h, w])

        return dinputs
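
The dinputs loop follows the usual transposed-convolution scatter pattern: each gradient element distributes a kernel-sized patch of the filter weights back onto the input region it was computed from,

    dinputs[b, c, i:i+kh, j:j+kw] += weights[f, c] * grad[b, f, h, w],  (i, j) = (h * strides[0], w * strides[1])

with overlapping patches accumulated by the +=.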
Example #9: embedding layer
    def pass_backward(self, grad):
        d_inputs = np.matmul(grad, self.one_hot_inputs)
        d_embeddings = np.sum(d_inputs, axis=0)
        self.weights = optimizer(self.weight_optimizer).update(
            self.weights, d_embeddings.T)