Example #1
    def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
        """2-fold for loop implementation."""
        m, H, W, C = dY.shape

        stride_H, stride_W = self.stride
        k_H, k_W = self.kernel_size
        for h in range(H):
            # slice boundaries in H direction
            h_start = h * stride_H
            h_end = h * stride_H + k_H
            for w in range(W):
                # slice boundaries in W direction
                w_start = w * stride_W
                w_end = w * stride_W + k_W

                # (m, k, k, C_prev)
                x_slice = x_pad[:, h_start:h_end, w_start:w_end, :]
                # (m, 1, 1, C_prev)
                dY_ = np.expand_dims(dY[:, h, w, :], axis=(1, 2))

                if self.mode == "max":
                    mask = x_slice == np.max(x_slice,
                                             axis=(1, 2),
                                             keepdims=True)
                    dX_pad[:, h_start:h_end, w_start:w_end, :] += dY_ * mask

                elif self.mode == "avg":
                    avg_volume = np.ones((m, k_H, k_W, C)) / (k_H * k_W)
                    dX_pad[:, h_start:h_end,
                           w_start:w_end, :] += (dY_ * avg_volume)

        # slice the gradient tensor to original size
        dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))

        return dX
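
The max branch above routes the upstream gradient with a boolean mask: comparing the window against its own maximum (computed with keepdims=True) yields True only at the arg-max position, so multiplying by dY_ sends the whole gradient there. A minimal standalone sketch of the trick, with made-up values; note that if a window contains ties, every tied element receives the full gradient.

import numpy as np

# one 2x2 pooling window for a single sample and channel (made-up values)
x_slice = np.array([[1.0, 5.0],
                    [3.0, 2.0]])
dY_hw = 7.0  # upstream gradient for this output position

# True only where the window attains its maximum
mask = x_slice == np.max(x_slice, keepdims=True)
# the gradient flows only to the max element
dX_window = dY_hw * mask
print(dX_window)
# [[0. 7.]
#  [0. 0.]]
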
Example #2
    def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
        """2-fold for loop implementation."""
        m, H, W, C = dY.shape

        stride_H, stride_W = self.stride
        k_H, k_W = self.kernel_size
        for i in range(m):
            for h in range(H):
                # slice boundaries in H direction
                h_start = h * stride_H
                h_end = h * stride_H + k_H
                for w in range(W):
                    # slice boundaries in W direction
                    w_start = w * stride_W
                    w_end = w * stride_W + k_W

                    for c in range(C):
                        # (k, k)
                        x_slice = x_pad[i, h_start:h_end, w_start:w_end, c]

                        if self.mode == "max":
                            mask = x_slice == np.max(x_slice)
                            dX_pad[i, h_start:h_end, w_start:w_end,
                                   c] += (dY[i, h, w, c] * mask)

                        elif self.mode == "avg":
                            avg_volume = np.ones((k_H, k_W)) / (k_H * k_W)
                            dX_pad[i, h_start:h_end, w_start:w_end,
                                   c] += (dY[i, h, w, c] * avg_volume)

        # slice the gradient tensor to original size
        dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))

        return dX
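
In the avg branch, each output position's gradient is split evenly over its k_H x k_W input window, so the scattered gradient inside one window sums back to the incoming value. A quick standalone check with made-up numbers:

import numpy as np

k_H, k_W = 2, 2
dY_hw = 8.0  # upstream gradient for one output position

avg_volume = np.ones((k_H, k_W)) / (k_H * k_W)
dX_window = dY_hw * avg_volume
print(dX_window)        # [[2. 2.]
                        #  [2. 2.]]
print(dX_window.sum())  # 8.0 -- the gradient is conserved
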
Example #3
    def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
        """4-fold for loop implementation."""
        m, H, W, _ = dY.shape

        stride_H, stride_W = self.stride
        k_H, k_W = self.kernel_size
        # loop over samples
        for i in range(m):
            for h in range(H):
                # slice boundaries in H direction
                h_start = h * stride_H
                h_end = h * stride_H + k_H
                for w in range(W):
                    # slice boundaries in W direction
                    w_start = w * stride_W
                    w_end = w * stride_W + k_W

                    # (k, k, C_prev)
                    x_slice = x_pad[i, h_start:h_end, w_start:w_end, :]
                    # loop over output channels
                    for c in range(self.out_channels):
                        # (k, k, C_prev)
                        weights = self.W[..., c]

                        # (k, k, C_prev)
                        dX_pad[i, h_start:h_end,
                               w_start:w_end, :] += (weights * dY[i, h, w, c])

                        # (k, k, C_prev)
                        self.dW[..., c] += x_slice * dY[i, h, w, c]
                        # (1, )
                        self.db[c] += dY[i, h, w, c]

        # slice the gradient tensor to original size
        dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))

        return dX
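
In the innermost loop, self.W[..., c] selects the (k_H, k_W, C_prev) filter of output channel c; scaling it by the scalar upstream gradient dY[i, h, w, c] and adding it into the matching window of dX_pad is the scatter step of the transposed convolution. A minimal shape sketch with made-up dimensions (W below stands in for self.W):

import numpy as np

k_H, k_W, C_prev, C = 3, 3, 4, 8
W = np.random.randn(k_H, k_W, C_prev, C)  # stand-in for self.W
dY_ihwc = 0.5                             # scalar upstream gradient

weights = W[..., 2]                       # filter of output channel c = 2
print(weights.shape)                      # (3, 3, 4)

dX_window = np.zeros((k_H, k_W, C_prev))
dX_window += weights * dY_ihwc            # same shape as the input window
print(dX_window.shape)                    # (3, 3, 4)
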
Example #4
    def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
        """3-fold for loop implementation."""
        m, H, W, _ = dY.shape

        stride_H, stride_W = self.stride
        k_H, k_W = self.kernel_size
        for h in range(H):
            # slice boundaries in H direction
            h_start = h * stride_H
            h_end = h * stride_H + k_H
            for w in range(W):
                # slice boundaries in W direction
                w_start = w * stride_W
                w_end = w * stride_W + k_W

                # (m, k, k, C_prev)
                x_slice = x_pad[:, h_start:h_end, w_start:w_end, :]
                # loop over output channels
                for c in range(self.out_channels):
                    # (m, k, k, C_prev)
                    weights = np.repeat(self.W[np.newaxis, ..., c],
                                        repeats=m,
                                        axis=0)
                    # (m, 1, 1, 1)
                    dY_ = np.expand_dims(dY[:, h, w, c], axis=(1, 2, 3))
                    # (m, k, k, C_prev)
                    dX_pad[:, h_start:h_end, w_start:w_end, :] += weights * dY_

                    # (k, k, C_prev)
                    self.dW[..., c] += np.sum(x_slice * dY_, axis=0)
                    # (1, )
                    self.db[c] += np.sum(dY_)

        # slice the gradient tensor to original size
        dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))

        return dX
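
The explicit np.repeat along the sample axis in this example (and in Example #5 below) is not strictly required: a length-1 leading axis broadcasts against the batch dimension automatically. A small equivalence check with made-up shapes:

import numpy as np

m, k_H, k_W, C_prev = 5, 3, 3, 4
W_c = np.random.randn(k_H, k_W, C_prev)   # one output channel's filter
dY_ = np.random.randn(m, 1, 1, 1)         # expanded upstream gradient

repeated = np.repeat(W_c[np.newaxis, ...], repeats=m, axis=0) * dY_
broadcast = W_c[np.newaxis, ...] * dY_    # broadcasting does the repeat
print(np.allclose(repeated, broadcast))   # True
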
Example #5
    def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
        """2-fold for loop implementation."""
        m, H, W, _ = dY.shape

        stride_H, stride_W = self.stride
        k_H, k_W = self.kernel_size
        for h in range(H):
            # slice boundaries in H direction
            h_start = h * stride_H
            h_end = h * stride_H + k_H
            for w in range(W):
                # slice boundaries in W direction
                w_start = w * stride_W
                w_end = w * stride_W + k_W

                # (m, k, k, C_prev, C)
                weights = np.repeat(np.expand_dims(self.W, 0),
                                    repeats=m,
                                    axis=0)
                # (m, 1, 1, 1, C)
                dY_ = np.expand_dims(dY[:, h, w, :], axis=(1, 2, 3))
                # (m, k, k, C_prev)
                dX_pad[:, h_start:h_end,
                       w_start:w_end, :] += np.sum(weights * dY_, axis=4)

                # (m, k, k, C_prev, 1)
                x_slice = x_pad[:, h_start:h_end, w_start:w_end, :, np.newaxis]
                # (k, k, C_prev, C)
                self.dW += np.sum(x_slice * dY_, axis=0)
                # (C, )
                self.db += np.sum(dY_, axis=(0, 1, 2, 3))

        # slice the gradient tensor to original size
        dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))

        return dX
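
The three per-window updates in this example can also be phrased with np.einsum, which avoids materializing the repeated weight tensor and the extra singleton axes. A sketch of the equivalence with made-up shapes, where x_win is the (m, k_H, k_W, C_prev) input window and dY_hw the (m, C) upstream gradient at one output position:

import numpy as np

m, k_H, k_W, C_prev, C = 5, 3, 3, 4, 8
W = np.random.randn(k_H, k_W, C_prev, C)
x_win = np.random.randn(m, k_H, k_W, C_prev)
dY_hw = np.random.randn(m, C)

dX_win = np.einsum('ijpc,mc->mijp', W, dY_hw)      # sum over output channels
dW_upd = np.einsum('mijp,mc->ijpc', x_win, dY_hw)  # sum over samples
db_upd = dY_hw.sum(axis=0)

# same results as the expand_dims / repeat formulation above
dY_ = dY_hw[:, None, None, None, :]                # (m, 1, 1, 1, C)
print(np.allclose(dX_win, np.sum(W[None, ...] * dY_, axis=4)))      # True
print(np.allclose(dW_upd, np.sum(x_win[..., None] * dY_, axis=0)))  # True
print(np.allclose(db_upd, np.sum(dY_, axis=(0, 1, 2, 3))))          # True
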
Example #6
    def _backward_compute(self, col_matrix, dX_pad, dY, H_prev, W_prev):
        """im2col implementation."""
        m, H, W, _ = dY.shape

        # (m, H, W, C) -> (m, H x W, C)
        dY = dY.reshape((m, -1, self.out_channels))
        # (k, k, C_prev, C) -> (k x k x C_prev, C) -> (1, k x k x C_prev, C)
        weights = self.W.reshape((-1, self.out_channels))[np.newaxis, ...]

        # gradients of activation
        stride_H, stride_W = self.stride
        k_H, k_W = self.kernel_size
        dcol_matrix = np.matmul(dY, weights.transpose(0, 2, 1))
        for h in range(H):
            # slice boundaries in H direction
            h_start = h * stride_H
            h_end = h * stride_H + k_H
            for w in range(W):
                # slice boundaries in W direction
                w_start = w * stride_W
                w_end = w * stride_W + k_W

                idx = h * W + w
                drow = dcol_matrix[:, idx, :].reshape((m, k_H, k_W, -1))
                dX_pad[:, h_start:h_end, w_start:w_end, :] += drow

        # slice the gradient tensor to original size
        dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))

        # gradients of weights & biases
        dW = np.sum(np.matmul(col_matrix.transpose(0, 2, 1), dY), axis=0)
        self.dW[:] = dW.reshape(
            (*self.kernel_size, self.in_channels, self.out_channels))
        self.db[:] = np.sum(dY, axis=(0, 1))

        return dX
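
The backward pass above implies a specific col_matrix layout: shape (m, H x W, k_H x k_W x C_prev), with each row holding one padded input patch flattened in (k_H, k_W, C_prev) order. A hypothetical forward-side helper that would produce that layout is sketched below; the name im2col and its exact place in the forward pass are assumptions, not part of the original code.

import numpy as np

def im2col(x_pad, kernel_size, stride, out_hw):
    """Hypothetical sketch: flatten every sliding window of the padded
    input into one row of a (m, H*W, k_H*k_W*C_prev) matrix."""
    m, _, _, C_prev = x_pad.shape
    k_H, k_W = kernel_size
    stride_H, stride_W = stride
    H, W = out_hw

    col_matrix = np.empty((m, H * W, k_H * k_W * C_prev))
    for h in range(H):
        h_start = h * stride_H
        for w in range(W):
            w_start = w * stride_W
            patch = x_pad[:, h_start:h_start + k_H,
                          w_start:w_start + k_W, :]
            # row index matches the reshaped dY: h * W + w
            col_matrix[:, h * W + w, :] = patch.reshape(m, -1)
    return col_matrix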