Пример #1
0
    def forward_gpu(self, inputs):
        w, = inputs
        xp = cuda.get_array_module(w)
        och, ich, _, ny, nx = w.shape

        nto, nti = self.T.shape[:2]
        rotated_w = xp.empty((och, nto, ich, nti, ny, nx), dtype=w.dtype)

        index_group_func_kernel(input=w,
                                T=self.T,
                                U=self.U,
                                V=self.V,
                                output=rotated_w)

        return rotated_w,
Пример #2
0
    def forward_gpu(self, inputs):

        w, = inputs
        xp = cuda.get_array_module(w)
        och, ich, _, ny, nx = w.shape

        nto, nti = self.T.shape[:2]
        rotated_w = xp.empty((och, nto, ich, nti, ny, nx), dtype=w.dtype)

        index_group_func_kernel(
            input=w,
            T=self.T,
            U=self.U,
            V=self.V,
            output=rotated_w
        )

        return rotated_w,
def test_index_group_func():
    import numpy as np
    import cupy as cp
    from chainer import cuda
    input = np.random.randn(2, 3, 4, 5, 6)
    I = np.random.randint(0, 4, (7, 8, 9, 10))
    J = np.random.randint(0, 5, (7, 8, 9, 10))
    K = np.random.randint(0, 6, (7, 8, 9, 10))

    output = input[..., I, J, K].swapaxes(1, 2)

    cpoutput = cp.zeros(output.shape)
    cpinput = cuda.to_gpu(input)
    cpI = cuda.to_gpu(I)
    cpJ = cuda.to_gpu(J)
    cpK = cuda.to_gpu(K)

    index_group_func_kernel(cpinput, cpI, cpJ, cpK, cpoutput)

    cpoutput = cuda.to_cpu(cpoutput)

    error = np.abs(cpoutput - output).sum()
    print error
    assert np.isclose(error, 0.)