def forward_gpu(self, inputs):
    """Build the expanded weight array by running ``index_group_func_kernel``.

    Args:
        inputs: A one-element sequence holding the weight array ``w``.
            ``w`` must be 5-dimensional; its shape is unpacked as
            ``(och, ich, _, ny, nx)``.

    Returns:
        A one-element tuple containing an array of shape
        ``(och, nto, ich, nti, ny, nx)`` with the same dtype as ``w``,
        where ``nto, nti`` are the first two dimensions of ``self.T``.
    """
    (w,) = inputs
    xp = cuda.get_array_module(w)

    och, ich, _, ny, nx = w.shape
    nto, nti = self.T.shape[:2]

    # The buffer is allocated uninitialised; the kernel receives it as its
    # ``output`` argument and is presumably responsible for filling every
    # element -- confirm against the kernel definition.
    out_shape = (och, nto, ich, nti, ny, nx)
    rotated_w = xp.empty(out_shape, dtype=w.dtype)

    index_group_func_kernel(
        input=w,
        T=self.T,
        U=self.U,
        V=self.V,
        output=rotated_w,
    )
    return (rotated_w,)
def forward_gpu(self, inputs):
    """Run ``index_group_func_kernel`` on the single weight input.

    ``inputs`` is unpacked as exactly one array ``w`` whose shape must have
    five dimensions ``(och, ich, _, ny, nx)``. The result array has shape
    ``(och, nto, ich, nti, ny, nx)`` and ``w``'s dtype, with ``nto`` and
    ``nti`` taken from ``self.T.shape[:2]``.

    Returns a one-element tuple holding that array.
    """
    w, = inputs
    # Allocate with whichever array module get_array_module selects for w.
    array_module = cuda.get_array_module(w)

    n_out, n_in, _, height, width = w.shape
    n_trans_out, n_trans_in = self.T.shape[:2]

    # NOTE(review): self.T / self.U / self.V look like index arrays consumed
    # by the kernel; their exact layout is not visible here -- confirm.
    rotated_w = array_module.empty(
        (n_out, n_trans_out, n_in, n_trans_in, height, width),
        dtype=w.dtype,
    )
    index_group_func_kernel(
        input=w, T=self.T, U=self.U, V=self.V, output=rotated_w
    )
    return rotated_w,
def test_index_group_func():
    """Compare ``index_group_func_kernel`` with NumPy fancy indexing.

    A random 5-D array is indexed along its last three axes by three random
    integer arrays of shape ``(7, 8, 9, 10)``; axes 1 and 2 of the result are
    then swapped to form the reference. The same data is copied to the GPU,
    passed to ``index_group_func_kernel``, and the summed absolute difference
    between the kernel output and the reference must be (close to) zero.

    Requires cupy and chainer, since the arrays are moved with
    ``cuda.to_gpu`` and the output buffer is created with ``cp.zeros``.
    """
    import numpy as np
    import cupy as cp
    from chainer import cuda

    # ``source`` rather than ``input`` so the builtin is not shadowed.
    source = np.random.randn(2, 3, 4, 5, 6)

    # One index array per trailing axis of ``source`` (sizes 4, 5 and 6);
    # the upper bounds keep every index in range for its axis.
    index_shape = (7, 8, 9, 10)
    idx_i = np.random.randint(0, 4, index_shape)
    idx_j = np.random.randint(0, 5, index_shape)
    idx_k = np.random.randint(0, 6, index_shape)

    # Reference result: shape (2, 3, 7, 8, 9, 10) before the swap,
    # (2, 7, 3, 8, 9, 10) after it.
    expected = source[..., idx_i, idx_j, idx_k].swapaxes(1, 2)

    gpu_output = cp.zeros(expected.shape)
    gpu_source = cuda.to_gpu(source)
    gpu_i = cuda.to_gpu(idx_i)
    gpu_j = cuda.to_gpu(idx_j)
    gpu_k = cuda.to_gpu(idx_k)

    index_group_func_kernel(gpu_source, gpu_i, gpu_j, gpu_k, gpu_output)

    actual = cuda.to_cpu(gpu_output)
    error = np.abs(actual - expected).sum()
    # Function-call form of print works on both Python 2 and Python 3.
    print(error)
    assert np.isclose(error, 0.)