def _do_for_params(self, horizon):
    """Check the CUDA delayed coupling function against a NumPy replay.

    Seeded random delays, weights and states are generated, the device
    function built by ``cu_delay_cfun`` is invoked for every step and node
    inside a kernel, and then the history buffer and the coupling output are
    compared with values recomputed on the host.

    :param horizon: size of the buffer's second axis (number of stored
        steps).  Must be greater than 2, because the delays are drawn from
        ``[0, horizon - 2)``.
    :raises AssertionError: if the buffer contents or the coupling output
        do not match the host-side values within tolerance.
    """
    # setup test data (fixed seed so that failures are reproducible)
    numpy.random.seed(42)
    n_step, n_node, n_cvar, n_svar, n_thread = 100, 5, 2, 4, self.n_thread
    cvars = numpy.random.randint(0, n_svar, n_cvar).astype(numpy.int32)
    out = numpy.zeros((n_step, n_node, n_cvar, n_thread), numpy.float32)
    delays = numpy.random.randint(
        0, horizon - 2, (n_node, n_node)).astype(numpy.int32)
    weights = numpy.random.randn(n_node, n_node).astype(numpy.float32)
    # zero out roughly a quarter of the weights
    weights[numpy.random.rand(*weights.shape) < 0.25] = 0.0
    state = numpy.random.randn(
        n_step, n_node, n_svar, n_thread).astype(numpy.float32)
    buf = numpy.zeros((n_node, horizon, n_cvar, n_thread), numpy.float32)

    # setup cu functions
    pre = cu_linear_cfe_pre(0.0, 1.0, 0.0)
    post = cu_linear_cfe_post(1.0, 0.0)
    dcf = cu_delay_cfun(horizon, pre, post, n_cvar, self.block_dim[0],
                        step_stride=1, aff_node_stride=1)

    # run it
    @self.jit_and_run(out, delays, weights, state, cvars, buf)
    def kernel(out, delays, weights, state, cvars, buf):
        i_thread = cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x
        for step in range(state.shape[0]):
            for i_post in range(state.shape[1]):
                dcf(out, delays, weights, state, i_post, i_thread, step,
                    cvars, buf)

    # The fancy index copies the data, so select the coupling variables
    # once (after the kernel has run) and reuse the result below.
    cvar_state = state[:, :, cvars]  # (n_step, n_node, n_cvar, n_thread)

    # ensure buffer is updating correctly: it should hold the last
    # `horizon` steps, rolled by n_step along its step axis
    buf_state = numpy.roll(
        cvar_state[-horizon:].transpose((1, 0, 2, 3)), n_step, axis=1)
    numpy.testing.assert_allclose(buf, buf_state)

    # replay the coupling on the host; nodes[i, j] == j
    nodes = numpy.tile(numpy.r_[:n_node], (n_node, 1))
    weights_4d = weights.reshape((n_node, n_node, 1, 1))
    # delays < horizon - 2, so starting at horizon + 3 keeps every
    # `step - delays` index non-negative
    for step in range(horizon + 3, n_step):
        # (n_node, n_node, n_cvar, n_thread)
        delayed_state = cvar_state[step - delays, nodes]
        # (n_node, n_cvar, n_thread)
        afferent = (weights_4d * delayed_state).sum(axis=1)
        numpy.testing.assert_allclose(
            afferent, out[step], rtol=1e-5, atol=1e-6)
def test_linear_post(self):
    """Compare the device function from ``cu_linear_cfe_post`` with NumPy.

    Each thread applies the function to one random input; the results must
    match ``state * slope + intercept`` computed on the host.
    """
    slope = 0.2
    intercept = 0.5
    n_thread = self.n_thread

    out = numpy.zeros((n_thread,), dtype=numpy.float32)
    state = numpy.random.rand(n_thread).astype(numpy.float32)
    post = cu_linear_cfe_post(slope, intercept)

    @self.jit_and_run(out, state)
    def kernel(out, state):
        # one element per thread, addressed by the global thread index
        i_thread = cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x
        out[i_thread] = post(state[i_thread])

    expected = state * slope + intercept
    numpy.testing.assert_allclose(out, expected, rtol=1e-4, atol=1e-5)