예제 #1
0
    def _do_for_params(self, horizon):
        """Run the delay-coupling device function and check it against NumPy.

        Seeded random weights, delays and a state trajectory are generated, a
        CUDA kernel calls the function built by ``cu_delay_cfun`` for every
        step and every node, and the outputs are verified on the host in two
        ways: ``buf`` must equal the last ``horizon`` steps of the coupling
        variables, and ``out`` must match a NumPy replay of the weighted sum
        over delayed states.

        Parameters
        ----------
        horizon : int
            Length of the delay buffer's time axis.  Delays are drawn from
            ``[0, horizon - 2)``, so ``horizon`` has to be at least 3 for
            ``numpy.random.randint`` to accept the range.

        Raises
        ------
        AssertionError
            If the buffer contents or the coupling output differ from the
            host-side reference.
        """
        # setup test data; the order of the random draws is kept as-is so the
        # seeded data stays reproducible
        numpy.random.seed(42)
        n_step, n_node, n_cvar, n_svar, n_thread = 100, 5, 2, 4, self.n_thread
        cvars = numpy.random.randint(0, n_svar, n_cvar).astype(numpy.int32)
        out = numpy.zeros((n_step, n_node, n_cvar, n_thread), numpy.float32)
        delays = numpy.random.randint(0, horizon - 2,
                                      (n_node, n_node)).astype(numpy.int32)
        weights = numpy.random.randn(n_node, n_node).astype(numpy.float32)
        # zero out roughly a quarter of the connections
        weights[numpy.random.rand(*weights.shape) < 0.25] = 0.0
        state = numpy.random.randn(n_step, n_node, n_svar,
                                   n_thread).astype(numpy.float32)
        buf = numpy.zeros((n_node, horizon, n_cvar, n_thread), numpy.float32)

        # setup cu functions
        # NOTE(review): the meaning of the numeric arguments is defined by the
        # cu_linear_cfe_* factories, which are not visible here; the replay
        # below treats pre/post as identity maps -- confirm.
        pre = cu_linear_cfe_pre(0.0, 1.0, 0.0)
        post = cu_linear_cfe_post(1.0, 0.0)
        dcf = cu_delay_cfun(horizon,
                            pre,
                            post,
                            n_cvar,
                            self.block_dim[0],
                            step_stride=1,
                            aff_node_stride=1)

        # run it; the assertions below rely on `out` and `buf` having been
        # filled once the decorated definition completes (presumably
        # jit_and_run compiles and launches the kernel -- defined elsewhere)
        @self.jit_and_run(out, delays, weights, state, cvars, buf)
        def kernel(out, delays, weights, state, cvars, buf):
            i_thread = cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x
            for step in range(state.shape[0]):
                for i_post in range(state.shape[1]):
                    dcf(out, delays, weights, state, i_post, i_thread, step,
                        cvars, buf)

        # Coupling variables only: (n_step, n_node, n_cvar, n_thread).  Fancy
        # indexing makes a copy, so take it once here instead of once per
        # replayed step.
        cvar_state = state[:, :, cvars]

        # ensure buffer is updating correctly: the last `horizon` steps, with
        # node first and time second, rolled by n_step along the time axis
        buf_state = numpy.roll(
            cvar_state[-horizon:].transpose((1, 0, 2, 3)), n_step, axis=1)
        numpy.testing.assert_allclose(buf, buf_state)

        # replay
        nodes = numpy.tile(numpy.r_[:n_node], (n_node, 1))
        # broadcastable against (n_node, n_node, n_cvar, n_thread)
        weights_4d = weights.reshape((n_node, n_node, 1, 1))
        # start past the horizon so every `step - delays` index is positive
        for step in range(horizon + 3, n_step):
            # (n_node, n_node, n_cvar, n_thread)
            delayed_state = cvar_state[step - delays, nodes]
            # (n_node, n_cvar, n_thread)
            afferent = (weights_4d * delayed_state).sum(axis=1)
            numpy.testing.assert_allclose(afferent, out[step],
                                          rtol=1e-5, atol=1e-6)
예제 #2
0
    def test_linear_pre(self):
        """Compare the device function from ``cu_linear_cfe_pre`` with NumPy.

        Each thread evaluates the function built by
        ``cu_linear_cfe_pre(ai, aj, intercept)`` on its own pair of random
        inputs; the results must match ``ai * xi + aj * xj + intercept``
        computed on the host, within a float32-friendly tolerance.
        """
        ai, aj, intercept = -0.2, 0.3, 0.25
        n_thread = self.n_thread
        result = numpy.zeros((n_thread,), 'f')

        # a single draw supplies both inputs: row 0 is xj, row 1 is xi
        samples = numpy.random.rand(2, n_thread).astype('f')
        xj = samples[0]
        xi = samples[1]

        linear_pre = cu_linear_cfe_pre(ai, aj, intercept)

        @self.jit_and_run(result, xi, xj)
        def kernel(out, xi, xj):
            i_thread = cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x
            out[i_thread] = linear_pre(xi[i_thread], xj[i_thread])

        # host-side reference for the same linear combination
        expected = ai * xi + aj * xj + intercept
        numpy.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-5)