Example #1
    def check_cholesky(self, N, lower=True, rtol=None, atol=None):
        A = self.rand_symmetric(N)
        L = self.run_gpu_cholesky(A, lower=lower)
        if not lower:
            L = L.T
        utt.assert_allclose(np.dot(L, L.T), A, rtol=rtol, atol=atol)
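
A minimal NumPy-only sketch of the identity this helper checks, with np.linalg.cholesky standing in for run_gpu_cholesky and an illustrative SPD matrix:

import numpy as np

rng = np.random.default_rng(0)
B = rng.standard_normal((5, 5))
A = B @ B.T + 5 * np.eye(5)              # symmetric positive definite
L = np.linalg.cholesky(A)                # lower-triangular factor
np.testing.assert_allclose(L @ L.T, A, rtol=1e-10, atol=1e-10)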
Example #2
def test_conv3d(border_mode):
    if aesara.config.mode == "FAST_COMPILE":
        mode = aesara.compile.mode.get_mode("FAST_RUN")
    else:
        mode = aesara.compile.mode.get_default_mode()

    Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 5, 3, 5, 5

    signals = (np.arange(Ns * Ts * C * Hs * Ws).reshape(Ns, Ts, C, Hs,
                                                        Ws).astype("float32"))
    filters = (np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf,
                                                        Wf).astype("float32"))

    # t0 = time.time()
    pyres = pyconv3d(signals, filters, border_mode)
    # print(time.time() - t0)

    s_signals = shared(signals)
    s_filters = shared(filters)
    s_output = shared(signals * 0)

    out = conv3d(
        s_signals,
        s_filters,
        signals_shape=signals.shape,
        filters_shape=filters.shape,
        border_mode=border_mode,
    )

    newconv3d = aesara.function([], [], updates={s_output: out}, mode=mode)

    check_diagonal_subtensor_view_traces(newconv3d)
    # t0 = time.time()
    newconv3d()
    # print(time.time() - t0)
    utt.assert_allclose(pyres, s_output.get_value(borrow=True))
    gsignals, gfilters = aesara.grad(out.sum(), [s_signals, s_filters])
    gnewconv3d = aesara.function(
        [],
        [],
        updates=[(s_filters, gfilters), (s_signals, gsignals)],
        mode=mode,
        name="grad",
    )
    check_diagonal_subtensor_view_traces(gnewconv3d)

    # t0 = time.time()
    gnewconv3d()
    # print("grad", time.time() - t0)

    Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
    Nf, Tf, C, Hf, Wf = 4, 2, 3, 2, 2

    rng = np.random.default_rng(280284)

    signals = rng.random((Ns, Ts, C, Hs, Ws)).astype("float32")
    filters = rng.random((Nf, Tf, C, Hf, Wf)).astype("float32")
    utt.verify_grad(
        lambda s, f: conv3d(s, f, border_mode=border_mode),
        [signals, filters],
        eps=1e-1,
        mode=mode,
    )

    # Additional test covering the patched implementation for filters with Tf=1
    Ns, Ts, C, Hs, Ws = 3, 10, 3, 32, 32
    Nf, Tf, C, Hf, Wf = 32, 1, 3, 5, 5

    signals = (np.arange(Ns * Ts * C * Hs * Ws).reshape(Ns, Ts, C, Hs,
                                                        Ws).astype("float32"))
    filters = (np.arange(Nf * Tf * C * Hf * Wf).reshape(Nf, Tf, C, Hf,
                                                        Wf).astype("float32"))

    # t0 = time.time()
    pyres = pyconv3d(signals, filters, border_mode)
    # print(time.time() - t0)

    s_signals = shared(signals)
    s_filters = shared(filters)
    s_output = shared(signals * 0)

    out = conv3d(
        s_signals,
        s_filters,
        signals_shape=signals.shape,
        filters_shape=filters.shape,
        border_mode=border_mode,
    )

    newconv3d = aesara.function([], [], updates={s_output: out}, mode=mode)

    # t0 = time.time()
    newconv3d()
    # print(time.time() - t0)
    utt.assert_allclose(pyres, s_output.get_value(borrow=True))
    gsignals, gfilters = aesara.grad(out.sum(), [s_signals, s_filters])
    gnewconv3d = aesara.function(
        [],
        [],
        updates=[(s_filters, gfilters), (s_signals, gsignals)],
        mode=mode,
        name="grad",
    )

    # t0 = time.time()
    gnewconv3d()
    # print("grad", time.time() - t0)

    Ns, Ts, C, Hs, Ws = 3, 3, 3, 5, 5
    Nf, Tf, C, Hf, Wf = 4, 1, 3, 2, 2

    signals = rng.random((Ns, Ts, C, Hs, Ws)).astype("float32")
    filters = rng.random((Nf, Tf, C, Hf, Wf)).astype("float32")
    utt.verify_grad(
        lambda s, f: conv3d(s, f, border_mode=border_mode),
        [signals, filters],
        eps=1e-1,
        mode=mode,
    )
Example #3
    def test_one_sequence_one_output_weights_gpu2(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=self.mode_with_gpu,
        )

        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=self.mode_with_gpu,
        )

        # get random initial values
        rng = np.random.default_rng(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        aesara_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert (sum([
            isinstance(node.op, self.gpu_backend.HostFromGpu) for node in topo
        ]) == 1)
        assert (sum([
            isinstance(node.op, self.gpu_backend.GpuFromHost) for node in topo
        ]) == 4)

        scan_node = [node for node in topo if isinstance(node.op, Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any([
            isinstance(node.op, self.gpu_backend.GpuElemwise)
            for node in scan_node_topo
        ])
        assert not any([
            isinstance(node.op, self.gpu_backend.HostFromGpu)
            for node in scan_node_topo
        ])
        assert not any([
            isinstance(node.op, self.gpu_backend.GpuFromHost)
            for node in scan_node_topo
        ])
Example #4
    def check_svd(self, A, U, S, VT, rtol=None, atol=None):
        S_m = np.zeros_like(A)
        np.fill_diagonal(S_m, S)
        utt.assert_allclose(np.dot(np.dot(U, S_m), VT), A, rtol=rtol, atol=atol)
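
The same reconstruction identity, sketched standalone with NumPy's own SVD (illustrative sizes, not the test's helpers):

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 4))
U, S, VT = np.linalg.svd(A)
S_m = np.zeros_like(A)
np.fill_diagonal(S_m, S)                 # embed the singular values on the diagonal
np.testing.assert_allclose(U @ S_m @ VT, A, rtol=1e-10, atol=1e-12)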
Example #5
    def test_one_sequence_one_output_weights_gpu1(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.fvector("u")
        x0 = theano.tensor.fscalar("x0")
        W_in = theano.tensor.fscalar("win")
        W = theano.tensor.fscalar("w")

        # The following line is needed so that the first code path is used;
        # otherwise, it is the second one that gets tested.
        mode = self.mode_with_gpu.excluding("InputToGpuOptimizer")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode,
        )

        output = self.gpu_backend.gpu_from_host(output)
        f2 = theano.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=self.mode_with_gpu,
        )

        # get random initial values
        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4,), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        v_u = np.asarray(v_u, dtype="float32")
        v_x0 = np.asarray(v_x0, dtype="float32")
        W = np.asarray(W, dtype="float32")
        W_in = np.asarray(W_in, dtype="float32")

        # compute the output in numpy
        v_out = np.zeros((4,))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W
        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

        # TO DEL
        topo = f2.maker.fgraph.toposort()
        scan_node = [node for node in topo if isinstance(node.op, Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]

        topo = f2.maker.fgraph.toposort()
        assert (
            sum([isinstance(node.op, self.gpu_backend.HostFromGpu) for node in topo])
            == 0
        )
        assert (
            sum([isinstance(node.op, self.gpu_backend.GpuFromHost) for node in topo])
            == 4
        )

        scan_node = [node for node in topo if isinstance(node.op, Scan)]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any(
            [
                isinstance(node.op, self.gpu_backend.GpuElemwise)
                for node in scan_node_topo
            ]
        )
        assert not any(
            [
                isinstance(node.op, self.gpu_backend.HostFromGpu)
                for node in scan_node_topo
            ]
        )
        assert not any(
            [
                isinstance(node.op, self.gpu_backend.GpuFromHost)
                for node in scan_node_topo
            ]
        )
Example #6
    def test_gpu3_mixture_dtype_outputs(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return (u_t * W_in + x_tm1 * W, at.cast(u_t + x_tm1, "int64"))

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")
        output, updates = scan(
            f_rnn,
            u,
            [x0, None],
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode_with_gpu,
        )

        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=mode_with_gpu,
        )

        # get random initial values
        rng = np.random.default_rng(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        # compute the output in numpy
        v_out1 = np.zeros((4, ))
        v_out2 = np.zeros((4, ), dtype="int64")
        v_out1[0] = v_u[0] * W_in + v_x0 * W
        v_out2[0] = v_u[0] + v_x0
        for step in range(1, 4):
            v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
            v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])

        aesara_out1, aesara_out2 = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_out1, v_out1)
        utt.assert_allclose(aesara_out2, v_out2)

        topo = f2.maker.fgraph.toposort()
        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        assert scan_node.op.gpua

        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert not any(
            isinstance(node.op, HostFromGpu) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, GpuFromHost) for node in scan_node_topo)
Example #7
    def _cmp(self, n, m, f, f_gpu):
        data = np.arange(n * m, dtype="float32").reshape(n, m)
        out = f(data)
        gout = f_gpu(data)
        utt.assert_allclose(out, gout)
Example #8
    def run_conv_fwd(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        inputs_val /= 10
        filters_val /= 10

        inputs = aesara.shared(inputs_val)
        filters = aesara.shared(filters_val)

        if beta == 0:
            out = None
        else:
            out = self.array_like_conv_output(inputs_shape, filters_shape,
                                              border_mode, subsample, dilation,
                                              dtype)
            out /= 10
        # Compile an Aesara function for the cuDNN implementation
        conv = dnn_conv(
            img=inputs,
            kerns=filters,
            alpha=alpha,
            beta=beta,
            out=out,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )
        f = aesara.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv', the reference implementation should use
        # filters flipped along the width, height and time axes
        if conv_mode == "conv":
            if inputs.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile an Aesara function for the reference implementation
        conv_ref = self.cpu_conv_class(border_mode=border_mode,
                                       subsample=subsample,
                                       filter_dilation=dilation)(
                                           ref_cast(inputs), flipped_filters)
        f_ref = aesara.function([], conv_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_fwd_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * out
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
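
The reference comparison above assembles alpha * res_ref + beta * out, mirroring the blending requested through dnn_conv's alpha/beta/out arguments; a tiny NumPy sketch of that blend (array names are illustrative):

import numpy as np

alpha, beta = 2.0, 0.75
conv_ref = np.random.random((2, 3, 4, 4))    # stands in for the CPU reference result
prior_out = np.random.random((2, 3, 4, 4))   # pre-existing output buffer (beta != 0 case)
expected = alpha * conv_ref + beta * prior_out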
Example #9
def test_numpy_method(fct, value):
    x = dscalar("x")
    y = fct(x)
    f = aesara.function([x], y)
    utt.assert_allclose(np.nan_to_num(f(value)), np.nan_to_num(fct(value)))
Example #10
    def cmp(n, m):
        data = np.random.uniform(0, 1, (n, m)).astype(dtype=dtypeInput)

        out = f(data)
        gout = f_gpu(data)
        utt.assert_allclose(out, gout)
Example #11
def test_batch_normalization():
    def bn_ref(x, G, B, M, V):
        n = (x - M) / V
        return n * G + B

    np.random.seed(1234)
    X = 1 + np.random.random([10, 20]).astype("float32")
    B = 1 + np.random.random([20]).astype("float32")
    G = 1 + np.random.random([20]).astype("float32")
    M = 1 + np.random.random([20]).astype("float32")
    V = 1 + np.random.random([20]).astype("float32")

    x = matrix("x")
    b = vector("b")
    g = vector("g")
    m = vector("m")
    v = vector("v")

    bn_ref_op = bn_ref(x, g, b, m, v)
    f_ref = aesara.function([x, g, b, m, v], [bn_ref_op])
    res_ref = f_ref(X, G, B, M, V)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(x, g, b, m, v, mode=mode)
        f = aesara.function([x, g, b, m, v], [bn_op])
        res = f(X, G, B, M, V)
        utt.assert_allclose(res_ref, res)

        def bn_f(inputs, gamma, beta, mean, std):
            return batchnorm.batch_normalization(inputs,
                                                 gamma,
                                                 beta,
                                                 mean,
                                                 std,
                                                 mode=mode)

        utt.verify_grad(bn_f, [X, G, B, M, V])

    bn_ref_op = bn_ref(x, g, b, x.mean(axis=0, keepdims=True),
                       x.std(axis=0, keepdims=True))
    f_ref = aesara.function([x, b, g], [bn_ref_op])
    res_ref = f_ref(X, G, B)
    for mode in ["low_mem", "high_mem"]:
        bn_op = batchnorm.batch_normalization(
            x,
            g,
            b,
            x.mean(axis=0, keepdims=True),
            x.std(axis=0, keepdims=True),
            mode=mode,
        )
        f = aesara.function([x, b, g], [bn_op])
        res = f(X, G, B)
        utt.assert_allclose(res_ref, res)

        def bn_f(inputs, gamma, beta, mean, std):
            return batchnorm.batch_normalization(inputs,
                                                 gamma,
                                                 beta,
                                                 mean,
                                                 std,
                                                 mode=mode)

        utt.verify_grad(
            bn_f,
            [X, G, B,
             X.mean(axis=0)[np.newaxis],
             X.std(axis=0)[np.newaxis]])
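
In the second half of the test the reference normalizes with the batch statistics taken along axis 0; a NumPy sketch of that reference computation (shapes mirror the test, names are illustrative):

import numpy as np

rng = np.random.default_rng(1234)
X = 1 + rng.random((10, 20)).astype("float32")
G = 1 + rng.random(20).astype("float32")     # gamma (scale)
B = 1 + rng.random(20).astype("float32")     # beta (shift)

M = X.mean(axis=0, keepdims=True)
S = X.std(axis=0, keepdims=True)
ref = (X - M) / S * G + B                    # same formula as bn_ref above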
Example #12
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    # This is a basic test for GpuCrossentropySoftmaxArgmax1HotWithBias.
    # We check that the kernel loops when there are too many threads.

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, aesara.compile.debugmode.DebugMode):
        n_in = 4098
        n_out = 4099

    y = lvector("y")

    b = fvector("b")

    # We precompute the dot product with a big shape beforehand so that the test of
    # GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    # "the launch timed out and was terminated" on GPU cards that are not
    # powerful enough. The big shape is also needed to check the corner case.
    dot_result = fmatrix("dot_result")

    xx = np.asarray(np.random.rand(batch_size, n_in), dtype=np.float32)
    yy = np.ones((batch_size,), dtype="int32")
    b_values = np.zeros((n_out,), dtype="float32")
    W_values = np.asarray(np.random.rand(n_in, n_out), dtype="float32")

    dot_value = np.asarray(np.dot(xx, W_values), dtype="float32")
    del W_values
    p_y_given_x = aesara.tensor.nnet.softmax(dot_result + b)
    y_pred = argmax(p_y_given_x, axis=-1)
    loss = -mean(log(p_y_given_x)[at.arange(y.shape[0]), y])
    dW = grad(loss, dot_result)
    classify = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_without_gpu
    )
    classify_gpu = aesara.function(
        inputs=[y, b, dot_result], outputs=[loss, y_pred, dW], mode=mode_with_gpu
    )

    assert any(
        [
            isinstance(
                node.op, aesara.tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias
            )
            for node in classify.maker.fgraph.toposort()
        ]
    )
    assert any(
        [
            isinstance(node.op, GpuCrossentropySoftmaxArgmax1HotWithBias)
            for node in classify_gpu.maker.fgraph.toposort()
        ]
    )

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    utt.assert_allclose(out[0], gout[0])
    utt.assert_allclose(out[2], gout[2], atol=3e-6)
    utt.assert_allclose(out[1], gout[1])
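
What the symbolic loss -mean(log(p_y_given_x)[arange(y.shape[0]), y]) computes, sketched in NumPy on a toy batch (shapes shrunk, values illustrative):

import numpy as np

rng = np.random.default_rng(0)
dot_result = rng.random((4, 6)).astype("float32")   # x @ W, precomputed as in the test
b = np.zeros(6, dtype="float32")
y = np.array([1, 0, 3, 5])                          # target class per row

z = dot_result + b
z = z - z.max(axis=1, keepdims=True)                # numerically stable softmax
p = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
y_pred = p.argmax(axis=-1)
loss = -np.mean(np.log(p[np.arange(y.shape[0]), y]))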
Example #13
def test_h_softmax():
    # Tests the output dimensions of the h_softmax when a target is provided or
    # not.

    input_size = 4
    batch_size = 2
    h_softmax_level1_size = 5
    h_softmax_level2_size = 3
    output_size = h_softmax_level1_size * h_softmax_level2_size

    # First level of h_softmax
    W1 = np.asarray(np.random.normal(size=(input_size, h_softmax_level1_size)),
                    dtype=config.floatX)
    W1 = aesara.shared(W1)
    b1 = aesara.shared(
        np.asarray(np.zeros((h_softmax_level1_size, )), dtype=config.floatX))

    # Second level of h_softmax
    W2 = np.asarray(
        np.random.normal(size=(h_softmax_level1_size, input_size,
                               h_softmax_level2_size)),
        dtype=config.floatX,
    )
    W2 = aesara.shared(W2)
    b2 = aesara.shared(
        np.asarray(
            np.zeros((h_softmax_level1_size, h_softmax_level2_size)),
            dtype=config.floatX,
        ))

    x = matrix("x")
    y = ivector("y")

    # This only computes the output corresponding to the target
    y_hat_tg = h_softmax(
        x,
        batch_size,
        output_size,
        h_softmax_level1_size,
        h_softmax_level2_size,
        W1,
        b1,
        W2,
        b2,
        y,
    )

    # This computes all the outputs
    y_hat_all = h_softmax(
        x,
        batch_size,
        output_size,
        h_softmax_level1_size,
        h_softmax_level2_size,
        W1,
        b1,
        W2,
        b2,
    )

    fun_output_tg = aesara.function([x, y], y_hat_tg)
    fun_output = aesara.function([x], y_hat_all)

    x_mat = np.random.normal(size=(batch_size,
                                   input_size)).astype(config.floatX)
    y_mat = np.random.default_rng().integers(0, output_size,
                                             batch_size).astype("int32")
    tg_output = fun_output_tg(x_mat, y_mat)
    all_outputs = fun_output(x_mat)

    assert tg_output.shape == (batch_size, )
    assert all_outputs.shape == (batch_size, output_size)

    # Verifies that the outputs computed by fun_output_tg are the same as those
    # computed by fun_output.
    utt.assert_allclose(all_outputs[np.arange(0, batch_size), y_mat],
                        tg_output)
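
h_softmax factors the distribution over output_size = h_softmax_level1_size * h_softmax_level2_size classes into two softmaxes, p(i, j | x) = softmax(x W1 + b1)[i] * softmax(x W2[i] + b2[i])[j]; a NumPy sketch of the full-output path under that assumption (the column ordering is assumed to group classes by level-1 index):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

rng = np.random.default_rng(0)
bs, d, n1, n2 = 2, 4, 5, 3
x = rng.normal(size=(bs, d))
W1, b1 = rng.normal(size=(d, n1)), np.zeros(n1)
W2, b2 = rng.normal(size=(n1, d, n2)), np.zeros((n1, n2))

p1 = softmax(x @ W1 + b1)                            # (bs, n1): level-1 class probabilities
p2 = softmax(np.einsum("bd,kdj->bkj", x, W2) + b2)   # (bs, n1, n2): level-2, conditioned on level 1
p_all = (p1[:, :, None] * p2).reshape(bs, n1 * n2)   # rows sum to 1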
Example #14
    def test_DownsampleFactorMaxStride(self):
        rng = np.random.RandomState(utt.fetch_seed())
        # maxpool, stride, ignore_border, input, output sizes
        examples = (
            ((1, 1), (1, 1), True, (4, 10, 16, 16), (4, 10, 16, 16)),
            ((1, 1), (5, 7), True, (4, 10, 16, 16), (4, 10, 4, 3)),
            ((1, 1), (1, 1), False, (4, 10, 16, 16), (4, 10, 16, 16)),
            ((1, 1), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
            ((3, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 14, 14)),
            ((3, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 5, 5)),
            ((3, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
            ((3, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 14, 14)),
            ((3, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)),
            ((3, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
            ((5, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 12, 14)),
            ((5, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 4, 5)),
            ((5, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
            ((5, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 12, 14)),
            ((5, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 5, 6)),
            ((5, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
            ((16, 16), (1, 1), True, (4, 10, 16, 16), (4, 10, 1, 1)),
            ((16, 16), (5, 7), True, (4, 10, 16, 16), (4, 10, 1, 1)),
            ((16, 16), (1, 1), False, (4, 10, 16, 16), (4, 10, 1, 1)),
            ((16, 16), (5, 7), False, (4, 10, 16, 16), (4, 10, 1, 1)),
            ((3,), (5,), True, (16,), (3,)),
            ((3,), (5,), True, (2, 16), (2, 3)),
            ((5,), (3,), True, (2, 3, 16), (2, 3, 4)),
            ((5, 1, 3), (3, 3, 3), True, (2, 16, 16, 16), (2, 4, 6, 5)),
            ((5, 1, 3), (3, 3, 3), True, (4, 2, 16, 16, 16), (4, 2, 4, 6, 5)),
        )

        for example, mode in product(
                examples,
            ["max", "sum", "average_inc_pad", "average_exc_pad"]):
            (maxpoolshp, stride, ignore_border, inputshp, outputshp) = example
            # generate random images
            imval = rng.rand(*inputshp)
            images = theano.shared(imval)
            # Pool op
            numpy_output_val = self.numpy_max_pool_nd_stride(
                imval, maxpoolshp, ignore_border, stride, mode)
            assert (numpy_output_val.shape == outputshp
                    ), "outshape is {}, calculated shape is {}".format(
                        outputshp,
                        numpy_output_val.shape,
                    )
            maxpool_op = Pool(ndim=len(maxpoolshp),
                              ignore_border=ignore_border,
                              mode=mode)(images, maxpoolshp, stride)
            f = function([], maxpool_op)
            output_val = f()
            utt.assert_allclose(output_val, numpy_output_val)
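
The expected output sizes in the table above follow a per-dimension rule; a sketch consistent with those rows (an illustration, not the library's own Pool.out_shape code):

def pooled_len(size, pool, stride, ignore_border):
    # one spatial dimension of the pooled output
    if ignore_border:
        return (size - pool) // stride + 1
    if stride >= pool:
        return (size - 1) // stride + 1
    return max(0, (size - 1 - pool) // stride + 1) + 1

assert pooled_len(16, 3, 5, True) == 3    # row ((3,), (5,), True, (16,), (3,))
assert pooled_len(16, 3, 3, False) == 6   # row ((3, 3), (3, 3), False, ...) -> (..., 6, 6)
assert pooled_len(16, 5, 3, False) == 5   # row ((5, 3), (3, 3), False, ...) -> (..., 5, 6)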
Example #15
def test_batch_normalization_train():
    utt.seed_rng()

    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, running_mean, running_var = (
                vartype(n)
                for n in ("x", "scale", "bias", "running_mean", "running_var")
            )
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            (
                out,
                x_mean,
                x_invstd,
                out_running_mean,
                out_running_var,
            ) = batchnorm.batch_normalization_train(
                x,
                scale,
                bias,
                axes,
                eps,
                running_average_factor,
                running_mean,
                running_var,
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
            x_invstd2 = aet.reciprocal(aet.sqrt(x_var2 + eps))
            scale2 = aet.addbroadcast(scale, *axes2)
            bias2 = aet.addbroadcast(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            m = aet.cast(
                aet.prod(x.shape) / aet.prod(scale.shape), aesara.config.floatX
            )
            out_running_mean2 = (
                running_mean * (1 - running_average_factor)
                + x_mean2 * running_average_factor
            )
            out_running_var2 = (
                running_var * (1 - running_average_factor)
                + (m / (m - 1)) * x_var2 * running_average_factor
            )
            # backward pass
            dy = vartype("dy")
            grads = aet.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = aet.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # second-order backward pass
            dx = vartype("dinputs")
            dscale = vartype("dscale")
            dbias = vartype("dbias")
            grad_grads = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
                    {grads[0]: dx, grads[1]: dscale, grads[2]: dbias}
                ),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean,
                    x_invstd,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # reference second-order backward pass
            grad_grads2 = aet.grad(
                None,
                wrt=[x, dy, scale],
                known_grads=OrderedDict(
                    {grads2[0]: dx, grads2[1]: dscale, grads2[2]: dbias}
                ),
                consider_constant=[
                    x,
                    dy,
                    scale,
                    bias,
                    x_mean2,
                    x_var2,
                    running_mean,
                    running_var,
                ],
                return_disconnected="zero",
            )
            # compile
            f = aesara.function(
                [x, scale, bias, running_mean, running_var, dy, dx, dscale, dbias],
                [
                    out,
                    x_mean,
                    x_invstd,
                    out_running_mean,
                    out_running_var,
                    out2,
                    x_mean2,
                    x_invstd2,
                    out_running_mean2,
                    out_running_var2,
                ]
                + grads
                + grads2
                + grad_grads
                + grad_grads2,
            )
            # check if the abstract Ops have been replaced
            assert not any(
                [
                    isinstance(
                        n.op,
                        (
                            batchnorm.AbstractBatchNormTrain,
                            batchnorm.AbstractBatchNormInference,
                            batchnorm.AbstractBatchNormTrainGrad,
                        ),
                    )
                    for n in f.maker.fgraph.toposort()
                ]
            )
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (2, 3, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(
                    1 if d in axes2 else s for d, s in enumerate(data_shape)
                )
                X = 4 + 3 * np.random.randn(*data_shape).astype(aesara.config.floatX)
                Dy = -1 + 2 * np.random.randn(*data_shape).astype(aesara.config.floatX)
                Scale = np.random.randn(*param_shape).astype(aesara.config.floatX)
                Bias = np.random.randn(*param_shape).astype(aesara.config.floatX)
                Running_mean = np.random.randn(*param_shape).astype(
                    aesara.config.floatX
                )
                Running_var = np.random.randn(*param_shape).astype(aesara.config.floatX)
                Dx = 4 + 3 * np.random.randn(*data_shape).astype(aesara.config.floatX)
                Dscale = -1 + 2 * np.random.randn(*param_shape).astype(
                    aesara.config.floatX
                )
                Dbias = np.random.randn(*param_shape).astype(aesara.config.floatX)

                outputs = f(
                    X, Scale, Bias, Running_mean, Running_var, Dy, Dx, Dscale, Dbias
                )
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 5])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 5])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # invstd
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # running_mean
                utt.assert_allclose(
                    np.nan_to_num(outputs[4]), np.nan_to_num(outputs[4 + 5])
                )  # running_var
                # compare gradients
                utt.assert_allclose(outputs[10], outputs[10 + 3], atol=1e-4)  # dx
                utt.assert_allclose(
                    outputs[11], outputs[11 + 3], rtol=2e-4, atol=1e-4
                )  # dscale
                utt.assert_allclose(outputs[12], outputs[12 + 3])  # dbias
                # compare second-order gradients
                utt.assert_allclose(outputs[16], outputs[16 + 3], atol=1e-4)  # ddx
                utt.assert_allclose(outputs[17], outputs[17 + 3])  # ddy
                utt.assert_allclose(
                    outputs[18], outputs[18 + 3], rtol=3e-4, atol=1e-4
                )  # ddscale
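
The running statistics asserted above are exponential moving averages with an unbiased-variance correction, new = (1 - f) * old + f * batch_stat, the variance term additionally scaled by m / (m - 1); a NumPy sketch for the per-activation case, where m is the number of elements reduced per parameter (names are illustrative):

import numpy as np

rng = np.random.default_rng(0)
f = 0.3                                    # running_average_factor, as in the test
x = rng.normal(size=(5, 10)).astype("float32")
running_mean = np.zeros(10, dtype="float32")
running_var = np.ones(10, dtype="float32")

batch_mean = x.mean(axis=0)
batch_var = x.var(axis=0)                  # biased estimate (divides by m)
m = x.shape[0]                             # elements reduced per parameter (axis 0 only)
new_running_mean = (1 - f) * running_mean + f * batch_mean
new_running_var = (1 - f) * running_var + f * (m / (m - 1)) * batch_var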
Example #16
    def validate(
        self,
        image_shape,
        filter_shape,
        border_mode="valid",
        subsample=(1, 1),
        input=None,
        filters=None,
        verify_grad=True,
        non_contiguous=False,
        filter_dilation=(1, 1),
    ):
        """
        :param image_shape: The constant shape info passed to corrMM.
        :param filter_shape: The constant shape info passed to corrMM.
        """
        if not theano.config.cxx:
            pytest.skip("Need cxx to test conv2d")
        N_image_shape = [
            tt.get_scalar_constant_value(tt.as_tensor_variable(x)) for x in image_shape
        ]
        N_filter_shape = [
            tt.get_scalar_constant_value(tt.as_tensor_variable(x)) for x in filter_shape
        ]

        if input is None:
            input = self.input
        if filters is None:
            filters = self.filters

        # THEANO IMPLEMENTATION

        # we create a symbolic function so that verify_grad can work
        def sym_CorrMM(input, filters):
            # define theano graph and function
            input.name = "input"
            filters.name = "filters"
            rval = corr.CorrMM(border_mode, subsample, filter_dilation)(input, filters)
            rval.name = "corr_output"
            return rval

        output = sym_CorrMM(input, filters)
        output.name = f"CorrMM()({input.name},{filters.name})"
        theano_corr = theano.function([input, filters], output, mode=self.mode)

        # initialize input and compute result
        image_data = np.random.random(N_image_shape).astype(self.dtype)
        filter_data = np.random.random(N_filter_shape).astype(self.dtype)
        if non_contiguous:
            image_data = np.transpose(image_data, axes=(0, 1, 3, 2))
            image_data = image_data.copy()
            image_data = np.transpose(image_data, axes=(0, 1, 3, 2))
            filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2))
            filter_data = filter_data.copy()
            filter_data = np.transpose(filter_data, axes=(0, 1, 3, 2))
            assert not image_data.flags["CONTIGUOUS"]
            assert not filter_data.flags["CONTIGUOUS"]

        theano_output = theano_corr(image_data, filter_data)

        # REFERENCE IMPLEMENTATION
        # Testing correlation, not convolution. Reverse filters.
        filter_data_corr = np.array(filter_data[:, :, ::-1, ::-1], copy=True, order="C")
        orig_image_data = image_data
        img_shape2d = np.array(N_image_shape[-2:])
        fil_shape2d = np.array(N_filter_shape[-2:])
        dil_shape2d = np.array(filter_dilation)
        dil_fil_shape2d = (fil_shape2d - 1) * dil_shape2d + 1
        subsample2d = np.array(subsample)
        if border_mode == "full":
            padHW = dil_fil_shape2d - 1
        elif border_mode == "valid":
            padHW = np.array([0, 0])
        elif border_mode == "half":
            padHW = np.floor(dil_fil_shape2d / 2).astype("int32")
        elif isinstance(border_mode, tuple):
            padHW = np.array(border_mode)
        elif isinstance(border_mode, int):
            padHW = np.array([border_mode, border_mode])
        else:
            raise NotImplementedError(f"Unsupported border_mode {border_mode}")
        out_shape2d = (
            np.floor((img_shape2d + 2 * (padHW) - dil_fil_shape2d) / subsample2d) + 1
        )
        # avoid numpy deprecation
        out_shape2d = out_shape2d.astype("int32")
        out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
        ref_output = np.zeros(out_shape)

        # loop over output feature maps
        ref_output.fill(0)
        image_data2 = np.zeros(
            (
                N_image_shape[0],
                N_image_shape[1],
                N_image_shape[2] + 2 * padHW[0],
                N_image_shape[3] + 2 * padHW[1],
            )
        )
        image_data2[
            :,
            :,
            padHW[0] : padHW[0] + N_image_shape[2],
            padHW[1] : padHW[1] + N_image_shape[3],
        ] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
        for bb in range(N_image_shape[0]):
            for nn in range(N_filter_shape[0]):
                for im0 in range(N_image_shape[1]):
                    filter2d = filter_data_corr[nn, im0, :, :]
                    image2d = image_data[bb, im0, :, :]
                    for row in range(ref_output.shape[2]):
                        irow = row * subsample[0]  # image row
                        for col in range(ref_output.shape[3]):
                            icol = col * subsample[1]  # image col
                            ref_output[bb, nn, row, col] += (
                                image2d[
                                    irow : irow
                                    + dil_fil_shape2d[0] : filter_dilation[0],
                                    icol : icol
                                    + dil_fil_shape2d[1] : filter_dilation[1],
                                ]
                                * filter2d[::-1, ::-1]
                            ).sum()

        utt.assert_allclose(ref_output, theano_output)

        # TEST GRADIENT
        if verify_grad:
            utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data], mode=self.mode)
Example #17
def test_batch_normalization_test():
    for axes in ("per-activation", "spatial", (1, 2, 3, 4)):
        for vartype in (tensor5, tensor3, vector):
            x, scale, bias, mean, var = (
                vartype(n) for n in ("x", "scale", "bias", "mean", "var")
            )
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            out = batchnorm.batch_normalization_test(
                x, scale, bias, mean, var, axes, eps
            )
            # reference forward pass
            if axes == "per-activation":
                axes2 = (0,)
            elif axes == "spatial":
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            scale2, bias2, mean2, var2 = (
                aet.addbroadcast(t, *axes2) for t in (scale, bias, mean, var)
            )
            out2 = (x - mean2) * (scale2 / aet.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype("dy")
            grads = aet.grad(
                None, wrt=[x, scale, bias, mean, var], known_grads={out: dy}
            )
            # reference backward pass
            grads2 = aet.grad(
                None, wrt=[x, scale, bias, mean, var], known_grads={out2: dy}
            )
            # compile
            f = aesara.function(
                [x, scale, bias, mean, var, dy], [out, out2] + grads + grads2
            )
            # check if the abstract Ops have been replaced
            assert not any(
                [
                    isinstance(
                        n.op,
                        (
                            batchnorm.AbstractBatchNormTrain,
                            batchnorm.AbstractBatchNormInference,
                            batchnorm.AbstractBatchNormTrainGrad,
                        ),
                    )
                    for n in f.maker.fgraph.toposort()
                ]
            )
            # run
            for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(
                    1 if d in axes2 else s for d, s in enumerate(data_shape)
                )
                X = 4 + 3 * np.random.randn(*data_shape).astype(aesara.config.floatX)
                Dy = -1 + 2 * np.random.randn(*data_shape).astype(aesara.config.floatX)
                Scale = np.random.randn(*param_shape).astype(aesara.config.floatX)
                Bias = np.random.randn(*param_shape).astype(aesara.config.floatX)
                Mean = np.random.randn(*param_shape).astype(aesara.config.floatX)
                Var = np.random.rand(*param_shape).astype(aesara.config.floatX)
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients
                utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5)  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5)  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(
                    outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5
                )  # dvar
Example #18
    def run_conv_gradweight(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        if beta == 0:
            filters_val = None
        else:
            filters_val = np.random.random(filters_shape).astype(dtype)
            filters_val /= 10
        topgrad_val = self.array_like_conv_output(inputs_shape, filters_shape,
                                                  border_mode, subsample,
                                                  dilation, dtype)

        # Scale down the values to keep absolute errors small in utt.assert_allclose.
        inputs_val /= 10
        topgrad_val /= 10

        inputs = aesara.shared(inputs_val)
        topgrad = aesara.shared(topgrad_val)

        # Compile an Aesara function for the cuDNN implementation
        grad_w = dnn_gradweight(
            inputs,
            topgrad,
            filters_shape,
            alpha=alpha,
            beta=beta,
            out=filters_val,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )

        f = aesara.function([], grad_w, mode=mode_with_gpu)

        # Compile an Aesara function for the reference implementation
        grad_w_ref = self.cpu_gradweight_class(
            border_mode=border_mode,
            subsample=subsample,
            filter_dilation=dilation)(ref_cast(inputs), ref_cast(topgrad),
                                      filters_shape[2:])
        if conv_mode == "conv":
            if inputs.ndim == 5:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1, ::-1]
            else:
                grad_w_ref = grad_w_ref[:, :, ::-1, ::-1]
        f_ref = aesara.function([], grad_w_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_filter_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * filters_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
Example #19
    def run_conv_gradinput(self, algo, dtype, precision, parameters):
        (
            inputs_shape,
            filters_shape,
            subsample,
            dilation,
            border_mode,
            conv_mode,
            alpha,
            beta,
        ) = parameters

        if beta == 0:
            inputs_val = None
        else:
            inputs_val = np.random.random(inputs_shape).astype(dtype)
            inputs_val /= 10
        filters_val = np.random.random(filters_shape).astype(dtype)
        topgrad_val = self.array_like_conv_output(inputs_shape, filters_shape,
                                                  border_mode, subsample,
                                                  dilation, dtype)

        # Scale down the values to keep absolute errors small in utt.assert_allclose.
        filters_val /= 10
        topgrad_val /= 10

        filters = aesara.shared(filters_val)
        topgrad = aesara.shared(topgrad_val)

        # Compile an Aesara function for the cuDNN implementation
        grad_i = dnn_gradinput(
            filters,
            topgrad,
            inputs_shape,
            alpha=alpha,
            beta=beta,
            out=inputs_val,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )

        f = aesara.function([], grad_i, mode=mode_with_gpu)

        # If conv_mode is 'conv', the reference implementation should use
        # filters flipped along the width, height and time axes
        if conv_mode == "conv":
            if filters.ndim == 5:
                flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
            else:
                flipped_filters = filters[:, :, ::-1, ::-1]
        else:
            flipped_filters = filters

        # Compile an Aesara function for the reference implementation
        grad_i_ref = self.cpu_gradinput_class(
            border_mode=border_mode,
            subsample=subsample,
            filter_dilation=dilation)(ref_cast(flipped_filters),
                                      ref_cast(topgrad), inputs_shape[2:])
        f_ref = aesara.function([], grad_i_ref, mode="FAST_RUN")

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_bwd_data_algorithms:
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        if beta == 0:
            cpu_res = alpha * res_ref
        else:
            cpu_res = alpha * res_ref + beta * inputs_val
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
Example #20
    def test_machine_translation(self):
        # This test case comes from https://github.com/rizar/scan-grad-speed and
        # is an example of actual computation done with scan in the context of
        # machine translation
        #
        # 'dim' has been reduced from 1000 to 5 to make the test run faster

        # Parameters from an actual machine translation run
        batch_size = 80
        seq_len = 50
        dim = 5

        # Weight matrices
        U = aesara.shared(
            np.random.normal(size=(dim, dim),
                             scale=0.0001).astype(config.floatX))
        U.name = "U"
        V = aesara.shared(U.get_value())
        V.name = "V"
        W = aesara.shared(U.get_value())
        W.name = "W"

        # Variables and their values
        x = tensor3("x")
        x_value = np.random.normal(size=(seq_len, batch_size, dim),
                                   scale=0.0001).astype(config.floatX)

        ri = tensor3("ri")
        ri_value = x_value

        zi = tensor3("zi")
        zi_value = x_value

        init = aet.alloc(np.cast[config.floatX](0), batch_size, dim)

        def rnn_step1(
            # sequences
            x,
            ri,
            zi,
            # outputs_info
            h,
        ):
            pre_r = ri + h.dot(U)
            pre_z = zi + h.dot(V)
            r = sigmoid(pre_r)
            z = sigmoid(pre_z)

            after_r = r * h
            pre_h = x + after_r.dot(W)
            new_h = tanh(pre_h)

            res_h = z * new_h + (1 - z) * h
            return res_h

        # Compile the function twice, once with the optimization and once
        # without
        opt_mode = mode.including("scan")
        h, _ = aesara.scan(
            rnn_step1,
            sequences=[x, ri, zi],
            n_steps=seq_len,
            outputs_info=init,
            name="fpass1",
            mode=opt_mode,
        )
        cost = h[-1].sum()
        grad1 = grad(cost, [U, V, W])
        f_opt = aesara.function(inputs=[x, ri, zi],
                                outputs=grad1,
                                mode=opt_mode)

        no_opt_mode = mode.excluding("scanOp_pushout_output")
        h, _ = aesara.scan(
            rnn_step1,
            sequences=[x, ri, zi],
            n_steps=seq_len,
            outputs_info=init,
            name="fpass1",
            mode=no_opt_mode,
        )
        cost = h[-1].sum()
        grad1 = grad(cost, [U, V, W])
        f_no_opt = aesara.function(inputs=[x, ri, zi],
                                   outputs=grad1,
                                   mode=no_opt_mode)

        # Validate that the optimization has been applied
        scan_node_grad = [
            node for node in f_opt.maker.fgraph.toposort()
            if isinstance(node.op, Scan)
        ][1]

        for output in scan_node_grad.op.outputs:
            assert not (isinstance(output.owner.op, Elemwise) and any(
                [isinstance(i, Dot) for i in output.owner.inputs]))

        # Compare the outputs of the two functions on the same input data.
        f_opt_output = f_opt(x_value, ri_value, zi_value)
        f_no_opt_output = f_no_opt(x_value, ri_value, zi_value)
        utt.assert_allclose(f_opt_output, f_no_opt_output)
Example #21
    def test_odd(self):
        M = N - 1

        inputs_val = np.random.random((1, M, M)).astype("float32")
        inputs = theano.shared(inputs_val)

        rfft = theano.gpuarray.fft.curfft(inputs)
        f_rfft = theano.function([], rfft, mode=mode_with_gpu)
        res_rfft = f_rfft()

        res_rfft_comp = np.asarray(
            res_rfft[:, :, :, 0]) + 1j * np.asarray(res_rfft[:, :, :, 1])

        rfft_ref = np.fft.rfftn(inputs_val, s=(M, M), axes=(1, 2))

        utt.assert_allclose(rfft_ref, res_rfft_comp, atol=1e-4, rtol=1e-4)

        m = rfft.type()
        ifft = theano.gpuarray.fft.cuirfft(m, is_odd=True)
        f_ifft = theano.function([m], ifft, mode=mode_with_gpu)
        res_ifft = f_ifft(res_rfft)

        utt.assert_allclose(inputs_val, np.asarray(res_ifft))

        inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype("float32")
        inputs = theano.shared(inputs_val)

        irfft = theano.gpuarray.fft.cuirfft(inputs, norm="ortho", is_odd=True)
        f_irfft = theano.function([], irfft, mode=mode_with_gpu)
        res_irfft = f_irfft()

        inputs_ref = inputs_val[:, :, :, 0] + 1j * inputs_val[:, :, :, 1]
        irfft_ref = np.fft.irfftn(inputs_ref, s=(M, M), axes=(1, 2)) * M

        utt.assert_allclose(irfft_ref, res_irfft, atol=1e-4, rtol=1e-4)

        # The numerical gradient of the FFT is sensitive; epsilon must be set
        # large enough to get good accuracy.
        eps = 1e-1

        def f_rfft(inp):
            return theano.gpuarray.fft.curfft(inp)

        inputs_val = np.random.random((1, M, M)).astype("float32")
        utt.verify_grad(f_rfft, [inputs_val], eps=eps, mode=mode_with_gpu)

        def f_irfft(inp):
            return theano.gpuarray.fft.cuirfft(inp, is_odd=True)

        inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype("float32")
        utt.verify_grad(f_irfft, [inputs_val], eps=eps, mode=mode_with_gpu)

        def f_rfft(inp):
            return theano.gpuarray.fft.curfft(inp, norm="ortho")

        inputs_val = np.random.random((1, M, M)).astype("float32")
        utt.verify_grad(f_rfft, [inputs_val], eps=eps, mode=mode_with_gpu)

        def f_irfft(inp):
            return theano.gpuarray.fft.cuirfft(inp,
                                               norm="no_norm",
                                               is_odd=True)

        inputs_val = np.random.random((1, M, M // 2 + 1, 2)).astype("float32")
        utt.verify_grad(f_irfft, [inputs_val], eps=eps, mode=mode_with_gpu)
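
is_odd=True matters because a half-spectrum with M // 2 + 1 bins is compatible with both an even and an odd full length, so the inverse transform needs to be told which one to reconstruct; a NumPy-only sketch of the odd-length round trip (sizes illustrative):

import numpy as np

M = 15                                     # odd transform length
x = np.random.default_rng(0).random((1, M, M)).astype("float32")

freq = np.fft.rfftn(x, axes=(1, 2))        # last axis shrinks to M // 2 + 1 = 8 bins
assert freq.shape == (1, M, M // 2 + 1)

# 8 half-spectrum bins could come from a length-14 or a length-15 signal, so the
# inverse is given the true size explicitly (the role played by is_odd=True above).
x_back = np.fft.irfftn(freq, s=(M, M), axes=(1, 2))
np.testing.assert_allclose(x_back, x, atol=1e-4, rtol=1e-4)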
Example #22
    def test_one_sequence_one_output_weights_gpu1(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = fvector("u")
        x0 = fscalar("x0")
        W_in = fscalar("win")
        W = fscalar("w")

        mode = mode_with_gpu.excluding("InputToGpuOptimizer")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode,
        )

        output = GpuFromHost(test_ctx_name)(output)
        f2 = aesara.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=mode,
        )

        rng = np.random.default_rng(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        v_u = np.asarray(v_u, dtype="float32")
        v_x0 = np.asarray(v_x0, dtype="float32")
        W = np.asarray(W, dtype="float32")
        W_in = np.asarray(W_in, dtype="float32")

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

        aesara_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(aesara_values, v_out)

        # TO DEL
        topo = f2.maker.fgraph.toposort()
        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, HostFromGpu) for node in topo]) == 0
        assert sum([isinstance(node.op, GpuFromHost) for node in topo]) == 4

        scan_node = [
            node for node in topo if isinstance(node.op, scan.op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any(isinstance(node.op, GpuElemwise) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, HostFromGpu) for node in scan_node_topo)
        assert not any(
            isinstance(node.op, GpuFromHost) for node in scan_node_topo)
Example #23
    def assert_column_orthonormal(self, Ot):
        utt.assert_allclose(np.dot(Ot.T, Ot), np.eye(Ot.shape[1]))
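
A NumPy sketch of the property this helper asserts, using a QR factorization to obtain a matrix with orthonormal columns (sizes illustrative):

import numpy as np

rng = np.random.default_rng(0)
Q, _ = np.linalg.qr(rng.standard_normal((6, 3)))   # Q has orthonormal columns
np.testing.assert_allclose(Q.T @ Q, np.eye(Q.shape[1]), atol=1e-12)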
Example #24
    def test_DownsampleFactorMax(self):
        rng = np.random.RandomState(utt.fetch_seed())
        # maxpool, input size
        examples = (
            ((2,), (16,)),
            ((2,), (4, 16)),
            ((2,), (4, 2, 16)),
            ((1, 1), (4, 2, 16, 16)),
            ((2, 2), (4, 2, 16, 16)),
            ((3, 3), (4, 2, 16, 16)),
            ((3, 2), (4, 2, 16, 16)),
            ((3, 2, 2), (3, 2, 16, 16, 16)),
            ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
        )

        for example, ignore_border, mode in product(
                examples,
            [True, False],
            ["max", "sum", "average_inc_pad", "average_exc_pad"],
        ):
            (maxpoolshp, inputsize) = example
            imval = rng.rand(*inputsize)
            images = aesara.shared(imval)

            # Pure Numpy computation
            numpy_output_val = self.numpy_max_pool_nd(imval,
                                                      maxpoolshp,
                                                      ignore_border,
                                                      mode=mode)

            # The pool_2d or pool_3d helper methods
            if len(maxpoolshp) == 2:
                output = pool_2d(images, maxpoolshp, ignore_border, mode=mode)
                f = function(
                    [],
                    [
                        output,
                    ],
                )
                output_val = f()
                utt.assert_allclose(output_val, numpy_output_val)
            elif len(maxpoolshp) == 3:
                output = pool_3d(images, maxpoolshp, ignore_border, mode=mode)
                f = function(
                    [],
                    [
                        output,
                    ],
                )
                output_val = f()
                utt.assert_allclose(output_val, numpy_output_val)

            # Pool op
            maxpool_op = Pool(ndim=len(maxpoolshp),
                              ignore_border=ignore_border,
                              mode=mode)(images, maxpoolshp)

            output_shape = Pool.out_shape(
                imval.shape,
                maxpoolshp,
                ndim=len(maxpoolshp),
                ignore_border=ignore_border,
            )
            utt.assert_allclose(np.asarray(output_shape),
                                numpy_output_val.shape)
            f = function([], maxpool_op)
            output_val = f()
            utt.assert_allclose(output_val, numpy_output_val)
Example #25
    def test1(self):
        a = tensor.dmatrix()
        w = sort(a)
        f = aesara.function([a], w)
        utt.assert_allclose(f(self.m_val), np.sort(self.m_val))