Example no. 1
    def test_forward(self):
        a = np.random.rand(1, 3, 224, 224).astype(np.float32)
        b = np.random.rand(64, 3, 7, 7).astype(np.float32)
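        # The mkl_conv positional args here appear to be (stride_h, stride_w, pad_h, pad_w):
        # stride 2, padding 3, matching the conv(a_jt, b_jt, 3, 2) reference below.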
        c = jt.mkl_ops.mkl_conv(a, b, 2, 2, 3, 3).data

        a_jt = jt.array(a)
        b_jt = jt.array(b)
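        # Un-tuned reference run; compile_options presumably just tags the JIT key
        # so this kernel is compiled separately from the tuned run below.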
        with jt.flag_scope(enable_tuner=0,
                           compile_options={"test_mkl_conv": 1}):
            c_jt = conv(a_jt, b_jt, 3, 2).data
        with jt.log_capture_scope(
                enable_tuner=1,
                compile_options={"test_mkl_conv": 2},
                log_v=0,
                log_vprefix="tuner_manager=100,conv_tuner=1000",
        ) as raw_logs:
            c_jt_tune = conv(a_jt, b_jt, 3, 2).data
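            # With the tuner enabled, the conv should be relayed to the MKL kernel;
            # the captured logs are checked against that expectation below.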

        assert np.max(c_jt - c) < 1e-4 and np.max(c_jt_tune - c) < 1e-4
        logs = find_log_with_re(
            raw_logs,
            "Run tuner conv: confidence\\((.*)\\) candidates\\((.*)\\)$")
        assert len(logs) == 1
        assert logs[0][0] == '20'
        assert simple_parser(logs[0][1]) == {'relay0': [1, 0]}
Example no. 2
def check_backward(xshape, wshape, stride, padding, dilation, use_cuda, nhwc):
    if nhwc:
        test_func = test_nhwc
    else:
        test_func = test_nchw
    if use_cuda == 1:
        op_name = "cudnn_conv"
    else:
        op_name = "mkl_conv"

    with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1,
        log_v=1, log_vprefix="op.cc=1000,exe=1000,conv_t=1000", compile_options={"test":244}
    ) as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation)
        loss = y.mean()
        dx, dw = jt.grad(loss, [x, w])
        jt.sync([y, loss, dx, dw])
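    # Reference run: CPU, tuner disabled, compiled under a separate test tag.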
    with jt.flag_scope(use_cuda=0, enable_tuner=0, compile_options={"test":233}):
        cy = test_func(x, w, stride, padding, dilation)
        closs = cy.mean()
        cdx, cdw = jt.grad(closs, [x, w])
        jt.sync([cy, closs, cdx, cdw])
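    # Expect the conv op to appear three times in the captured log:
    # the forward conv plus the two backward convs (dx and dw).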
    logs = find_log_with_re(raw_log, "(Jit op key (not )?found: " + op_name + ".*)")
    assert len(logs)==3 and "oihw" in logs[0][0], (logs)
    assert np.allclose(y.data, cy.data, 1e-3)
    assert np.allclose(dw.data, cdw.data, 1e-3), (dw.data, cdw.data)
    assert np.allclose(dx.data, cdx.data, 1e-3), (dx.data, cdx.data, np.abs(cdx.data).max(), np.abs(dx.data - cdx.data).max())
Example no. 3
        def check(xshape, wshape, stride=1, padding=0, dilation=1):
            with jt.log_capture_scope(
                    use_cuda=1,
                    enable_tuner=1,
                    log_v=1,
                    log_vprefix="op.cc=100,exe=1000") as raw_log:
                x = jt.random(xshape)
                w = jt.random(wshape)
                y = conv(x, w, stride, padding)
                mask = jt.random(y.shape)
                loss = mask * y
                dx, dw = jt.grad(loss, [x, w])
                jt.sync([y, loss, dx, dw])

            # fails when enable_tuner=1; possibly an issue with mkl_conv_backward_x
            with jt.flag_scope(use_cuda=0, enable_tuner=0):
                cy = conv(x, w, stride, padding)
                closs = mask * cy
                cdx, cdw = jt.grad(closs, [x, w])
                jt.sync([cy, closs, cdx, cdw])
            logs = find_log_with_re(raw_log,
                                    "(Jit op key (not )?found: cudnn_conv.*)")
            assert len(logs) == 3 and "oihw" in logs[0][0], logs
            assert np.allclose(y.data, cy.data)
            assert np.allclose(dx.data, cdx.data, 1e-2)
            assert np.allclose(dw.data, cdw.data, 1e-2)
Example no. 4
    def test_resnet_infer_with_feature(self):
        cat_url = "https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=3782485413,1118109468&fm=26&gp=0.jpg"
        import jittor_utils
        cat_path = f"{jt.flags.cache_path}/cat.jpg"
        print("download")
        jittor_utils.download(cat_url, cat_path)
        with open(cat_path, 'rb') as f:
            img = Image.open(f).convert('RGB')
            img = jt.array(np.array(img))
            print(img.shape, img.dtype)
            img = ((img.float() - 128) / 255).transpose(2, 0, 1)

        with jt.flag_scope(trace_py_var=2, trace_var_data=1):
            img = img[None, ...]

            resnet18 = resnet.Resnet18(pretrained=True)
            x = jt.float32(img)
            y = resnet18(x)
            y.sync()
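            # dump_trace_data() returns the op/var records collected under
            # trace_py_var=2 (trace_var_data=1 additionally stores variable data).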

            data = jt.dump_trace_data()
            jt.clear_trace_data()
            with open(f"{jt.flags.cache_path}/resnet_with_feature.pkl",
                      "wb") as f:
                pickle.dump(data, f)
            for k, v in data["execute_op_info"].items():
                for i in v['fused_ops']:
                    if i not in data["node_data"]:
                        assert 0, (i, "not found")
Example no. 5
    def test_simple_model_train(self):
        with jt.flag_scope(trace_py_var=2):

            model = Model(input_size=1)
            opt = jt.optim.SGD(model.parameters(), 0.1)

            batch_size = 10
            x = jt.float32(np.random.rand(batch_size, 1))
            y = model(x)
            opt.step(y**2)
            jt.sync_all()

            data = jt.dump_trace_data()
            jt.clear_trace_data()
            # print_stack_tree(data)
            for k, v in data["execute_op_info"].items():
                for i in v['fused_ops']:
                    if i not in data["node_data"]:
                        assert 0, (i, "not found")

            for k, v in list(data["node_data"].items()):
                if v["attrs"]["name"] == "unname":
                    assert 0
            print(len(data["node_data"]))
            with open(f"{jt.flags.cache_path}/simple_model_train.pkl",
                      "wb") as f:
                pickle.dump(data, f)
Example no. 6
    def test_backward_once_cuda(self):
        with jt.flag_scope(use_cuda=1):
            np.random.seed(0)
            jt.set_seed(3)
            model = Model2()
            n = 1
            batch_size = 50

            def get_data(n):
                for i in range(n):
                    x = np.random.rand(batch_size, 1)
                    y = x * x
                    yield jt.float32(x), jt.float32(y)

            for i, (x, y) in enumerate(get_data(n)):
                pred_y = model(x).name("pred_y")
                with jt.log_capture_scope(log_v=0,
                                          log_vprefix="op.cc=100") as logs:
                    jt.sync_all()
                logs = find_log_with_re(
                    logs, "Jit op key (not )?found: (cublas)_matmul.*")
                assert (len(logs) == 1)
                with jt.log_capture_scope(
                        log_silent=1, log_v=0,
                        log_vprefix="op.cc=100,exe=1000") as logs_b:
                    gs = jt.grad(pred_y, x)
                    gs2 = jt.grad(pred_y, model.linear1.weight)
                    jt.sync_all()
                logs_b = find_log_with_re(
                    logs_b, "Jit op key (not )?found: (cublas)_matmul.*")
                assert len(logs_b) == 2, len(logs_b)
            jt.clean()
Example no. 7
def check_backward(xshape, wshape, stride, padding, dilation, groups, use_cuda,
                   nhwc):
    assert nhwc == 0
    test_func = test_nchw

    # only check cudnn
    with jt.log_capture_scope(use_cuda=use_cuda,
                              enable_tuner=1,
                              log_v=10,
                              log_vprefix="conv_tuner.cc=1000") as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation, groups)
        dx, dw = jt.grad(y, [x, w])
        jt.sync([y, dx, dw])
    with jt.flag_scope(use_cuda=0,
                       enable_tuner=0,
                       compile_options={"test": 233}):
        cy = test_func(x, w, stride, padding, dilation, groups)
        cdx, cdw = jt.grad(cy, [x, w])
        jt.sync([cy, cdx, cdw])

    assert np.allclose(y.data, cy.data)
    assert np.allclose(dw.data,
                       cdw.data, 1e-3), (dw.data, cdw.data,
                                         np.abs(dw.data - cdw.data).max())
    assert np.allclose(dx.data,
                       cdx.data, 1e-3), (dx.data, cdx.data,
                                         np.abs(dx.data - cdx.data).max())
Example no. 8
    def test_print_trace(self):
        jt.print_trace()

        # force use addr2line
        jt.flags.gdb_path = ""
        with jt.flag_scope(gdb_path=""):
            jt.print_trace()
Example no. 9
    def test(self):
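        # jt.numpy_code runs forward_code/backward_code with numpy arrays on CPU
        # and cupy arrays on GPU, so check() below exercises both backends.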
        def forward_code(np, data):
            a = data["inputs"][0]
            b = data["outputs"][0]
            if (jt.flags.use_cuda == 0):
                assert isinstance(a, numpy.ndarray)
            else:
                assert isinstance(a, cupy.core.core.ndarray)
            np.add(a, a, out=b)

        def backward_code(np, data):
            dout = data["dout"]
            out = data["outputs"][0]
            np.copyto(out, dout * 2.0)

        def check():
            a = jt.random((5, 1))
            b = jt.numpy_code(
                a.shape,
                a.dtype,
                [a],
                forward_code,
                [backward_code],
            )
            assert numpy.allclose(b.data, (a + a).data)
            da = jt.grad(b, a)
            one = numpy.ones(a.shape)
            assert numpy.allclose(da.data, one * 2.0)

        if jt.has_cuda:
            with jt.flag_scope(use_cuda=1):
                check()
        check()
Example no. 10
def check_forward(xshape, wshape, stride, padding, dilation, use_cuda, nhwc):
    if nhwc:
        test_func = test_nhwc
    else:
        test_func = test_nchw
    if use_cuda == 1:
        op_name = "cudnn_conv"
    else:
        op_name = "mkl_conv"
    with jt.log_capture_scope(use_cuda=use_cuda,
                              enable_tuner=1,
                              log_v=0,
                              log_vprefix="op.cc=100,conv_tuner=1000",
                              compile_options={"test": 266}) as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation)
        y.sync()
    with jt.flag_scope(use_cuda=0,
                       enable_tuner=0,
                       compile_options={"test": 255}):
        cy = test_func(x, w, stride, padding, dilation)
        cy.sync()
    logs = find_log_with_re(raw_log,
                            "(Jit op key (not )?found: " + op_name + ".*)")
    assert len(logs) == 1 and "oihw" in logs[0][0], logs
    assert np.allclose(y.data, cy.data)
Example no. 11
    def test_print_trace(self):
        jt.print_trace()

        if os.name != 'nt':
            # force use addr2line
            with jt.flag_scope(gdb_path=""):
                jt.print_trace()
Example no. 12
        def check(xshape, wshape, stride, pad):
            a = np.random.rand(*xshape).astype(np.float32)
            b = np.random.rand(*wshape).astype(np.float32)
            c = jt.mkl_ops.mkl_conv(a,
                                    b,
                                    stride,
                                    stride,
                                    pad,
                                    pad,
                                    1,
                                    1,
                                    xformat="acdb",
                                    wformat="hwio").data

            a_jt = jt.array(a)
            b_jt = jt.array(b)
            with jt.flag_scope(enable_tuner=0,
                               compile_options={"test_mkl_conv": uid[0]}):
                c_jt = conv_nhwc_hwio(a_jt, b_jt, stride, pad).data
            with jt.log_capture_scope(
                    enable_tuner=1,
                    compile_options={"test_mkl_conv": uid[0] + 1},
                    log_v=0,
                    log_vprefix="tuner_manager=100,conv_tuner=1000",
            ) as raw_logs:
                c_jt_tune = conv_nhwc_hwio(a_jt, b_jt, stride, pad).data
            uid[0] += 2

            assert np.max(c_jt - c) < 1e-4 and np.max(c_jt_tune - c) < 1e-4
            logs = find_log_with_re(
                raw_logs,
                "Run tuner conv: confidence\\((.*)\\) candidates\\((.*)\\)$")
            assert len(logs) == 1, raw_logs
            assert logs[0][0] == '20'
            assert simple_parser(logs[0][1]) == {'relay0': [1, 0]}
Example no. 13
 def test_array_migrate(self):
     with jt.flag_scope(use_cuda=1):
         a = jt.array(np.float32([1,2,3]))
         b = jt.code(a.shape, a.dtype, [a], cpu_src="""
             for (int i=0; i<in0shape0; i++)
                 @out(i) = @in0(i)*@in0(i)*2;
         """)
         assert (b.data==[2,8,18]).all()
Example no. 14
    def test_resnet(self):
        with jt.flag_scope(trace_py_var=2):

            resnet18 = resnet.Resnet18()
            x = jt.float32(np.random.rand(2, 3, 224, 224))
            y = resnet18(x)
            y.sync()

            data = jt.dump_trace_data()
            jt.clear_trace_data()
Example no. 15
    def test5(self):
        with jt.flag_scope(use_cuda=1):
            f32 = jt.float32
            np.random.seed(0)
            jt.set_seed(3)

            x = f32(np.random.rand(1, 1))
            w = (jt.random([x.shape[-1], 10]) - f32(0.5)) / f32(
                x.shape[-1])**f32(0.5)
            jt.nn.matmul(x, w).data
Example no. 16
 def test_main_cuda(self):
     with jt.flag_scope(use_cuda=1):
         test_n = 10
         test([50, 50, 50, 50], multiplication, subtraction)
         for i in range(test_n):
             n = random.randint(1, 4)
             shape = []
             for j in range(n):
                 shape.append(random.randint(1, 50))
             test(shape, get_random_op(), get_random_op())
Example no. 17
    def test_simple_model(self):
        with jt.flag_scope(trace_py_var=2):

            model = Model(input_size=1)
            batch_size = 10
            x = jt.float32(np.random.rand(batch_size, 1))
            y = model(x)
            y.sync()

            data = jt.dump_trace_data()
            jt.clear_trace_data()
Example no. 18
        def check(xshape, wshape, stride=(1,1,1), padding=(0,0,0), dilation=(1,1,1), group=1):
            with jt.flag_scope(use_cuda=1):
                x = jt.random(xshape)
                w = jt.random(wshape)
                jt.sync_all()
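            # x and w were created under use_cuda=1; the y2 reference below is
            # computed outside that scope, i.e. on the CPU path.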

            y2 = jt.nn.conv_transpose3d(x, w, None, stride, padding, 0, group, dilation)
            jt.sync_all()

            with jt.flag_scope(use_cuda=1):
                # y = jt.cudnn.ops.cudnn_conv3d_backward_x(w, x, *y2.shape[2:], *stride, *padding, *dilation, group)
                y = jt.nn.conv_transpose3d(x, w, None, stride, padding, 0, group, dilation)
                masky = jt.rand_like(y)
                dx, dw = jt.grad(masky*y, [x, w])
                jt.sync_all()
                
            dx2, dw2 = jt.grad(masky*y2, [x, w])
            jt.sync_all()
            np.testing.assert_allclose(y.numpy(), y2.numpy(), rtol=1e-6, atol=1e-4)
            np.testing.assert_allclose(dx.numpy(), dx2.numpy(), rtol=1e-6, atol=1e-4)
            np.testing.assert_allclose(dw.numpy(), dw2.numpy(), rtol=1e-5, atol=1e-3)
Example no. 19
    def test5(self):
        with jt.flag_scope(use_cuda=1):
            f32 = jt.float32
            np.random.seed(0)
            jt.set_seed(3)

            x = f32(np.random.rand(1, 1))
            w = jt.make_var(
                [x.shape[-1], 10],
                init=lambda *a:
                (jt.random(*a) - f32(0.5)) / f32(x.shape[-1])**f32(0.5))
            jt.nn.matmul(x, w).data
Example no. 20
 def test_matmul_cuda(self):
     with jt.flag_scope(use_cuda=1):
         test_matmul([2, 5], [5, 8])
         test_matmul([200, 500], [500, 800])
         test_matmul([500, 500], [500, 50])
         test_matmul2([2, 5], [5, 8], False, False)
         test_matmul2([5, 2], [5, 8], True, False)
         test_matmul2([500, 200], [500, 800], True, False)
         test_matmul2([500, 500], [500, 50], True, False)
         test_matmul2([2, 5], [8, 5], False, True)
         test_matmul2([200, 500], [800, 500], False, True)
         test_matmul2([500, 500], [50, 500], False, True)
Example no. 21
    def test_simple_model(self):
        with jt.flag_scope(trace_py_var=2):

            model = Model(input_size=1)
            batch_size = 10
            x = jt.float32(np.random.rand(batch_size, 1))
            y = model(x)
            y.sync()

            data = jt.dump_trace_data()
            jt.clear_trace_data()
            with open(f"{jt.flags.cache_path}/simple_model.pkl", "wb") as f:
                pickle.dump(data, f)
Example no. 22
    def test_resnet_train(self):
        with jt.flag_scope(trace_py_var=2):

            resnet18 = resnet.Resnet18()
            opt = jt.optim.SGD(resnet18.parameters(), 0.1)
            x = jt.float32(np.random.rand(2, 3, 224, 224))
            y = resnet18(x)

            opt.step(y**2)
            jt.sync_all()

            data = jt.dump_trace_data()
            jt.clear_trace_data()
Example no. 23
    def test_64_bit(self):
        a = np.random.rand(10)
        b = jt.array(a)
        assert b.dtype == "float32"

        with jt.flag_scope(auto_convert_64_to_32=0):
            a = np.random.rand(10)
            b = jt.array(a)
            assert b.dtype == "float64"

        a = np.random.rand(10)
        b = jt.array64(a)
        assert b.dtype == "float64"
Example no. 24
    def test_matmul_type_cuda(self):
        with jt.flag_scope(use_cuda=1):
            test_matmul2([2, 5], [5, 8], False, False, 'float32')
            test_matmul2([5, 2], [5, 8], True, False, 'float32')
            test_matmul2([2, 5], [8, 5], False, True, 'float32')

            test_matmul2([2, 5], [5, 8], False, False, 'float64')
            test_matmul2([5, 2], [5, 8], True, False, 'float64')
            test_matmul2([2, 5], [8, 5], False, True, 'float64')

            test_matmul2([2, 5], [5, 8], False, False, 'int32')
            test_matmul2([5, 2], [5, 8], True, False, 'int32')
            test_matmul2([2, 5], [8, 5], False, True, 'int32')
Example no. 25
 def test_stat(self):
     jt.clean()
     with jt.flag_scope(use_stat_allocator=1, use_sfrl_allocator=0):
         a = jt.random([10, 10])
         b = a + a
         c = a * b
         c.data
         del a, b, c
         gc.collect()
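     # 10x10 float32 = 400 bytes per buffer; assuming the a+a and a*b ops fuse,
     # only two buffers (a and the fused result) are allocated: 2 calls, 800 bytes.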
     assert jt.flags.stat_allocator_total_alloc_call == 2
     assert jt.flags.stat_allocator_total_alloc_byte == 800
     assert jt.flags.stat_allocator_total_free_call == 2
     assert jt.flags.stat_allocator_total_free_byte == 800
Example no. 26
    def test_simple_model_train(self):
        with jt.flag_scope(trace_py_var=2):

            model = Model(input_size=1)
            opt = jt.optim.SGD(model.parameters(), 0.1)

            batch_size = 10
            x = jt.float32(np.random.rand(batch_size, 1))
            y = model(x)
            opt.step(y**2)
            jt.sync_all()

            data = jt.dump_trace_data()
            jt.clear_trace_data()
Example no. 27
 def check(xshape, wshape, stride=1, padding=0, dilation=1):
     with jt.log_capture_scope(use_cuda=1, enable_tuner=1,
         log_v=0, log_vprefix="op.cc=100"
     ) as raw_log:
         x = jt.random(xshape)
         w = jt.random(wshape)
         y = conv_oihw(x, w, stride, padding, dilation)
         y.sync()
     with jt.flag_scope(use_cuda=0, enable_tuner=1):
         cy = conv_oihw(x, w, stride, padding, dilation)
         cy.sync()
     logs = find_log_with_re(raw_log, "(Jit op key (not )?found: cudnn_conv.*)")
     assert len(logs)==1 and "oihw" in logs[0][0], logs
     assert np.allclose(y.data, cy.data), np.abs(y.data-cy.data).max()
Example no. 28
 def test(self):
     a = jt.array([1,2,3])
     a.sync()
     assert a.compile_options=={}
     a.compile_options = {"compile_shapes":1}
     assert a.compile_options=={"compile_shapes":1}
     b = a+a
     assert b.compile_options=={}
     with jt.flag_scope(compile_options={"compile_shapes":1}):
         c = a+b
     assert c.compile_options=={"compile_shapes":1}
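     # The profiler report below should contain two entries, and the op built
     # inside the scope should carry compile_shapes:1 in its compile key.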
     with jt.profile_scope() as report:
         c.sync()
     assert len(report)==2 and "compile_shapes:1" in report[1][0]
Example no. 29
        def check_gpu_with_cpu(T, C, N, S, S_min):
            jt.set_global_seed(1)

            # Initialize random batch of input vectors, for *size = (T,N,C)
            input = jt.randn(T, N, C).log_softmax(2)
            # input = -jt.ones((T, N, C))
            # input[0,0,1] += 0.01

            # Initialize random batch of targets (0 = blank, 1:C = classes)
            target = jt.randint(low=1, high=C, shape=(N, S), dtype=jt.int)
            _input_jt = input

            input_lengths = jt.full((N, ), T, dtype=jt.int)
            target_lengths = jt.randint(low=S_min,
                                        high=S + 1,
                                        shape=(N, ),
                                        dtype=jt.int)
            # ctc_loss = nn.CTCLoss()
            loss = jt.ctc_loss(input,
                               target,
                               input_lengths,
                               target_lengths,
                               reduction='none')
            _loss_jt = loss

            loss_jt = loss.numpy()

            dinput_jt = jt.grad(_loss_jt, _input_jt)
            dinput_jt.sync()
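            # Re-run the same CTC loss on GPU and check loss and gradient against
            # the CPU results computed above.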

            with jt.flag_scope(use_cuda=1):
                input = input.copy()
                target = target.copy()
                input_lengths = input_lengths.copy()
                target_lengths = target_lengths.copy()
                loss = jt.ctc_loss(input,
                                   target,
                                   input_lengths,
                                   target_lengths,
                                   reduction='none')
                grad = jt.grad(loss, input)
                np.testing.assert_allclose(_loss_jt.numpy(),
                                           loss.numpy(),
                                           atol=1e-5,
                                           rtol=1e-5)
                np.testing.assert_allclose(dinput_jt.numpy(),
                                           grad.numpy(),
                                           atol=1e-5,
                                           rtol=1e-5)
Example no. 30
 def check(xshape, wshape, stride=(1,1,1), padding=(0,0,0), dilation=(1,1,1), group=1):
     with jt.flag_scope(use_cuda=1):
         x = jt.random(xshape)
         w = jt.random(wshape)
         # y = jt.cudnn.ops.cudnn_conv3d(x, w, *stride, *padding, *dilation, group)
         y = jt.nn.conv3d(x, w, None, stride, padding, dilation, group)
         masky = jt.rand_like(y)
         dx, dw = jt.grad(masky*y, [x, w])
         jt.sync_all()
         
     y2 = jt.nn.conv3d(x, w, None, stride, padding, dilation, group)
     dx2, dw2 = jt.grad(masky*y2, [x, w])
     np.testing.assert_allclose(y.data, y2.data)
     np.testing.assert_allclose(dx.data, dx2.data, rtol=1e-5, atol=1e-3)
     np.testing.assert_allclose(dw.data, dw2.data, rtol=1e-5, atol=1e-3)