def test_forward(self):
    a = np.random.rand(1, 3, 224, 224).astype(np.float32)
    b = np.random.rand(64, 3, 7, 7).astype(np.float32)
    c = jt.mkl_ops.mkl_conv(a, b, 2, 2, 3, 3).data

    a_jt = jt.array(a)
    b_jt = jt.array(b)
    with jt.flag_scope(enable_tuner=0, compile_options={"test_mkl_conv": 1}):
        c_jt = conv(a_jt, b_jt, 3, 2).data
    with jt.log_capture_scope(
        enable_tuner=1,
        compile_options={"test_mkl_conv": 2},
        log_v=0,
        log_vprefix="tuner_manager=100,conv_tuner=1000",
    ) as raw_logs:
        c_jt_tune = conv(a_jt, b_jt, 3, 2).data

    assert np.max(c_jt - c) < 1e-4 and np.max(c_jt_tune - c) < 1e-4
    logs = find_log_with_re(
        raw_logs,
        "Run tuner conv: confidence\\((.*)\\) candidates\\((.*)\\)$")
    assert len(logs) == 1
    assert logs[0][0] == '20'
    assert simple_parser(logs[0][1]) == {'relay0': [1, 0]}
def check_backward(xshape, wshape, stride, padding, dilation, use_cuda, nhwc):
    if nhwc:
        test_func = test_nhwc
    else:
        test_func = test_nchw
    if use_cuda == 1:
        op_name = "cudnn_conv"
    else:
        op_name = "mkl_conv"
    with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1,
                              log_v=1, log_vprefix="op.cc=1000,exe=1000,conv_t=1000",
                              compile_options={"test": 244}) as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation)
        loss = y.mean()
        dx, dw = jt.grad(loss, [x, w])
        jt.sync([y, loss, dx, dw])
    with jt.flag_scope(use_cuda=0, enable_tuner=0,
                       compile_options={"test": 233}):
        cy = test_func(x, w, stride, padding, dilation)
        closs = cy.mean()
        cdx, cdw = jt.grad(closs, [x, w])
        jt.sync([cy, closs, cdx, cdw])
    logs = find_log_with_re(raw_log,
                            "(Jit op key (not )?found: " + op_name + ".*)")
    assert len(logs) == 3 and "oihw" in logs[0][0], logs
    assert np.allclose(y.data, cy.data, 1e-3)
    assert np.allclose(dw.data, cdw.data, 1e-3), (dw.data, cdw.data)
    assert np.allclose(dx.data, cdx.data, 1e-3), (
        dx.data, cdx.data,
        np.abs(cdx.data).max(), np.abs(dx.data - cdx.data).max())
def check(xshape, wshape, stride=1, padding=0, dilation=1):
    with jt.log_capture_scope(
            use_cuda=1, enable_tuner=1,
            log_v=1, log_vprefix="op.cc=100,exe=1000") as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = conv(x, w, stride, padding)
        mask = jt.random(y.shape)
        loss = mask * y
        dx, dw = jt.grad(loss, [x, w])
        jt.sync([y, loss, dx, dw])
    # fails when enable_tuner=1, something wrong with mkl_conv_backward_x maybe.
    with jt.flag_scope(use_cuda=0, enable_tuner=0):
        cy = conv(x, w, stride, padding)
        closs = mask * cy
        cdx, cdw = jt.grad(closs, [x, w])
        jt.sync([cy, closs, cdx, cdw])
    logs = find_log_with_re(raw_log, "(Jit op key (not )?found: cudnn_conv.*)")
    assert len(logs) == 3 and "oihw" in logs[0][0], logs
    assert np.allclose(y.data, cy.data)
    assert np.allclose(dx.data, cdx.data, 1e-2)
    assert np.allclose(dw.data, cdw.data, 1e-2)
def test_resnet_infer_with_feature(self):
    cat_url = "https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=3782485413,1118109468&fm=26&gp=0.jpg"
    import jittor_utils
    cat_path = f"{jt.flags.cache_path}/cat.jpg"
    print("download")
    jittor_utils.download(cat_url, cat_path)
    with open(cat_path, 'rb') as f:
        img = Image.open(f).convert('RGB')
    img = jt.array(np.array(img))
    print(img.shape, img.dtype)
    img = ((img.float() - 128) / 255).transpose(2, 0, 1)
    with jt.flag_scope(trace_py_var=2, trace_var_data=1):
        img = img[None, ...]
        resnet18 = resnet.Resnet18(pretrained=True)
        x = jt.float32(img)
        y = resnet18(x)
        y.sync()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
    with open(f"{jt.flags.cache_path}/resnet_with_feature.pkl", "wb") as f:
        pickle.dump(data, f)
    for k, v in data["execute_op_info"].items():
        for i in v['fused_ops']:
            if i not in data["node_data"]:
                assert 0, (i, "not found")
def test_simple_model_train(self):
    with jt.flag_scope(trace_py_var=2):
        model = Model(input_size=1)
        opt = jt.optim.SGD(model.parameters(), 0.1)
        batch_size = 10
        x = jt.float32(np.random.rand(batch_size, 1))
        y = model(x)
        opt.step(y**2)
        jt.sync_all()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
    # print_stack_tree(data)
    for k, v in data["execute_op_info"].items():
        for i in v['fused_ops']:
            if i not in data["node_data"]:
                assert 0, (i, "not found")
    for k, v in list(data["node_data"].items()):
        if v["attrs"]["name"] == "unname":
            assert 0
    print(len(data["node_data"]))
    with open(f"{jt.flags.cache_path}/simple_model_train.pkl", "wb") as f:
        pickle.dump(data, f)
def test_backward_once_cuda(self):
    with jt.flag_scope(use_cuda=1):
        np.random.seed(0)
        jt.set_seed(3)
        model = Model2()
        n = 1
        batch_size = 50

        def get_data(n):
            for i in range(n):
                x = np.random.rand(batch_size, 1)
                y = x * x
                yield jt.float32(x), jt.float32(y)

        for i, (x, y) in enumerate(get_data(n)):
            pred_y = model(x).name("pred_y")
            with jt.log_capture_scope(log_v=0, log_vprefix="op.cc=100") as logs:
                jt.sync_all()
            logs = find_log_with_re(
                logs, "Jit op key (not )?found: (cublas)_matmul.*")
            assert (len(logs) == 1)
            with jt.log_capture_scope(
                    log_silent=1,
                    log_v=0, log_vprefix="op.cc=100,exe=1000") as logs_b:
                gs = jt.grad(pred_y, x)
                gs2 = jt.grad(pred_y, model.linear1.weight)
                jt.sync_all()
            logs_b = find_log_with_re(
                logs_b, "Jit op key (not )?found: (cublas)_matmul.*")
            assert len(logs_b) == 2, len(logs_b)
    jt.clean()
def check_backward(xshape, wshape, stride, padding, dilation, groups, use_cuda, nhwc):
    assert nhwc == 0
    test_func = test_nchw
    # only check cudnn
    with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1,
                              log_v=10, log_vprefix="conv_tuner.cc=1000") as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation, groups)
        dx, dw = jt.grad(y, [x, w])
        jt.sync([y, dx, dw])
    with jt.flag_scope(use_cuda=0, enable_tuner=0,
                       compile_options={"test": 233}):
        cy = test_func(x, w, stride, padding, dilation, groups)
        cdx, cdw = jt.grad(cy, [x, w])
        jt.sync([cy, cdx, cdw])
    assert np.allclose(y.data, cy.data)
    assert np.allclose(dw.data, cdw.data, 1e-3), (
        dw.data, cdw.data, np.abs(dw.data - cdw.data).max())
    assert np.allclose(dx.data, cdx.data, 1e-3), (
        dx.data, cdx.data, np.abs(dx.data - cdx.data).max())
def test_print_trace(self):
    jt.print_trace()
    # force use addr2line
    jt.flags.gdb_path = ""
    with jt.flag_scope(gdb_path=""):
        jt.print_trace()
def test(self):
    def forward_code(np, data):
        a = data["inputs"][0]
        b = data["outputs"][0]
        if (jt.flags.use_cuda == 0):
            assert isinstance(a, numpy.ndarray)
        else:
            assert isinstance(a, cupy.core.core.ndarray)
        np.add(a, a, out=b)

    def backward_code(np, data):
        dout = data["dout"]
        out = data["outputs"][0]
        np.copyto(out, dout * 2.0)

    def check():
        a = jt.random((5, 1))
        b = jt.numpy_code(
            a.shape,
            a.dtype,
            [a],
            forward_code,
            [backward_code],
        )
        assert numpy.allclose(b.data, (a + a).data)
        da = jt.grad(b, a)
        one = numpy.ones(a.shape)
        assert numpy.allclose(da.data, one * 2.0)

    if jt.has_cuda:
        with jt.flag_scope(use_cuda=1):
            check()
    check()
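
# For context, a minimal self-contained sketch of the same jt.numpy_code
# callback contract the test above relies on. The scaling factor, variable
# names, and final asserts here are illustrative assumptions; only the
# data["inputs"] / data["outputs"] / data["dout"] keys and the
# jt.numpy_code(shape, dtype, inputs, forward, backwards) call mirror the test.

import numpy
import jittor as jt

def forward_code(np, data):
    # "np" is the array module handed in by Jittor (numpy on CPU, cupy on CUDA).
    a = data["inputs"][0]
    b = data["outputs"][0]
    np.multiply(a, 3.0, out=b)      # y = 3 * a, written in place

def backward_code(np, data):
    # Receive the upstream gradient and write d(loss)/d(a) in place.
    dout = data["dout"]
    out = data["outputs"][0]
    np.copyto(out, dout * 3.0)      # dy/da = 3

a = jt.random((5, 1))
y = jt.numpy_code(a.shape, a.dtype, [a], forward_code, [backward_code])
da = jt.grad(y, a)
one = numpy.ones(a.shape)
assert numpy.allclose(y.data, (a * 3).data)
assert numpy.allclose(da.data, one * 3.0)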
def check_forward(xshape, wshape, stride, padding, dilation, use_cuda, nhwc):
    if nhwc:
        test_func = test_nhwc
    else:
        test_func = test_nchw
    if use_cuda == 1:
        op_name = "cudnn_conv"
    else:
        op_name = "mkl_conv"
    with jt.log_capture_scope(use_cuda=use_cuda, enable_tuner=1,
                              log_v=0, log_vprefix="op.cc=100,conv_tuner=1000",
                              compile_options={"test": 266}) as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = test_func(x, w, stride, padding, dilation)
        y.sync()
    with jt.flag_scope(use_cuda=0, enable_tuner=0,
                       compile_options={"test": 255}):
        cy = test_func(x, w, stride, padding, dilation)
        cy.sync()
    logs = find_log_with_re(raw_log,
                            "(Jit op key (not )?found: " + op_name + ".*)")
    assert len(logs) == 1 and "oihw" in logs[0][0], logs
    assert np.allclose(y.data, cy.data)
def test_print_trace(self):
    jt.print_trace()
    if os.name != 'nt':
        # force use addr2line
        with jt.flag_scope(gdb_path=""):
            jt.print_trace()
def check(xshape, wshape, stride, pad):
    a = np.random.rand(*xshape).astype(np.float32)
    b = np.random.rand(*wshape).astype(np.float32)
    # xformat="acdb" / wformat="hwio" request NHWC-style input and HWIO-style
    # weight layouts from the mkl_conv op.
    c = jt.mkl_ops.mkl_conv(a, b, stride, stride, pad, pad, 1, 1,
                            xformat="acdb", wformat="hwio").data

    a_jt = jt.array(a)
    b_jt = jt.array(b)
    with jt.flag_scope(enable_tuner=0,
                       compile_options={"test_mkl_conv": uid[0]}):
        c_jt = conv_nhwc_hwio(a_jt, b_jt, stride, pad).data
    with jt.log_capture_scope(
        enable_tuner=1,
        compile_options={"test_mkl_conv": uid[0] + 1},
        log_v=0,
        log_vprefix="tuner_manager=100,conv_tuner=1000",
    ) as raw_logs:
        c_jt_tune = conv_nhwc_hwio(a_jt, b_jt, stride, pad).data
    uid[0] += 2

    assert np.max(c_jt - c) < 1e-4 and np.max(c_jt_tune - c) < 1e-4
    logs = find_log_with_re(
        raw_logs,
        "Run tuner conv: confidence\\((.*)\\) candidates\\((.*)\\)$")
    assert len(logs) == 1, raw_logs
    assert logs[0][0] == '20'
    assert simple_parser(logs[0][1]) == {'relay0': [1, 0]}
def test_array_migrate(self):
    with jt.flag_scope(use_cuda=1):
        a = jt.array(np.float32([1, 2, 3]))
        b = jt.code(a.shape, a.dtype, [a], cpu_src="""
            for (int i=0; i<in0shape0; i++)
                @out(i) = @in0(i)*@in0(i)*2;
        """)
        assert (b.data == [2, 8, 18]).all()
def test_resnet(self):
    with jt.flag_scope(trace_py_var=2):
        resnet18 = resnet.Resnet18()
        x = jt.float32(np.random.rand(2, 3, 224, 224))
        y = resnet18(x)
        y.sync()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
def test5(self):
    with jt.flag_scope(use_cuda=1):
        f32 = jt.float32
        np.random.seed(0)
        jt.set_seed(3)
        x = f32(np.random.rand(1, 1))
        w = (jt.random([x.shape[-1], 10]) - f32(0.5)) / f32(x.shape[-1])**f32(0.5)
        jt.nn.matmul(x, w).data
def test_main_cuda(self):
    with jt.flag_scope(use_cuda=1):
        test_n = 10
        test([50, 50, 50, 50], multiplication, subtraction)
        for i in range(test_n):
            n = random.randint(1, 4)
            shape = []
            for j in range(n):
                shape.append(random.randint(1, 50))
            test(shape, get_random_op(), get_random_op())
def test_simple_model(self):
    with jt.flag_scope(trace_py_var=2):
        model = Model(input_size=1)
        batch_size = 10
        x = jt.float32(np.random.rand(batch_size, 1))
        y = model(x)
        y.sync()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
def check(xshape, wshape, stride=(1, 1, 1), padding=(0, 0, 0), dilation=(1, 1, 1), group=1):
    with jt.flag_scope(use_cuda=1):
        x = jt.random(xshape)
        w = jt.random(wshape)
        jt.sync_all()
    y2 = jt.nn.conv_transpose3d(x, w, None, stride, padding, 0, group, dilation)
    jt.sync_all()
    with jt.flag_scope(use_cuda=1):
        # y = jt.cudnn.ops.cudnn_conv3d_backward_x(w, x, *y2.shape[2:], *stride, *padding, *dilation, group)
        y = jt.nn.conv_transpose3d(x, w, None, stride, padding, 0, group, dilation)
        masky = jt.rand_like(y)
        dx, dw = jt.grad(masky * y, [x, w])
        jt.sync_all()
    dx2, dw2 = jt.grad(masky * y2, [x, w])
    jt.sync_all()
    np.testing.assert_allclose(y.numpy(), y2.numpy(), rtol=1e-6, atol=1e-4)
    np.testing.assert_allclose(dx.numpy(), dx2.numpy(), rtol=1e-6, atol=1e-4)
    np.testing.assert_allclose(dw.numpy(), dw2.numpy(), rtol=1e-5, atol=1e-3)
def test5(self):
    with jt.flag_scope(use_cuda=1):
        f32 = jt.float32
        np.random.seed(0)
        jt.set_seed(3)
        x = f32(np.random.rand(1, 1))
        w = jt.make_var(
            [x.shape[-1], 10],
            init=lambda *a: (jt.random(*a) - f32(0.5)) / f32(x.shape[-1])**f32(0.5))
        jt.nn.matmul(x, w).data
def test_matmul_cuda(self):
    with jt.flag_scope(use_cuda=1):
        test_matmul([2, 5], [5, 8])
        test_matmul([200, 500], [500, 800])
        test_matmul([500, 500], [500, 50])
        test_matmul2([2, 5], [5, 8], False, False)
        test_matmul2([5, 2], [5, 8], True, False)
        test_matmul2([500, 200], [500, 800], True, False)
        test_matmul2([500, 500], [500, 50], True, False)
        test_matmul2([2, 5], [8, 5], False, True)
        test_matmul2([200, 500], [800, 500], False, True)
        test_matmul2([500, 500], [50, 500], False, True)
def test_simple_model(self):
    with jt.flag_scope(trace_py_var=2):
        model = Model(input_size=1)
        batch_size = 10
        x = jt.float32(np.random.rand(batch_size, 1))
        y = model(x)
        y.sync()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
    with open(f"{jt.flags.cache_path}/simple_model.pkl", "wb") as f:
        pickle.dump(data, f)
def test_resnet_train(self):
    with jt.flag_scope(trace_py_var=2):
        resnet18 = resnet.Resnet18()
        opt = jt.optim.SGD(resnet18.parameters(), 0.1)
        x = jt.float32(np.random.rand(2, 3, 224, 224))
        y = resnet18(x)
        opt.step(y**2)
        jt.sync_all()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
def test_64_bit(self):
    a = np.random.rand(10)
    b = jt.array(a)
    assert b.dtype == "float32"

    with jt.flag_scope(auto_convert_64_to_32=0):
        a = np.random.rand(10)
        b = jt.array(a)
        assert b.dtype == "float64"

    a = np.random.rand(10)
    b = jt.array64(a)
    assert b.dtype == "float64"
def test_matmul_type_cuda(self):
    with jt.flag_scope(use_cuda=1):
        test_matmul2([2, 5], [5, 8], False, False, 'float32')
        test_matmul2([5, 2], [5, 8], True, False, 'float32')
        test_matmul2([2, 5], [8, 5], False, True, 'float32')

        test_matmul2([2, 5], [5, 8], False, False, 'float64')
        test_matmul2([5, 2], [5, 8], True, False, 'float64')
        test_matmul2([2, 5], [8, 5], False, True, 'float64')

        test_matmul2([2, 5], [5, 8], False, False, 'int32')
        test_matmul2([5, 2], [5, 8], True, False, 'int32')
        test_matmul2([2, 5], [8, 5], False, True, 'int32')
def test_stat(self):
    jt.clean()
    with jt.flag_scope(use_stat_allocator=1, use_sfrl_allocator=0):
        a = jt.random([10, 10])
        b = a + a
        c = a * b
        c.data
        del a, b, c
        gc.collect()
    # A 10x10 float32 var is 400 bytes; the test expects only two allocations
    # (800 bytes total), presumably because the intermediate b is fused into
    # the computation of c and never materialized.
    assert jt.flags.stat_allocator_total_alloc_call == 2
    assert jt.flags.stat_allocator_total_alloc_byte == 800
    assert jt.flags.stat_allocator_total_free_call == 2
    assert jt.flags.stat_allocator_total_free_byte == 800
def test_simple_model_train(self):
    with jt.flag_scope(trace_py_var=2):
        model = Model(input_size=1)
        opt = jt.optim.SGD(model.parameters(), 0.1)
        batch_size = 10
        x = jt.float32(np.random.rand(batch_size, 1))
        y = model(x)
        opt.step(y**2)
        jt.sync_all()
        data = jt.dump_trace_data()
        jt.clear_trace_data()
def check(xshape, wshape, stride=1, padding=0, dilation=1):
    with jt.log_capture_scope(use_cuda=1, enable_tuner=1,
                              log_v=0, log_vprefix="op.cc=100") as raw_log:
        x = jt.random(xshape)
        w = jt.random(wshape)
        y = conv_oihw(x, w, stride, padding, dilation)
        y.sync()
    with jt.flag_scope(use_cuda=0, enable_tuner=1):
        cy = conv_oihw(x, w, stride, padding, dilation)
        cy.sync()
    logs = find_log_with_re(raw_log, "(Jit op key (not )?found: cudnn_conv.*)")
    assert len(logs) == 1 and "oihw" in logs[0][0], logs
    assert np.allclose(y.data, cy.data), np.abs(y.data - cy.data).max()
def test(self):
    a = jt.array([1, 2, 3])
    a.sync()
    assert a.compile_options == {}
    a.compile_options = {"compile_shapes": 1}
    assert a.compile_options == {"compile_shapes": 1}
    b = a + a
    assert b.compile_options == {}
    with jt.flag_scope(compile_options={"compile_shapes": 1}):
        c = a + b
    assert c.compile_options == {"compile_shapes": 1}
    with jt.profile_scope() as report:
        c.sync()
    assert len(report) == 2 and "compile_shapes:1" in report[1][0]
def check_gpu_with_cpu(T, C, N, S, S_min):
    jt.set_global_seed(1)

    # Initialize random batch of input vectors, for *size = (T,N,C)
    input = jt.randn(T, N, C).log_softmax(2)
    # input = -jt.ones((T, N, C))
    # input[0,0,1] += 0.01

    # Initialize random batch of targets (0 = blank, 1:C = classes)
    target = jt.randint(low=1, high=C, shape=(N, S), dtype=jt.int)
    _input_jt = input

    input_lengths = jt.full((N,), T, dtype=jt.int)
    target_lengths = jt.randint(low=S_min, high=S + 1, shape=(N,), dtype=jt.int)
    # ctc_loss = nn.CTCLoss()
    loss = jt.ctc_loss(input, target, input_lengths, target_lengths, reduction='none')
    _loss_jt = loss
    loss_jt = loss.numpy()

    dinput_jt = jt.grad(_loss_jt, _input_jt)
    dinput_jt.sync()

    with jt.flag_scope(use_cuda=1):
        input = input.copy()
        target = target.copy()
        input_lengths = input_lengths.copy()
        target_lengths = target_lengths.copy()
        loss = jt.ctc_loss(input, target, input_lengths, target_lengths, reduction='none')
        grad = jt.grad(loss, input)
        np.testing.assert_allclose(_loss_jt.numpy(), loss.numpy(), atol=1e-5, rtol=1e-5)
        np.testing.assert_allclose(dinput_jt.numpy(), grad.numpy(), atol=1e-5, rtol=1e-5)
def check(xshape, wshape, stride=(1, 1, 1), padding=(0, 0, 0), dilation=(1, 1, 1), group=1):
    with jt.flag_scope(use_cuda=1):
        x = jt.random(xshape)
        w = jt.random(wshape)
        # y = jt.cudnn.ops.cudnn_conv3d(x, w, *stride, *padding, *dilation, group)
        y = jt.nn.conv3d(x, w, None, stride, padding, dilation, group)
        masky = jt.rand_like(y)
        dx, dw = jt.grad(masky * y, [x, w])
        jt.sync_all()
    y2 = jt.nn.conv3d(x, w, None, stride, padding, dilation, group)
    dx2, dw2 = jt.grad(masky * y2, [x, w])
    np.testing.assert_allclose(y.data, y2.data)
    np.testing.assert_allclose(dx.data, dx2.data, rtol=1e-5, atol=1e-3)
    np.testing.assert_allclose(dw.data, dw2.data, rtol=1e-5, atol=1e-3)