def test_backward(self):
    np.random.seed(0)
    jt.set_seed(3)
    model = Model()
    SGD = jt.nn.SGD(model.parameters(), 0.05, 0.9, 0)
    n = 1000
    batch_size = 50
    base_lr = 0.05
    # stop grad of the global value to prevent a memory leak
    lr = f32(base_lr).name("lr").stop_grad()

    def get_data(n):
        for i in range(n):
            x = np.random.rand(batch_size, 1)
            y = x * x
            yield jt.float32(x), jt.float32(y)

    for i, (x, y) in enumerate(get_data(n)):
        pred_y = model(x).name("pred_y")
        loss = ((pred_y - y)**f32(2)).name("loss")
        loss_mean = loss.mean()
        SGD.step(loss_mean)
        if i > 2:
            assert prev == jt.liveness_info(), f"memory leak {prev} {jt.liveness_info()}"
        prev = jt.liveness_info()
        if (i % 10 == 9):
            print(f"step {i}, loss = {loss_mean.data.sum()} {jt.liveness_info()}")
        else:
            loss_mean.data.sum()
            jt.liveness_info()
    possible_results = [
        0.00022486248053610325,
        0.00020916158973705024,
        0.00561215,
    ]
    loss_mean = loss_mean.data
    assert any(abs(loss_mean - r) < 1e-6 for r in possible_results), loss_mean
    jt.clean()
def test_backward_cuda(self):
    with jt.flag_scope(use_cuda=1):
        np.random.seed(0)
        jt.set_seed(3)
        model = Model()
        SGD = jt.nn.SGD(model.parameters(), 0.05, 0.9, 0)
        n = 1000
        batch_size = 50
        base_lr = 0.05
        # stop grad of the global value to prevent a memory leak
        lr = f32(base_lr).name("lr").stop_grad()

        def get_data(n):
            for i in range(n):
                x = np.random.rand(batch_size, 1)
                y = x * x
                yield jt.float32(x), jt.float32(y)

        for i, (x, y) in enumerate(get_data(n)):
            pred_y = model(x).name("pred_y")
            # on CUDA, x**2.0 can return nan, so use sqr() instead
            loss = ((pred_y - y).sqr()).name("loss")
            loss_mean = loss.mean()
            SGD.step(loss_mean)
            if i > 2:
                assert prev == jt.liveness_info(), f"memory leak {prev} {jt.liveness_info()}"
            prev = jt.liveness_info()
            if (i % 10 == 9):
                print(f"step {i}, loss = {loss_mean.data.sum()} {jt.liveness_info()}")
            else:
                loss_mean.data.sum()
                jt.liveness_info()
        result = 0.00018236637697555125
        assert abs(loss_mean.data - result) < 1e-2
        jt.clean()
def run_models(self):
    def to_cuda(x):
        if jt.has_cuda:
            return x.cuda()
        return x

    threshold = 1e-2
    # Define numpy input image
    bs = 1
    test_img = np.random.random((bs, 3, 224, 224)).astype('float32')
    # test_img = np.random.random((bs,3,280,280)).astype('float32')
    # Define pytorch & jittor input image
    pytorch_test_img = to_cuda(torch.Tensor(test_img))
    jittor_test_img = jt.array(test_img)
    for test_model in self.models:
        if test_model == "inception_v3":
            test_img = np.random.random((bs, 3, 300, 300)).astype('float32')
            pytorch_test_img = to_cuda(torch.Tensor(test_img))
            jittor_test_img = jt.array(test_img)
        # Define pytorch & jittor model
        pytorch_model = to_cuda(tcmodels.__dict__[test_model]())
        jittor_model = jtmodels.__dict__[test_model]()
        # Set eval to avoid dropout layer
        pytorch_model.eval()
        jittor_model.eval()
        # Jittor loads pytorch parameters to ensure forward alignment
        jittor_model.load_parameters(pytorch_model.state_dict())
        # Judge pytorch & jittor forward relative error. If the difference
        # is lower than the threshold, this test passes.
        pytorch_result = pytorch_model(pytorch_test_img)
        jittor_result = jittor_model(jittor_test_img)
        # add 1 to both outputs to avoid division by values near zero
        x = pytorch_result.detach().cpu().numpy() + 1
        y = jittor_result.data + 1
        relative_error = abs(x - y) / abs(y)
        diff = relative_error.mean()
        assert diff < threshold, f"[*] {test_model} forward fails..., Relative Error: {diff}"
        print(f"[*] {test_model} forward passes with Relative Error {diff}")
    jt.clean()
    jt.gc()
    torch.cuda.empty_cache()
    print('all models pass test.')
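# A standalone restatement of the comparison metric used above (a sketch, not
# part of the test suite): adding 1 to both outputs guards against division by
# values near zero before taking the mean relative error.
def mean_relative_error(x, y):
    x, y = x + 1, y + 1
    return (abs(x - y) / abs(y)).mean()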
def test_var(self):
    jt.clean()
    for i in range(2):
        x = jt.array([[1]])
        y = model(x)
        params = jt.find_vars()
        assert len(params) == 3
        names = [p.name() for p in params]
        assert names == [
            "model/linear_0/var_0",
            "model/linear_1/var_0",
            "model/linear_2/var_0",
        ], str(names)
        jt.find_var("model/linear_0/var_0")
        expect_error(lambda: jt.find_var("model/linear_0"))
        expect_error(lambda: jt.find_var("model/linear"))
        assert len(jt.find_vars("model/linear_0/var_0")) == 1
        assert len(jt.find_vars("model/linear_0/")) == 1
        assert len(jt.find_vars("model/")) == 3
    jt.clean()
def test_zmem_leak2(self):
    def test():
        class MyFunc(Function):
            def execute(self, x, z, y):
                self.x = x.name("x")
                self.y = y.name("y")
                return x*y, "test", x/y

            def grad(self, grad0, _, grad1):
                assert _ is None
                res = (grad0 * self.y, None, grad1 * self.x)
                return res

        a = jt.array(3.0).name('a')
        b = jt.array(4.0).name('b')
        c, _, d = MyFunc()(a, "a", b)
        c.name('c'), d.name('d')
        g = jt.grad(c + d*3, [a, b])

    test()
    jt.clean()
    self.assertEqual(jt.liveness_info()["lived_vars"], 0)
def test_backward(self):
    np.random.seed(0)
    jt.set_seed(3)
    model = Model()
    SGD = jt.nn.SGD(model.parameters(), 0.05, 0.9, 0)
    n = 1000
    batch_size = 50
    base_lr = 0.05
    # stop grad of the global value to prevent a memory leak
    lr = f32(base_lr).name("lr").stop_grad()

    def get_data(n):
        for i in range(n):
            x = np.random.rand(batch_size, 1)
            y = x * x
            yield jt.float32(x), jt.float32(y)

    for i, (x, y) in enumerate(get_data(n)):
        pred_y = model(x).name("pred_y")
        loss = ((pred_y - y)**f32(2)).name("loss")
        loss_mean = loss.mean()
        SGD.step(loss_mean)
        if i > 2:
            assert prev == jt.liveness_info(), f"memory leak {prev} {jt.liveness_info()}"
        prev = jt.liveness_info()
        if (i % 10 == 9):
            print(f"step {i}, loss = {loss_mean.data.sum()} {jt.liveness_info()}")
        else:
            loss_mean.data.sum()
            jt.liveness_info()
    result = 0.00038617782411165535
    assert abs(loss_mean.data - result) < 1e-6, [loss_mean.data, result]
    jt.clean()
def _init_workers(self):
    jt.clean()
    jt.gc()
    self.index_list = mp.Array('i', self.real_len, lock=False)
    workers = []
    # batch id to worker id
    self.idmap = mp.Array('i', self.batch_len, lock=False)
    # global token index
    self.gid = mp.Value('i', self.batch_len)
    # global token index condition
    self.gidc = mp.Condition(self.gid.get_lock())
    # number of idle workers
    self.num_idle = mp.Value('i', 0, lock=False)
    # number of idle workers condition
    self.num_idle_c = mp.Condition(self.gid.get_lock())
    for i in range(self.num_workers):
        w = Worker(target=self._worker_main, args=(i,),
                   buffer_size=self.buffer_size,
                   keep_numpy_array=self.keep_numpy_array)
        workers.append(w)
    self.workers = workers
    self.index_list_numpy = np.ndarray(
        dtype='int32', shape=self.real_len, buffer=self.index_list)
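# The shared-buffer idea behind index_list/index_list_numpy above, in
# isolation (a minimal sketch with made-up names): an unlocked mp.Array is a
# raw ctypes buffer, so a numpy view built over it is shared with worker
# processes without copying (assuming a platform where C int is 32-bit).
import multiprocessing as mp
import numpy as np

shared = mp.Array('i', 8, lock=False)                     # raw shared int buffer
view = np.ndarray(dtype='int32', shape=8, buffer=shared)  # zero-copy numpy view
view[:] = np.arange(8)                                    # writes are visible to workers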
def test1(self):
    np.random.seed(0)
    jt.set_seed(3)
    n = 1000
    batch_size = 50
    base_lr = 0.05
    # stop grad of the global value to prevent a memory leak
    lr = f32(base_lr).name("lr").stop_grad()

    def get_data(n):
        for i in range(n):
            x = np.random.rand(batch_size, 1)
            y = x * x
            yield jt.float32(x), jt.float32(y)

    model = Model(input_size=1)
    ps = model.parameters()

    for i, (x, y) in enumerate(get_data(n)):
        pred_y = model(x).name("pred_y")
        loss = ((pred_y - y)**f32(2)).name("loss")
        loss_mean = loss.mean()
        gs = jt.grad(loss_mean, ps)
        for p, g in zip(ps, gs):
            p -= g * lr
        if i > 2:
            assert prev == jt.liveness_info(), f"memory leak {prev} {jt.liveness_info()}"
        prev = jt.liveness_info()
        print(f"step {i}, loss = {loss_mean.data.sum()} {jt.liveness_info()}")
    result = 0.0009948202641680837
    assert abs(loss_mean.data - result) < 1e-6
    jt.clean()
def test(h, w, total_alloc_call, total_alloc_byte,
         total_free_call=0, total_free_byte=0):
    jt.clean()
    jt.gc()
    with jt.flag_scope(use_stat_allocator=1):
        a = jt.random([h, w])
        b = a + a
        c = a * b
        c.data
        del a, b, c
        gc.collect()
        x = (jt.flags.stat_allocator_total_alloc_call,
             jt.flags.stat_allocator_total_alloc_byte,
             jt.flags.stat_allocator_total_free_call,
             jt.flags.stat_allocator_total_free_byte)
        y = (total_alloc_call, total_alloc_byte,
             total_free_call, total_free_byte)
        assert x == y, (x, y)
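# Hypothetical invocation of the helper above: the expected call/byte counts
# depend on allocator behavior and tensor size, so the numbers below are
# placeholders for illustration, not values taken from the real test.
test(5, 5, total_alloc_call=2, total_alloc_byte=200)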
def test_lived(self):
    jt.clean()
    check(0, 0, 0)
    a = jt.array(1.0).stop_fuse()
    a.name('a')
    b = jt.array(1.0).stop_fuse()
    b.name('b')
    check(2, 2, 2)
    c = a * b
    c.name('c')
    check(3, 3, 3)
    vc = c.numpy()
    check(3, 3, 1)
    da, db = jt.grad(c, [a, b])
    da.name('da')
    db.name('db')
    check(5, 6, 4)  # dc, 3, da, 1, db, 1
    del a, b, c
    check(2, 5, 3)
    da.sync(), db.sync()
    check(2, 2, 0)
    del da, db
    check(0, 0, 0)
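# `check` is not defined in this excerpt; here is a plausible minimal
# implementation (an assumption, not the suite's actual helper), assuming
# jt.liveness_info() also exposes hold_vars and lived_ops counts alongside
# the lived_vars key used by the leak tests in this file:
def check(hold_vars, lived_vars, lived_ops):
    info = jt.liveness_info()
    assert (info["hold_vars"], info["lived_vars"], info["lived_ops"]) == \
        (hold_vars, lived_vars, lived_ops), info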
def tearDown(self):
    jt.clean()
    jt.gc()
def test_allmodels(bs=1):
    # Define numpy input image
    test_img = np.random.random((bs, 3, 224, 224)).astype('float32')
    # Define pytorch & jittor input image
    pytorch_test_img = to_cuda(torch.Tensor(test_img))
    jittor_test_img = jt.array(test_img)
    for model in models:
        if model == "inception_v3":
            test_img = np.random.random((bs, 3, 300, 300)).astype('float32')
            pytorch_test_img = to_cuda(torch.Tensor(test_img))
            jittor_test_img = jt.array(test_img)
        jittor_test_img.stop_grad()
        pytorch_test_img.requires_grad = False
        # Define pytorch & jittor model
        pytorch_model = to_cuda(tcmodels.__dict__[model]())
        jittor_model = jtmodels.__dict__[model]()
        # Set eval to avoid dropout layer
        pytorch_model.eval()
        jittor_model.eval()
        # Jittor loads pytorch parameters to ensure forward alignment
        jittor_model.load_parameters(pytorch_model.state_dict())
        total = 512
        warmup = max(2, total // bs // 8)
        rerun = max(2, total // bs)
        print("=" * 20 + model + "=" * 20)
        # Jittor warmup
        for i in range(warmup):
            jittor_result = jittor_model(jittor_test_img)
        jt.sync_all(True)
        # Time Jittor forward passes
        sta = time.time()
        for i in range(rerun):
            jittor_result = jittor_model(jittor_test_img)
            jittor_result.sync()
        jt.sync_all(True)
        end = time.time()
        print(f"- Jittor {model} forward average time cost: {round((end - sta) / rerun, 5)}, "
              f"Batch Size: {bs}, FPS: {round(bs * rerun / (end - sta), 2)}")
        # PyTorch warmup
        for i in range(warmup):
            pytorch_result = pytorch_model(pytorch_test_img)
        # Time PyTorch forward passes
        torch.cuda.synchronize()
        sta = time.time()
        for i in range(rerun):
            pytorch_result = pytorch_model(pytorch_test_img)
        torch.cuda.synchronize()
        end = time.time()
        print(f"- Pytorch {model} forward average time cost: {round((end - sta) / rerun, 5)}, "
              f"Batch Size: {bs}, FPS: {round(bs * rerun / (end - sta), 2)}")
        # Judge pytorch & jittor forward relative error. If the difference
        # is lower than the threshold, this test passes.
        # add 1 to both outputs to avoid division by values near zero
        x = pytorch_result.detach().cpu().numpy() + 1
        y = jittor_result.numpy() + 1
        relative_error = abs(x - y) / abs(y)
        diff = relative_error.mean()
        assert diff < threshold, f"[*] {model} forward fails..., Relative Error: {diff}"
        print(f"[*] {model} forward passes with Relative Error {diff}")
        torch.cuda.empty_cache()
        jt.clean()
        jt.gc()
def test_get_var_init(self):
    jt.clean()
    assert (jt.make_var(init=[1, 2, 3]).data == [1, 2, 3]).all()
    assert (jt.make_var(shape=[3], init=np.zeros).data == [0, 0, 0]).all()
    # parentheses fixed so the comparison applies to .data, not to init
    assert (jt.make_var(init=jt.array([1, 2, 3])).data == [1, 2, 3]).all()
    jt.clean()
### Update per update_step
learning_rate = 3e-4
num_steps = 200
max_episodes = 1660
update_step = 200
GAMMA = 0.99
env = gym.make('CartPole-v0')
hidden_size = 256
num_actions = env.action_space.n

all_rewards = []
all_lengths = []
average_lengths = []
graphs = []

jt.clean()
model = ActorCritic(4, 256, 2)
model.eval()
model.train()
opt = Adam(model.parameters(), learning_rate)

init = lambda o, i: (np.random.rand(o, i) / np.sqrt(o)).astype("float32")
seed = 1
np.random.seed(seed)
# model.load_parameters({
#     "value.0.weight": init(256, 4),
#     "value.0.bias": np.zeros(256,).astype("float32"),
#     "value.2.weight": init(1, 256),
#     "value.2.bias": np.zeros(1,).astype("float32"),
def test_zmem_leak(self):
    def test():
        self.test_multi_grads_multi_out5()

    test()
    jt.clean()
    self.assertEqual(jt.liveness_info()["lived_vars"], 0)
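# The leak-check pattern shared by test_zmem_leak and test_zmem_leak2, in
# isolation (a sketch, not part of the suite): build the graph inside a
# function so its locals die on return, then clean and assert nothing is
# left alive.
def assert_no_leak(build_graph):
    build_graph()
    jt.clean()
    assert jt.liveness_info()["lived_vars"] == 0, jt.liveness_info()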