def test_py_multi_async_call(self):
    """Fire two concurrent async RPCs (a user static method and the
    builtin ``min``) at the next worker and check both futures resolve
    to the locally computed results."""
    n = self.rank + 1
    target = dist.get_worker_id("worker{}".format(n % self.world_size))
    static_fut = dist.rpc_async(target, my_class.my_static_method, args=(n + 10,))
    builtin_fut = dist.rpc_async(target, min, args=(n, n + 1, n + 2))
    self.assertEqual(static_fut.wait(), my_class.my_static_method(n + 10))
    self.assertEqual(builtin_fut.wait(), min(n, n + 1, n + 2))
def multi_layer_nested_async_rpc(dst, world_size, ttl):
    """Return 0 immediately without blocking the callee; while ``ttl`` is
    positive, asynchronously forward the request to the next worker in
    the ring with ``ttl`` decremented, generating additional requests."""
    # Guard clause: expired TTL means no further fan-out.
    if ttl <= 0:
        return 0
    dist.rpc_async(
        "worker{}".format(dst),
        multi_layer_nested_async_rpc,
        args=((dst + 1) % world_size, world_size, ttl - 1),
    )
    return 0
def test_async_add(self):
    """Asynchronously add two ones-tensors on the next worker and verify
    the returned future yields a tensor of twos."""
    n = self.rank + 1
    peer = "worker{}".format(n % self.world_size)
    result = dist.rpc_async(
        peer,
        torch.add,
        args=(torch.ones(n, n), torch.ones(n, n)),
    )
    self.assertEqual(result.wait(), torch.ones(n, n) * 2)
def _stress_test_rpc(self, f, repeat=1000, args=()):
    """Issue ``repeat`` async RPCs of ``f`` to the next worker, assert
    every future resolves to 0, and print the elapsed wall-clock time."""
    import time

    dst = "worker{}".format((self.rank + 1) % self.world_size)
    start = time.time()
    # Launch everything first so the calls overlap, then drain the futures.
    pending = [dist.rpc_async(dst, f, args=args) for _ in range(repeat)]
    for fut in pending:
        self.assertEqual(fut.wait(), 0)
    elapsed = time.time() - start
    print("Rank {} finished testing {} {} times in {} seconds.".format(
        self.rank, f.__name__, repeat, elapsed))
def test_py_tensors_multi_async_call(self):
    """Launch 100 async tensor-function RPCs of growing size, then verify
    each future against a locally computed reference of matching size."""
    n = self.rank + 1
    dst = "worker{}".format(n % self.world_size)
    pending = [
        dist.rpc_async(
            dst,
            my_tensor_function,
            args=(torch.ones(i, i), torch.ones(i, i)),
        )
        for i in range(100)
    ]
    for j, fut in enumerate(pending):
        self.assertEqual(
            fut.wait(), my_tensor_function(torch.ones(j, j), torch.ones(j, j))
        )
def test_py_raise_in_user_func(self):
    """A remote function that raises should surface the error to the
    caller when it waits on the future."""
    n = self.rank + 1
    dst = "worker{}".format(n % self.world_size)
    fut = dist.rpc_async(dst, raise_func)
    with self.assertRaisesRegex(Exception, "ValueError"):
        fut.wait()