def setUp(self):
    """Prepare per-test state and start one worker per rank.

    Records the main-process rank, opens a named temporary file, picks a
    free port, and then calls ``self._spawn_process`` once for every rank
    in ``range(int(self.world_size))``.
    """
    self.rank = self.MAIN_PROCESS_RANK
    self.file = tempfile.NamedTemporaryFile()
    self.port = common.find_free_port()

    # Collect the spawn results locally first; ``self.processes`` is only
    # assigned once every ``_spawn_process`` call has returned.
    rank_count = int(self.world_size)
    spawned = []
    for rank in range(rank_count):
        spawned.append(self._spawn_process(rank))
    self.processes = spawned
def test_nominal(self):
    """Rendezvous through ``env://`` as rank 0 and rank 1 and cross-check.

    Both rendezvous calls read their configuration from ``os.environ``.
    Each one is expected to yield a ``(store, rank, size)`` triple, and a
    key written through one store must be readable through the other.

    The environment variables written by this test are restored to their
    previous state on exit so they do not leak into other tests.
    """
    shared_env = {
        'WORLD_SIZE': '2',
        'MASTER_ADDR': '127.0.0.1',
        'MASTER_PORT': str(common.find_free_port()),
    }

    # Snapshot everything this test writes so it can be put back afterwards.
    touched = tuple(shared_env) + ('RANK',)
    previous = {name: os.environ.get(name) for name in touched}

    try:
        os.environ.update(shared_env)

        # First rank
        os.environ['RANK'] = '0'
        gen0 = c10d.rendezvous('env://')
        store0, rank0, size0 = next(gen0)
        self.assertEqual(0, rank0)
        self.assertEqual(2, size0)

        # Second rank
        os.environ['RANK'] = '1'
        gen1 = c10d.rendezvous('env://')
        store1, rank1, size1 = next(gen1)
        self.assertEqual(1, rank1)
        self.assertEqual(2, size1)

        # Set value on both stores
        store0.set("key0", "value0")
        store1.set("key1", "value1")

        # Cross check with get
        self.assertEqual(b"value0", store1.get("key0"))
        self.assertEqual(b"value1", store0.get("key1"))
    finally:
        # Restore the pre-test environment: re-set variables that existed,
        # remove the ones this test introduced.
        for name, old_value in previous.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value
def test_address_already_in_use(self):
    """Creating two TCP stores on the same address and port must fail.

    The whole sequence runs inside ``assertRaisesRegex`` and is expected
    to raise ``RuntimeError`` with the exact message
    "Address already in use".
    """
    with self.assertRaisesRegex(RuntimeError, "^Address already in use$"):
        # Same (address, port, True) arguments for both constructions.
        store_args = ('localhost', common.find_free_port(), True)

        # The names below are never read (hence the flake8 noqa); they
        # exist so the first store is not destroyed before the second
        # one is created.
        first_store = c10d.TCPStore(*store_args)  # noqa: F841
        second_store = c10d.TCPStore(*store_args)  # noqa: F841
def create_tcp_store(addr, max_attempts=10):
    """
    Creates a TCP store. Retries if the chosen port is already in use.

    Args:
        addr: address passed as the first argument to ``c10d.TCPStore``.
        max_attempts: upper bound on the number of ports to try, so that a
            persistent "Address already in use" condition cannot make the
            caller spin forever.

    Returns:
        The ``c10d.TCPStore`` created on the first port that worked.

    Raises:
        RuntimeError: re-raised unchanged when store creation fails for any
            reason other than "Address already in use", or raised with the
            list of attempted ports once ``max_attempts`` is exhausted.
    """
    tried_ports = []
    for _ in range(max_attempts):
        try:
            port = common.find_free_port()
            tried_ports.append(port)
            return c10d.TCPStore(addr, port, True)
        except RuntimeError as error:
            # Only the port-collision error is worth retrying; anything
            # else is a genuine failure and must propagate.
            if str(error) == "Address already in use":
                continue
            raise
    raise RuntimeError(
        "Unable to find free port (tried %s)"
        % ", ".join(str(port) for port in tried_ports)
    )
def setUp(self):
    """Pick a fresh rendezvous target and start one process per rank.

    For a ``file://`` init method, the module-level ``INIT_METHOD`` is
    rewritten to point at a newly created temporary file. For ``env://``,
    ``MASTER_PORT`` is set to a free port. Afterwards ``Barrier.init()`` is
    called and ``self._spawn_process`` is invoked for every rank in
    ``range(int(WORLD_SIZE))``.
    """
    # Adding this hack until we fix the FileStore to delete its
    # content at the end
    global INIT_METHOD
    if INIT_METHOD.startswith("file://"):
        fd, filename = tempfile.mkstemp(prefix=FOLDER)
        # mkstemp hands back an already-open descriptor. Only the path is
        # used here, so close it instead of leaking one descriptor per test.
        os.close(fd)
        INIT_METHOD = "file://{}".format(filename)

    if INIT_METHOD.startswith("env://"):
        port = common.find_free_port()
        os.environ["MASTER_PORT"] = str(port)

    self.processes = []
    self.rank = self.MANAGER_PROCESS_RANK
    Barrier.init()
    for rank in range(int(WORLD_SIZE)):
        self.processes.append(self._spawn_process(rank))
def test_nominal(self):
    """Rendezvous through a ``tcp://`` URL as rank 0 and rank 1.

    Each rendezvous is expected to yield a ``(store, rank, size)`` triple
    with the requested rank and a world size of 2. A key written through
    one store must then be readable through the other.
    """
    url = 'tcp://%s:%d?world_size=%d' % ('localhost', common.find_free_port(), 2)

    # Keep every generator referenced for the whole test, as well as the
    # store it produced.
    generators = []
    stores = []
    for expected_rank, rank_query in enumerate(("&rank=0", "&rank=1")):
        gen = c10d.rendezvous(url + rank_query)
        generators.append(gen)
        store, rank, size = next(gen)
        self.assertEqual(expected_rank, rank)
        self.assertEqual(2, size)
        stores.append(store)
    store0, store1 = stores

    # Set value on both stores
    store0.set("key0", "value0")
    store1.set("key1", "value1")

    # Cross check with get
    self.assertEqual(b"value0", store1.get("key0"))
    self.assertEqual(b"value1", store0.get("key1"))
def test_common_errors(self):
    """``env://`` rendezvous must complain about each missing variable.

    For each of WORLD_SIZE, RANK, MASTER_ADDR and MASTER_PORT, the
    environment is populated with the other three and the rendezvous is
    expected to raise ``ValueError`` matching "<NAME> expected".
    """
    env_vars = {
        "WORLD_SIZE": "2",
        "RANK": "0",
        "MASTER_ADDR": "127.0.0.1",
        "MASTER_PORT": common.find_free_port(),
    }

    class Env(object):
        """Temporarily apply ``overrides`` to ``os.environ``.

        Keys listed in ``absent`` are removed for the duration of the
        block. On exit every touched key is restored to the value it had
        before (or removed if it did not exist), so the test neither
        depends on nor disturbs the surrounding environment.
        """

        def __init__(self, overrides, absent=()):
            self.overrides = overrides
            self.absent = tuple(absent)
            self.saved = {}

        def __enter__(self):
            for key in list(self.overrides) + list(self.absent):
                self.saved[key] = os.environ.get(key)
            for key in self.absent:
                os.environ.pop(key, None)
            for key, value in self.overrides.items():
                # Values may be non-strings (the port is an int).
                os.environ[key] = str(value)

        def __exit__(self, exc_type, exc_value, traceback):
            for key, old_value in self.saved.items():
                if old_value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = old_value

    def without(d, key):
        """Return a copy of ``d`` that lacks ``key``."""
        d = d.copy()
        d.pop(key)
        return d

    # Same order as the variables are declared above.
    for missing in ("WORLD_SIZE", "RANK", "MASTER_ADDR", "MASTER_PORT"):
        with Env(without(env_vars, missing), absent=(missing,)):
            with self.assertRaisesRegex(ValueError, '%s expected' % missing):
                gen = c10d.rendezvous('env://')
                next(gen)
def test_address_already_in_use(self):
    """Creating two TCP stores on the same address and port must fail.

    The sequence is expected to raise ``RuntimeError`` with the exact
    message "Address already in use".
    """
    with self.assertRaisesRegex(RuntimeError, "^Address already in use$"):
        addr = 'localhost'
        port = common.find_free_port()

        # Hold the stores in a list rather than in otherwise-unused local
        # names (which linters flag). The first store must stay referenced
        # so it is not destroyed before the second one is created.
        stores = [c10d.TCPStore(addr, port, True)]
        stores.append(c10d.TCPStore(addr, port, True))
def setUp(self):
    """Create the TCP store and prefix used by the tests.

    Sets ``self.tcpstore`` (with a 300 second timeout) and
    ``self.prefix``.
    """
    # Use the module's create_tcp_store helper instead of picking a port
    # and binding in two separate steps: the helper retries when the
    # chosen port turns out to be already in use.
    self.tcpstore = create_tcp_store('localhost')
    self.prefix = "test_prefix"
    self.tcpstore.set_timeout(timedelta(seconds=300))
def _create_store(self):
    """Return a new TCP store on localhost with a 300 second timeout."""
    # Use the module's create_tcp_store helper instead of picking a port
    # and binding in two separate steps: the helper retries when the
    # chosen port turns out to be already in use.
    store = create_tcp_store('localhost')
    store.set_timeout(timedelta(seconds=300))
    return store