def test_blacklist_host(self):
    """Blacklisting a host removes it from newly fetched host snapshots.

    A snapshot obtained before the blacklist call must keep reporting the
    old host set; only a snapshot fetched afterwards reflects the change.
    """
    discovery = mock.Mock()
    discovery.find_available_hosts_and_slots.return_value = {'a': 2, 'b': 2}

    host_manager = HostManager(discovery)
    host_manager.update_available_hosts()

    # Baseline: both discovered hosts are available, two slots each.
    hosts_before = host_manager.current_hosts
    assert hosts_before.available_hosts == {'a', 'b'}
    assert hosts_before.count_available_slots() == 4

    host_manager.blacklist('a')

    # The snapshot taken earlier is expected to be immutable, so it must
    # still report the pre-blacklist state.
    assert hosts_before.available_hosts == {'a', 'b'}
    assert hosts_before.count_available_slots() == 4

    # A freshly fetched snapshot no longer contains the blacklisted host.
    hosts_after = host_manager.current_hosts
    assert hosts_after.available_hosts == {'b'}
    assert hosts_after.count_available_slots() == 2
def __init__(self, rendezvous, discovery, min_np, max_np, timeout=None,
             reset_limit=None, cooldown_range=None, verbose=0):
    """Set up driver state and start the background host discovery thread.

    Args:
        rendezvous: Stored on the instance; not used by this constructor.
        discovery: Passed to ``HostManager`` together with
            ``cooldown_range``.
        min_np: Stored as-is; presumably the lower bound on the number of
            worker processes (confirm against callers).
        max_np: Stored as-is; presumably the upper bound on the number of
            worker processes (confirm against callers).
        timeout: Timeout value to keep. Any falsy value (``None``, ``0``)
            is replaced by the ``HOROVOD_ELASTIC_TIMEOUT`` environment
            variable, or ``ELASTIC_TIMEOUT_SECS`` when that is unset,
            converted with ``int``.
        reset_limit: Forwarded to ``WorkerStateRegistry``.
        cooldown_range: Forwarded to ``HostManager``.
        verbose: Stored on the instance.
    """
    self._rendezvous = rendezvous
    self._host_manager = HostManager(discovery, cooldown_range)
    self._min_np = min_np
    self._max_np = max_np
    self._verbose = verbose

    # Assignment bookkeeping starts out empty.
    self._host_assignments = {}
    self._rank_assignments = {}
    self._world_size = 0

    self._wait_hosts_cond = threading.Condition()

    # Only consult the environment when no usable timeout was passed in.
    if not timeout:
        timeout = int(os.getenv('HOROVOD_ELASTIC_TIMEOUT',
                                ELASTIC_TIMEOUT_SECS))
    self._timeout = timeout

    self._create_worker_fn = None
    self._worker_clients = {}
    self._worker_registry = WorkerStateRegistry(
        self, self._host_manager, reset_limit=reset_limit)
    self._results = ResultsRecorder()
    self._shutdown = threading.Event()

    # Start discovery last so every attribute above already exists when
    # the thread body runs. Daemon: it must not keep the process alive.
    self._discovery_thread = threading.Thread(target=self._discover_hosts,
                                              daemon=True)
    self._discovery_thread.start()
def test_update_available_hosts(self):
    """Each discovery round yields a new host snapshot and update result.

    Previously fetched snapshots must stay unchanged, while
    ``current_hosts`` reflects the most recent discovery result.
    """
    discovery = mock.Mock()
    discovery.find_available_hosts_and_slots.side_effect = [
        {'a': 2},
        {'a': 2, 'b': 2},
        {'b': 2},
        {'b': 1, 'c': 1},
        {'b': 1, 'c': 1},
    ]
    host_manager = HostManager(discovery)

    # Nothing has been discovered yet, so the first snapshot is empty.
    initial_hosts = host_manager.current_hosts
    assert initial_hosts.available_hosts == set()
    assert initial_hosts.count_available_slots() == 0

    # Round 1: empty -> {'a': 2} is reported as an addition.
    assert host_manager.update_available_hosts() == HostUpdateResult.added

    # The snapshot taken before the update is expected to be immutable.
    assert initial_hosts.available_hosts == set()
    assert initial_hosts.count_available_slots() == 0

    # The fresh snapshot carries the newly discovered host.
    latest_hosts = host_manager.current_hosts
    assert latest_hosts.available_hosts == {'a'}
    assert latest_hosts.count_available_slots() == 2

    # Remaining rounds: (expected result, expected hosts, expected slots).
    later_rounds = [
        # Host 'b' appears alongside 'a'.
        (HostUpdateResult.added, {'a', 'b'}, 4),
        # Host 'a' disappears.
        (HostUpdateResult.removed, {'b'}, 2),
        # 'b' loses a slot while 'c' appears: a mixed update.
        (HostUpdateResult.mixed, {'b', 'c'}, 2),
        # Identical discovery output: no update.
        (HostUpdateResult.no_update, {'b', 'c'}, 2),
    ]
    for expected_result, expected_hosts, expected_slots in later_rounds:
        assert host_manager.update_available_hosts() == expected_result
        latest_hosts = host_manager.current_hosts
        assert latest_hosts.available_hosts == expected_hosts
        assert latest_hosts.count_available_slots() == expected_slots