示例#1
0
    def test_blacklist_host(self):
        """Blacklisting a host drops it from newly fetched snapshots only.

        A snapshot obtained from ``current_hosts`` before the blacklist call
        must keep reporting the original hosts and slot count.
        """
        discovery = mock.Mock()
        discovery.find_available_hosts_and_slots.return_value = {'a': 2, 'b': 2}
        manager = HostManager(discovery)
        manager.update_available_hosts()

        # Baseline: both discovered hosts are available, two slots each.
        before = manager.current_hosts
        assert before.available_hosts == {'a', 'b'}
        assert before.count_available_slots() == 4

        manager.blacklist('a')

        # The snapshot taken earlier is not expected to reflect the blacklist.
        assert before.available_hosts == {'a', 'b'}
        assert before.count_available_slots() == 4

        # A freshly fetched snapshot no longer contains the blacklisted host.
        after = manager.current_hosts
        assert after.available_hosts == {'b'}
        assert after.count_available_slots() == 2
示例#2
0
文件: driver.py 项目: rongou/horovod
    def __init__(self,
                 rendezvous,
                 discovery,
                 min_np,
                 max_np,
                 timeout=None,
                 reset_limit=None,
                 cooldown_range=None,
                 verbose=0):
        """Set up driver state and start the background host discovery thread.

        Args:
            rendezvous: Stored as-is for later use by the driver.
            discovery: Passed to ``HostManager`` together with ``cooldown_range``.
            min_np: Stored as the lower process-count bound.
            max_np: Stored as the upper process-count bound.
            timeout: Used when truthy; otherwise the value of the
                ``HOROVOD_ELASTIC_TIMEOUT`` environment variable (or
                ``ELASTIC_TIMEOUT_SECS`` if unset) converted to ``int``.
                Presumably seconds, judging by the constant's name.
            reset_limit: Forwarded to ``WorkerStateRegistry``.
            cooldown_range: Forwarded to ``HostManager``.
            verbose: Stored verbosity level.
        """
        self._rendezvous = rendezvous
        self._host_manager = HostManager(discovery, cooldown_range)
        self._min_np = min_np
        self._max_np = max_np
        self._verbose = verbose

        # Assignment bookkeeping starts out empty.
        self._host_assignments = {}
        self._rank_assignments = {}
        self._world_size = 0

        self._wait_hosts_cond = threading.Condition()

        # NOTE: any falsy timeout (None or 0) falls back to the environment/default.
        if timeout:
            self._timeout = timeout
        else:
            env_timeout = os.getenv('HOROVOD_ELASTIC_TIMEOUT', ELASTIC_TIMEOUT_SECS)
            self._timeout = int(env_timeout)

        self._create_worker_fn = None
        self._worker_clients = {}

        registry = WorkerStateRegistry(self, self._host_manager, reset_limit=reset_limit)
        self._worker_registry = registry
        self._results = ResultsRecorder()
        self._shutdown = threading.Event()

        # Start discovery last, once every attribute above has been assigned.
        discovery_thread = threading.Thread(target=self._discover_hosts)
        discovery_thread.daemon = True
        self._discovery_thread = discovery_thread
        discovery_thread.start()
示例#3
0
    def test_update_available_hosts(self):
        """Each discovery round yields the expected update result and a new snapshot.

        Also checks that a snapshot fetched before an update keeps its old contents.
        """
        discovery = mock.Mock()
        # One discovery response per call to update_available_hosts().
        discovery.find_available_hosts_and_slots.side_effect = [
            {'a': 2},
            {'a': 2, 'b': 2},
            {'b': 2},
            {'b': 1, 'c': 1},
            {'b': 1, 'c': 1},
        ]
        manager = HostManager(discovery)

        # Nothing has been discovered yet.
        initial = manager.current_hosts
        assert initial.available_hosts == set()
        assert initial.count_available_slots() == 0

        # Round 1: empty -> {'a': 2} is reported as an addition.
        assert manager.update_available_hosts() == HostUpdateResult.added

        # The snapshot fetched before the update still shows the empty state.
        assert initial.available_hosts == set()
        assert initial.count_available_slots() == 0

        # A newly fetched snapshot shows the discovered host.
        snapshot = manager.current_hosts
        assert snapshot.available_hosts == {'a'}
        assert snapshot.count_available_slots() == 2

        # Remaining rounds: (expected update result, expected hosts, expected slots).
        later_rounds = [
            (HostUpdateResult.added, {'a', 'b'}, 4),      # host 'b' appears
            (HostUpdateResult.removed, {'b'}, 2),         # host 'a' disappears
            (HostUpdateResult.mixed, {'b', 'c'}, 2),      # 'b' shrinks, 'c' appears
            (HostUpdateResult.no_update, {'b', 'c'}, 2),  # identical response
        ]
        for expected_result, expected_hosts, expected_slots in later_rounds:
            assert manager.update_available_hosts() == expected_result
            snapshot = manager.current_hosts
            assert snapshot.available_hosts == expected_hosts
            assert snapshot.count_available_slots() == expected_slots