def __init__(self, num_workers: int, host_ip):
    """Create a Rabit tracker that workers connect to in order to share results.

    Only builds the tracker object and the base worker environment; nothing
    in this constructor starts the tracker.

    Args:
        num_workers: Number of workers, stored on the instance, published
            under ``DMLC_NUM_WORKER`` and passed to the tracker as ``nslave``.
        host_ip: Address handed to the tracker as ``hostIP``.
    """
    self._num_workers = num_workers
    # Base environment for the workers; initially holds only the worker count.
    self.env = {"DMLC_NUM_WORKER": num_workers}
    self.rabit_tracker = xgb.RabitTracker(
        hostIP=host_ip,
        nslave=num_workers,
    )
def _start_rabit_tracker(num_workers: int):
    """Start a Rabit tracker that workers connect to in order to share results.

    Args:
        num_workers: Number of workers; published under ``DMLC_NUM_WORKER``,
            passed to the tracker as ``nslave`` and to its ``start`` call.

    Returns:
        A dict containing ``DMLC_NUM_WORKER`` plus every entry reported by
        the tracker's ``slave_envs()``.
    """
    tracker = xgb.RabitTracker(
        hostIP=get_node_ip_address(),
        nslave=num_workers,
    )

    # Seed the environment with the worker count, then merge in the
    # connection settings (tracker host + port) the tracker reports.
    worker_env = {"DMLC_NUM_WORKER": num_workers}
    worker_env.update(tracker.slave_envs())

    tracker.start(num_workers)

    # join() blocks until the tracker is done, so run it on a daemon thread
    # and hand the environment back to the caller right away.
    join_thread = Thread(target=tracker.join, daemon=True)
    join_thread.start()

    return worker_env
def _start_rabit_tracker(num_workers: int):
    """Start a Rabit tracker that workers connect to in order to share results.

    Args:
        num_workers: Number of workers; published under ``DMLC_NUM_WORKER``,
            passed to the tracker as ``nslave`` and to its ``start`` call.

    Returns:
        A dict containing ``DMLC_NUM_WORKER`` plus every entry reported by
        the tracker's ``slave_envs()``.
    """
    # TODO (hme): Cleanup thread and tracker after training.
    tracker = xgb.RabitTracker(
        hostIP=systems_utils.get_private_ip(),
        nslave=num_workers,
    )

    # Seed the environment with the worker count, then merge in the
    # connection settings (tracker host + port) the tracker reports.
    worker_env = {"DMLC_NUM_WORKER": num_workers}
    worker_env.update(tracker.slave_envs())

    tracker.start(num_workers)

    # join() blocks until the tracker is done, so run it on a daemon thread
    # and hand the environment back to the caller right away.
    join_thread = Thread(target=tracker.join, daemon=True)
    join_thread.start()

    return worker_env
def __init__(self, num_workers: int, host_ip):
    """Build a Rabit tracker and the base environment for its workers.

    Nothing in this constructor starts the tracker; it only creates the
    tracker object and records the worker count.

    Args:
        num_workers: Number of workers, stored on the instance, published
            under ``DMLC_NUM_WORKER`` and passed to the tracker as ``nslave``.
        host_ip: Address handed to the tracker as ``hostIP``.
    """
    self._num_workers = num_workers
    # Base environment for the workers; initially holds only the worker count.
    self.env = {"DMLC_NUM_WORKER": num_workers}
    self.rabit_tracker = xgb.RabitTracker(
        hostIP=host_ip,
        nslave=num_workers,
    )