Пример #1
0
    def __init__(self, num_workers: int, host_ip):
        """Create the Rabit tracker that workers connect to in order to share
        their results.

        This constructor only builds the tracker object and the initial
        worker environment; it does not call ``start`` on the tracker.

        Args:
            num_workers: Number of workers. Stored on the instance, published
                under the ``DMLC_NUM_WORKER`` environment key and handed to
                the tracker as ``nslave``.
            host_ip: Address handed to the tracker as ``hostIP``.
        """
        self._num_workers = num_workers

        # Initial environment: at this point it holds the worker count only.
        self.env = {"DMLC_NUM_WORKER": num_workers}

        tracker_kwargs = {"hostIP": host_ip, "nslave": num_workers}
        self.rabit_tracker = xgb.RabitTracker(**tracker_kwargs)
Пример #2
0
def _start_rabit_tracker(num_workers: int):
    """Create and start a Rabit tracker, returning the worker environment.

    The workers connect to this tracker to share their results. The tracker
    is bound to the address returned by ``get_node_ip_address()``.

    Args:
        num_workers: Number of workers; used as the tracker's ``nslave``,
            passed to ``start`` and published under ``DMLC_NUM_WORKER``.

    Returns:
        dict: ``DMLC_NUM_WORKER`` plus every entry reported by the tracker's
        ``slave_envs()``.
    """
    node_ip = get_node_ip_address()
    tracker = xgb.RabitTracker(hostIP=node_ip, nslave=num_workers)

    # Worker count first, then the tracker-provided entries on top of it.
    worker_env = {"DMLC_NUM_WORKER": num_workers}
    worker_env.update(tracker.slave_envs())

    tracker.start(num_workers)

    # Run the tracker's join() on a daemon thread rather than inline, so this
    # function can return the environment straight away.
    Thread(target=tracker.join, daemon=True).start()

    return worker_env
Пример #3
0
def _start_rabit_tracker(num_workers: int):
    """Create and start a Rabit tracker, returning the worker environment.

    The workers connect to this tracker to share their results. The tracker
    is bound to the address returned by ``systems_utils.get_private_ip()``.

    Args:
        num_workers: Number of workers; used as the tracker's ``nslave``,
            passed to ``start`` and published under ``DMLC_NUM_WORKER``.

    Returns:
        dict: ``DMLC_NUM_WORKER`` plus every entry reported by the tracker's
        ``slave_envs()``.
    """
    # TODO (hme): Cleanup thread and tracker after training.
    private_ip = systems_utils.get_private_ip()
    tracker = xgb.RabitTracker(hostIP=private_ip, nslave=num_workers)

    # Seed the environment with the worker count, then layer the
    # tracker-provided entries on top of it.
    tracker_env = {"DMLC_NUM_WORKER": num_workers}
    tracker_env.update(tracker.slave_envs())

    tracker.start(num_workers)

    # The join() call is handed to a daemon thread instead of being made
    # inline, so the environment is returned without waiting on it.
    join_thread = Thread(target=tracker.join, daemon=True)
    join_thread.start()

    return tracker_env
Пример #4
0
 def __init__(self, num_workers: int, host_ip):
     """Create a Rabit tracker and the initial worker environment.

     Only construction happens here; this method does not call ``start``
     on the tracker.

     Args:
         num_workers: Number of workers. Stored on the instance, published
             under ``DMLC_NUM_WORKER`` and handed to the tracker as
             ``nslave``.
         host_ip: Address handed to the tracker as ``hostIP``.
     """
     self._num_workers = num_workers

     # Initial environment: holds the worker count only at this point.
     self.env = {"DMLC_NUM_WORKER": num_workers}

     tracker_kwargs = {"hostIP": host_ip, "nslave": num_workers}
     self.rabit_tracker = xgb.RabitTracker(**tracker_kwargs)