Пример #1
0
    def start(self, port: int = 8080, debug: bool = False) -> None:
        """
        Launch the experiment and wait on its status-checker thread.

        In order, this registers ``self.stop`` with ``atexit``, generates an
        experiment ID, starts the experiment log, launches the experiment via
        ``launcher.start_experiment_retiarii``, runs the dispatcher in its own
        thread, logs the web UI URLs, starts the strategy, and finally joins
        the status-checker thread. The call therefore does not return until
        that thread has finished.

        Parameters
        ----------
        port
            The port of web UI.
        debug
            Whether to start in debug mode.
        """
        atexit.register(self.stop)

        self.id = management.generate_experiment_id()

        # Logs live under the configured working directory when one is set,
        # otherwise under ``~/nni-experiments``.
        working_dir = self.config.experiment_working_directory
        if working_dir is None:
            log_path = Path.home() / f'nni-experiments/{self.id}/log'
        else:
            log_path = Path(working_dir, self.id, 'log')
        nni.runtime.log.start_experiment_log(self.id, log_path, debug)

        launched = launcher.start_experiment_retiarii(
            self.id, self.config, port, debug)
        self._proc, self._pipe = launched
        assert self._proc is not None
        assert self._pipe is not None

        self.port = port  # port will be None if start up failed

        # The dispatcher needs the pipe, so it is created only after launch.
        # The logic to launch dispatcher in background should be refactored into dispatcher api.
        self._dispatcher = self._create_dispatcher()
        dispatcher_thread = Thread(target=self._dispatcher.run)
        self._dispatcher_thread = dispatcher_thread
        dispatcher_thread.start()

        # Candidate hosts: the configured manager IP first, then every IPv4
        # address reported for the local network interfaces.
        hosts = [self.config.nni_manager_ip]
        hosts.extend(
            addr.address
            for nic_addrs in psutil.net_if_addrs().values()
            for addr in nic_addrs
            if addr.family == socket.AF_INET
        )
        urls = ' '.join(f'http://{host}:{port}' for host in hosts if host)
        _logger.info('Web UI URLs: ' + colorama.Fore.CYAN + urls +
                     colorama.Style.RESET_ALL)

        status_thread = Thread(target=self._check_exp_status)
        status_thread.start()
        self._start_strategy()
        # TODO: the experiment should be completed, when strategy exits and there is no running job
        _logger.info(
            'Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...'
        )
        # Block the caller until the status checker finishes.
        status_thread.join()
Пример #2
0
    def start(self, port: int = 8080, debug: bool = False) -> None:
        """
        Start the experiment and wait on its status-checker thread.

        In order, this registers ``self.stop`` with ``atexit``, replaces
        ``self.config`` with its canonical copy, installs the execution engine
        selected by ``self.config.execution_engine``, starts the experiment
        log, launches the experiment via ``launcher.start_experiment_retiarii``,
        runs the dispatcher in its own thread, logs the web UI URLs, starts the
        strategy, and finally joins the status-checker thread. The call
        therefore does not return until that thread has finished.

        Parameters
        ----------
        port
            The port of web UI.
        debug
            Whether to start in debug mode.

        Raises
        ------
        ValueError
            If ``self.config.execution_engine`` is not one of ``'base'``,
            ``'cgo'``, ``'py'`` or ``'benchmark'``.
        AssertionError
            If the ``'cgo'`` engine is requested with a non-remote training
            service or without ``batch_waiting_time``, or if the launcher
            returns no process or pipe.
        """
        atexit.register(self.stop)

        self.config = self.config.canonical_copy()

        # we will probably need a execution engine factory to make this clean and elegant
        # Engine modules are imported lazily so only the selected one is loaded.
        if self.config.execution_engine == 'base':
            from ..execution.base import BaseExecutionEngine
            engine = BaseExecutionEngine()
        elif self.config.execution_engine == 'cgo':
            from ..execution.cgo_engine import CGOExecutionEngine

            assert self.config.training_service.platform == 'remote', \
                "CGO execution engine currently only supports remote training service"
            assert self.config.batch_waiting_time is not None
            devices = self._construct_devices()
            engine = CGOExecutionEngine(
                devices,
                max_concurrency=self.config.max_concurrency_cgo,
                batch_waiting_time=self.config.batch_waiting_time)
        elif self.config.execution_engine == 'py':
            from ..execution.python import PurePythonExecutionEngine
            engine = PurePythonExecutionEngine()
        elif self.config.execution_engine == 'benchmark':
            from ..execution.benchmark import BenchmarkExecutionEngine
            engine = BenchmarkExecutionEngine(self.config.benchmark)
        else:
            # Without this branch ``engine`` would be unbound below and the
            # failure would not name the offending setting.
            raise ValueError(
                f'Unsupported execution engine: {self.config.execution_engine!r} '
                "(expected one of 'base', 'cgo', 'py', 'benchmark')")
        set_execution_engine(engine)

        self.id = management.generate_experiment_id()

        # Logs live under the configured working directory when one is set,
        # otherwise under ``~/nni-experiments``.
        if self.config.experiment_working_directory is not None:
            log_dir = Path(self.config.experiment_working_directory, self.id,
                           'log')
        else:
            log_dir = Path.home() / f'nni-experiments/{self.id}/log'
        nni.runtime.log.start_experiment_log(self.id, log_dir, debug)

        self._proc, self._pipe = launcher.start_experiment_retiarii(
            self.id, self.config, port, debug)
        assert self._proc is not None
        assert self._pipe is not None

        self.port = port  # port will be None if start up failed

        # dispatcher must be launched after pipe initialized
        # the logic to launch dispatcher in background should be refactored into dispatcher api
        self._dispatcher = self._create_dispatcher()
        self._dispatcher_thread = Thread(target=self._dispatcher.run)
        self._dispatcher_thread.start()

        # Candidate hosts: the configured manager IP first, then every IPv4
        # address reported for the local network interfaces; empty entries
        # are dropped when the URLs are built.
        ips = [self.config.nni_manager_ip]
        ips.extend(
            interface.address
            for interfaces in psutil.net_if_addrs().values()
            for interface in interfaces
            if interface.family == socket.AF_INET
        )
        urls = ' '.join(f'http://{ip}:{port}' for ip in ips if ip)
        _logger.info('Web UI URLs: ' + colorama.Fore.CYAN + urls +
                     colorama.Style.RESET_ALL)

        exp_status_checker = Thread(target=self._check_exp_status)
        exp_status_checker.start()
        self._start_strategy()
        # TODO: the experiment should be completed, when strategy exits and there is no running job
        _logger.info(
            'Waiting for experiment to become DONE (you can ctrl+c if there is no running trial jobs)...'
        )
        # Block the caller until the status checker finishes.
        exp_status_checker.join()