def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
    print(rest_endpoint)
    exp = Experiment()
    exp.connect_experiment(rest_endpoint)
    print(exp.get_job_statistics())
    print(exp.get_experiment_status())
    print(exp.list_trial_jobs())
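# A minimal sketch showing the same inspection calls used outside the hook above.
# It assumes an experiment is already running and reachable at the given REST
# endpoint; the URL is a placeholder.
exp = Experiment()
exp.connect_experiment('http://localhost:8080')
print(exp.get_job_statistics())     # trial counts grouped by status
print(exp.get_experiment_status())  # overall status, e.g. RUNNING or DONE
print(exp.list_trial_jobs())        # metadata for every trial so far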
def view_experiment(args):
    exp_id = args.id
    port = args.port
    exp_dir = args.experiment_dir
    config_json = get_stopped_experiment_config_json(exp_id, exp_dir)
    if config_json.get('trainingServicePlatform'):
        legacy_launcher.view_experiment(args)
        exit()
    exp = Experiment._view(exp_id, exp_dir)
    exp.start(port, run_mode=RunMode.Detach)
def view_experiment(args):
    exp_id = args.id
    port = args.port
    exp_dir = args.experiment_dir
    init_logger_for_command_line()
    logging.getLogger('nni').setLevel(logging.INFO)
    config_json = get_stopped_experiment_config_json(exp_id, exp_dir)
    if config_json.get('trainingServicePlatform'):
        legacy_launcher.view_experiment(args)
        exit()
    exp = Experiment._view(exp_id, exp_dir)
    exp.start(port, run_mode=RunMode.Detach)
def resume_experiment(args):
    exp_id = args.id
    port = args.port
    debug = args.debug
    foreground = args.foreground
    exp_dir = args.experiment_dir
    config_json = get_stopped_experiment_config_json(exp_id, exp_dir)
    if config_json.get('trainingServicePlatform'):
        legacy_launcher.resume_experiment(args)
        exit()
    exp = Experiment._resume(exp_id, exp_dir)
    run_mode = RunMode.Foreground if foreground else RunMode.Detach
    exp.start(port, debug, run_mode)
def resume_experiment(args):
    exp_id = args.id
    port = args.port
    debug = args.debug
    foreground = args.foreground
    exp_dir = args.experiment_dir
    init_logger_for_command_line()
    logging.getLogger('nni').setLevel(logging.INFO)
    config_json = get_stopped_experiment_config_json(exp_id, exp_dir)
    if config_json.get('trainingServicePlatform'):
        legacy_launcher.resume_experiment(args)
        exit()
    exp = Experiment._resume(exp_id, exp_dir)
    run_mode = RunMode.Foreground if foreground else RunMode.Detach
    exp.start(port, debug, run_mode)
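# A minimal sketch of calling resume_experiment programmatically. The Namespace
# fields are exactly the attributes the function reads; the ID and port values
# are placeholders, and experiment_dir=None assumes the default location.
import argparse

args = argparse.Namespace(
    id='EXPERIMENT_ID',
    port=8080,
    debug=False,
    foreground=False,   # False -> RunMode.Detach
    experiment_dir=None,
)
resume_experiment(args)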
# FIXME: For demonstration only. It should not be here
from pathlib import Path

from nni.experiment import Experiment
from nni.algorithms.hpo.hyperopt_tuner import HyperoptTuner

tuner = HyperoptTuner('tpe')

search_space = {
    "dropout_rate": {"_type": "uniform", "_value": [0.5, 0.9]},
    "conv_size": {"_type": "choice", "_value": [2, 3, 5, 7]},
    "hidden_size": {"_type": "choice", "_value": [124, 512, 1024]},
    "batch_size": {"_type": "choice", "_value": [16, 32]},
    "learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]}
}

experiment = Experiment(tuner, 'local')
experiment.config.experiment_name = 'test'
experiment.config.trial_concurrency = 2
experiment.config.max_trial_number = 5
experiment.config.search_space = search_space
experiment.config.trial_command = 'python3 mnist.py'
experiment.config.trial_code_directory = Path(__file__).parent
experiment.config.training_service.use_active_gpu = True

experiment.run(8081)
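# A hedged follow-up: run() blocks until the experiment finishes, but the NNI
# manager and web portal keep running afterwards. Calling stop() here is a
# sketch of an explicit shutdown, not something the demo above requires.
experiment.stop()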
}, "hidden_size": { "_type": "choice", "_value": [124, 512, 1024] }, "batch_size": { "_type": "choice", "_value": [16, 32] }, "learning_rate": { "_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1] } } experiment = Experiment(['local', 'remote']) experiment.config.experiment_name = 'test' experiment.config.trial_concurrency = 3 experiment.config.max_trial_number = 10 experiment.config.search_space = search_space experiment.config.trial_command = 'python3 mnist.py' experiment.config.trial_code_directory = Path(__file__).parent experiment.config.tuner.name = 'TPE' experiment.config.tuner.class_args['optimize_mode'] = 'maximize' experiment.config.training_service[0].use_active_gpu = True experiment.config.training_service[1].reuse_mode = True rm_conf = RemoteMachineConfig() rm_conf.host = '10.1.1.1' rm_conf.user = '******' rm_conf.password = '******' rm_conf.port = 22
import signal
from pathlib import Path

from nni.experiment import Experiment

# Define search space
search_space = {
    'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
    'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    'momentum': {'_type': 'uniform', '_value': [0, 1]},
}

# Configure experiment
experiment = Experiment('local')
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = Path(__file__).parent
experiment.config.search_space = search_space
experiment.config.tuner.name = 'Random'
experiment.config.max_trial_number = 10
experiment.config.trial_concurrency = 2

# Run it!
experiment.run(port=8080, wait_completion=False)
print('Experiment is running. Press Ctrl-C to quit.')
signal.pause()
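# An alternative ending sketch for the snippet above: instead of a bare
# signal.pause(), catch Ctrl-C and shut the experiment down explicitly.
# experiment.stop() is the NNI call that terminates a detached experiment;
# wrapping pause() this way is an assumption, not part of the original snippet.
try:
    signal.pause()
except KeyboardInterrupt:
    experiment.stop()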
def create_experiment(args):
    # to make it clear what are inside args
    config_file = Path(args.config)
    port = args.port
    debug = args.debug
    url_prefix = args.url_prefix
    foreground = args.foreground

    # it should finally be done in nnictl main function
    # but for now don't break routines without logging support
    init_logger_for_command_line()
    logging.getLogger('nni').setLevel(logging.INFO)

    if not config_file.is_file():
        _logger.error(f'"{config_file}" is not a valid file.')
        exit(1)

    with config_file.open() as config:
        config_content = yaml.safe_load(config)

    v1_platform = config_content.get('trainingServicePlatform')
    if v1_platform:
        can_convert = True
        if v1_platform == 'adl':
            can_convert = False
        if v1_platform in ['kubeflow', 'frameworkcontroller']:
            reuse = config_content.get(v1_platform + 'Config', {}).get('reuse')
            # if user does not explicitly specify it, convert to reuse mode
            can_convert = (reuse != False)
        if not can_convert:
            legacy_launcher.create_experiment(args)
            exit()

        try:
            v2_config = convert.to_v2(config_content)
        except Exception:
            _logger.error(
                'You are using legacy config format with incorrect fields or values, '
                'to get more accurate error message please update it to the new format.')
            _logger.error('Reference: https://nni.readthedocs.io/en/stable/reference/experiment_config.html')
            exit(1)

        _logger.warning('You are using legacy config file, please update it to latest format:')
        # use `print` here because logging will add timestamp and make it hard to copy paste
        print(Fore.YELLOW + '=' * 80 + Fore.RESET)
        print(yaml.dump(v2_config, sort_keys=False).strip())
        print(Fore.YELLOW + '=' * 80 + Fore.RESET)
        print(Fore.YELLOW + 'Reference: https://nni.readthedocs.io/en/stable/reference/experiment_config.html' + Fore.RESET)

        utils.set_base_path(config_file.parent)
        config = ExperimentConfig(**v2_config)
        utils.unset_base_path()

    else:
        config = ExperimentConfig.load(config_file)

    if config.use_annotation:
        path = Path(tempfile.gettempdir(), getuser(), 'nni', 'annotation')
        path.mkdir(parents=True, exist_ok=True)
        path = tempfile.mkdtemp(dir=path)
        code_dir = expand_annotations(config.trial_code_directory, path)
        config.trial_code_directory = code_dir
        config.search_space = generate_search_space(code_dir)
        assert config.search_space, 'ERROR: Generated search space is empty'
        config.use_annotation = False

    exp = Experiment(config)
    exp.url_prefix = url_prefix
    run_mode = RunMode.Foreground if foreground else RunMode.Detach
    exp.start(port, debug, run_mode)
    _logger.info(f'To stop experiment run "nnictl stop {exp.id}" or "nnictl stop --all"')
    _logger.info('Reference: https://nni.readthedocs.io/en/stable/Tutorial/Nnictl.html')
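# A minimal sketch of driving create_experiment programmatically. The Namespace
# fields are exactly the attributes the function reads from args; the config
# path and port values are placeholders.
import argparse

args = argparse.Namespace(
    config='config.yml',   # placeholder path to a v1 or v2 config file
    port=8080,
    debug=False,
    url_prefix=None,
    foreground=False,      # False -> RunMode.Detach
)
create_experiment(args)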
        '_type': 'uniform',
        '_value': [0, 1]
    },
}

# %%
# Step 3: Configure the experiment
# --------------------------------
# NNI uses an *experiment* to manage the HPO process.
# The *experiment config* defines how to train the models and how to explore the search space.
#
# In this tutorial we use a *local* mode experiment,
# which means models will be trained on the local machine, without using any special training platform.
from nni.experiment import Experiment
experiment = Experiment('local')

# %%
# Now we start to configure the experiment.
#
# Configure trial code
# ^^^^^^^^^^^^^^^^^^^^
# In NNI, the evaluation of each hyperparameter set is called a *trial*,
# so the model script is called the *trial code*.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'

# %%
# When ``trial_code_directory`` is a relative path, it is relative to the current working directory.
# To run ``main.py`` from a different path, you can set the trial code directory to ``Path(__file__).parent``.
# (`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__
# is only available in standard Python, not in Jupyter Notebook.)
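# %%
# For example, to keep the trial code resolvable no matter where the script is
# launched from, the directory could be set as below (a sketch; it assumes the
# trial code sits next to this script and is not meant for Jupyter):
#
# .. code-block:: python
#
#     from pathlib import Path
#     experiment.config.trial_code_directory = Path(__file__).parent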
}, "beta": { "_type": "choice", "_value": [0, 1] }, "weight_decay": { "_type": "loguniform", "_value": [1e-9, 1] }, "gradient_clip_val": { "_type": "choice", "_value": [0, 0.25] } } experiment = Experiment(tuner, ['local', 'remote']) experiment.config.experiment_name = 'awd_lstm' experiment.config.author_name = 'Igor Quintanilha' experiment.config.max_trial_number = 100 experiment.config.max_experiment_duration = '60d' experiment.config.nni_manager_ip = '10.221.90.21' experiment.config.search_space = search_space experiment.config.trial_prepare_command = 'source /home/igor.quintanilha/miniconda3/bin/activate dsc' experiment.config.trial_command = 'python main.py --gpus 1 data/brtd --vocab data/brtd/b3922f0904f4f1b7b258a9488132f2e6480cf936493be53f74fd7aaa07e14781.8f9337.vocab --batch-size 64 --max_epochs 10 --terminate_on_nan --num-embedding 400 --num-layers 3 --num-hidden 1150 --model awd --bptt 20 --max_steps 150000 --val_check_interval .25' experiment.config.trial_code_directory = Path(__file__).parent.parent experiment.config.trial_concurrency = 2 experiment.config.trial_gpu_number = 1 experiment.config.training_service[0].use_active_gpu = True