def _check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
    success_threshold=0.9,
    env_manager=None,
):
    """
    Run a short end-to-end training session against *env* and assert success.

    :param env: Simple test environment exposing ``final_rewards``.
    :param trainer_config: Trainer configuration dict passed to TrainerFactory.
    :param reward_processor: Callable reducing an agent's reward history to one value.
    :param meta_curriculum: Optional curriculum forwarded to factory and controller.
    :param success_threshold: Minimum processed reward; pass None to skip the
        reward assertions (setup-only tests).
    :param env_manager: Optional pre-built env manager; a SimpleEnvManager is
        created when not supplied.
    :raises AssertionError: if any processed reward is NaN or below threshold.
    """
    # Create controller and begin training.
    # NOTE: renamed the context variable from `dir` — it shadowed the builtin.
    with tempfile.TemporaryDirectory() as temp_dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        # Clear StatsReporters so we don't write to file; capture stats in-memory.
        StatsReporter.writers.clear()
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            run_id=run_id,
            output_path=temp_dir,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            output_path=temp_dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )
        # Begin training
        tc.start_learning(env_manager)
        if (
            success_threshold is not None
        ):  # For tests where we are just checking setup and not reward
            processed_rewards = [
                reward_processor(rewards) for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold for reward in processed_rewards)
def run_training(sub_id: int, run_seed: int, run_options, dispatcher_pipe):
    """
    Launches training session.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training (docopt-style dict).
    :param dispatcher_pipe: Pipe to communicate with the dispatcher
    """
    logger = logging.getLogger("anha")
    # General parameters — docopt encodes "not given" as the string 'None'.
    env_path = (run_options['--env']
                if run_options['--env'] != 'None' else None)
    run_id = run_options['--run-id']
    load_model = run_options['--load']
    train_model = run_options['--train']
    save_freq = int(run_options['--save-freq'])
    keep_checkpoints = int(run_options['--keep-checkpoints'])
    worker_id = int(run_options['--worker-id'])
    curriculum_folder = (run_options['--curriculum']
                         if run_options['--curriculum'] != 'None' else None)
    lesson = int(run_options['--lesson'])
    no_graphics = run_options['--no-graphics']
    trainer_config_path = run_options['<trainer-config-path>']
    model_path = './models/{run_id}'.format(run_id=run_id)
    summaries_dir = './summaries'
    trainer_config = load_config(trainer_config_path)
    # worker_id is offset by sub_id so parallel sessions bind distinct ports.
    env = init_environment(env_path, no_graphics, worker_id + sub_id, run_seed)
    logger.info("Initialised Environment [" + run_id + "]")
    # Curriculum needs the live environment, so it is created after init.
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
    external_brains = {}
    for brain_name in env.external_brain_names:
        external_brains[brain_name] = env.brains[brain_name]
    # Suffix run id / model path with sub_id to keep sessions separate on disk.
    newRunId = run_id + '-' + str(sub_id)
    newModelPath = model_path + '-' + str(sub_id)
    # Create controller and begin training.
    tc = TrainerController(newModelPath, summaries_dir, newRunId, save_freq,
                           maybe_meta_curriculum, load_model, train_model,
                           keep_checkpoints, lesson, external_brains, run_seed,
                           dispatcher_pipe)
    # Signal that environment has been launched.
    dispatcher_pipe.send(True)
    # Begin training (blocks until training completes or is interrupted).
    tc.start_learning(env, trainer_config)
def test_load_config(mock_communicator, mock_launcher, dummy_config):
    """
    TrainerController._load_config should return the parsed trainer config.

    yaml.load and the controller's open() are patched so no file I/O occurs;
    the communicator is mocked so no Unity process is launched.
    """
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            # Fix: the original assigned mock_load.return_value twice;
            # the redundant duplicate assignment has been removed.
            mock_load.return_value = dummy_config
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1,
                                   1, 1, '', '', False)
            config = tc._load_config()
            assert (len(config) == 1)
            assert (config['default']['trainer'] == "ppo")
def basic_trainer_controller():
    """Build a TrainerController test fixture backed by a mocked trainer factory."""
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    controller_kwargs = dict(
        trainer_factory=factory,
        output_path="test_model_path",
        run_id="test_run_id",
        meta_curriculum=None,
        train=True,
        training_seed=99,
    )
    return TrainerController(**controller_kwargs)
def basic_trainer_controller():
    """Build a TrainerController fixture with a mocked factory and a fresh parameter manager."""
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    controller_kwargs = dict(
        trainer_factory=factory,
        output_path="test_model_path",
        run_id="test_run_id",
        param_manager=EnvironmentParameterManager(),
        train=True,
        training_seed=99,
    )
    return TrainerController(**controller_kwargs)
def run_training(sub_id, run_seed, run_options, process_queue): """ Launches training session. :param process_queue: Queue used to send signal back to main. :param sub_id: Unique id for training session. :param run_seed: Random seed used for training. :param run_options: Command line arguments for training. """ # Docker Parameters docker_target_name = (run_options['--docker-target-name'] if run_options['--docker-target-name'] != 'None' else None) # General parameters env_path = (run_options['--env'] if run_options['--env'] != 'None' else None) run_id = run_options['--run-id'] load_model = run_options['--load'] train_model = run_options['--train'] save_freq = int(run_options['--save-freq']) keep_checkpoints = int(run_options['--keep-checkpoints']) worker_id = int(run_options['--worker-id']) curriculum_file = (run_options['--curriculum'] if run_options['--curriculum'] != 'None' else None) lesson = int(run_options['--lesson']) fast_simulation = not bool(run_options['--slow']) no_graphics = run_options['--no-graphics'] trainer_config_path = run_options['<trainer-config-path>'] # Create controller and launch environment. tc = TrainerController(env_path, run_id + '-' + str(sub_id), save_freq, curriculum_file, fast_simulation, load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, run_seed, docker_target_name, trainer_config_path, no_graphics) # Signal that environment has been launched. process_queue.put(True) # Begin training tc.start_learning()
def basic_trainer_controller(brain_info):
    """Create a TrainerController test fixture exposing a single external test brain."""
    controller_params = {
        'model_path': 'test_model_path',
        'summaries_dir': 'test_summaries_dir',
        'run_id': 'test_run_id',
        'save_freq': 100,
        'meta_curriculum': None,
        'load': True,
        'train': True,
        'keep_checkpoints': False,
        'lesson': None,
        'external_brains': {'testbrain': brain_info},
        'training_seed': 99,
    }
    return TrainerController(**controller_params)
def basic_trainer_controller():
    """Create a minimal TrainerController fixture with no factory and an empty sampler set."""
    controller_params = {
        "trainer_factory": None,
        "model_path": "test_model_path",
        "summaries_dir": "test_summaries_dir",
        "run_id": "test_run_id",
        "save_freq": 100,
        "meta_curriculum": None,
        "train": True,
        "training_seed": 99,
        "sampler_manager": SamplerManager({}),
        "resampling_interval": None,
    }
    return TrainerController(**controller_params)
def test_initialize_offline_trainers(mock_communicator, mock_launcher,
                                     dummy_config, dummy_offline_bc_config,
                                     dummy_online_bc_config, dummy_bad_config):
    """
    _initialize_trainers should build an OfflineBCTrainer for a brain whose
    config requests offline behavioral cloning.

    yaml.load and the controller's open() are patched so no real file I/O
    happens; the communicator mock avoids launching a Unity process.
    """
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, stack=False, visual_inputs=0,
                brain_name="Ball3DBrain", vec_obs_size=8)
            # Positional ctor args must match this TrainerController version.
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, '', "tests/test_mlagents.trainers.py",
                                   False)
            # Test for Offline Behavior Cloning Trainer
            mock_load.return_value = dummy_offline_bc_config
            config = tc._load_config()
            # Reset TF graph so trainer variables from other tests don't collide.
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert (isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer))
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Constructing a TrainerController must seed numpy and tensorflow with training_seed."""
    expected_seed = 27
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    TrainerController(
        trainer_factory=factory,
        output_path="",
        run_id="1",
        param_manager=None,
        train=True,
        training_seed=expected_seed,
    )
    numpy_random_seed.assert_called_with(expected_seed)
    tensorflow_set_seed.assert_called_with(expected_seed)
def basic_trainer_controller():
    """Build a TrainerController fixture with a mocked factory and an empty sampler manager."""
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    controller_kwargs = dict(
        trainer_factory=factory,
        output_path="test_model_path",
        run_id="test_run_id",
        save_freq=100,
        meta_curriculum=None,
        train=True,
        training_seed=99,
        sampler_manager=SamplerManager({}),
        resampling_interval=None,
    )
    return TrainerController(**controller_kwargs)
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Constructing a TrainerController must seed numpy and tensorflow with training_seed."""
    expected_seed = 27
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    controller_kwargs = dict(
        trainer_factory=factory,
        output_path="",
        run_id="1",
        meta_curriculum=None,
        train=True,
        training_seed=expected_seed,
        sampler_manager=SamplerManager({}),
        resampling_interval=None,
    )
    TrainerController(**controller_kwargs)
    numpy_random_seed.assert_called_with(expected_seed)
    tensorflow_set_seed.assert_called_with(expected_seed)
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Constructing a TrainerController must seed numpy and tensorflow with training_seed."""
    expected_seed = 27
    controller_kwargs = dict(
        trainer_factory=None,
        model_path="",
        summaries_dir="",
        run_id="1",
        save_freq=1,
        meta_curriculum=None,
        train=True,
        training_seed=expected_seed,
        sampler_manager=SamplerManager({}),
        resampling_interval=None,
    )
    TrainerController(**controller_kwargs)
    numpy_random_seed.assert_called_with(expected_seed)
    tensorflow_set_seed.assert_called_with(expected_seed)
def _check_environment_trains(env, config, meta_curriculum=None,
                              success_threshold=0.99):
    """
    Run a short end-to-end training session against *env* and assert success.

    :param env: Simple test environment.
    :param config: YAML string with the trainer configuration.
    :param meta_curriculum: Optional curriculum forwarded to factory/controller.
    :param success_threshold: Minimum mean reward; pass None to skip reward
        assertions (setup-only tests).
    :raises AssertionError: if any mean reward is NaN or below the threshold.
    """
    # Create controller and begin training.
    # NOTE: renamed the context variable from `dir` — it shadowed the builtin.
    with tempfile.TemporaryDirectory() as temp_dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        # Clear StatsReporters so we don't write to file.
        StatsReporter.writers.clear()
        trainer_config = yaml.safe_load(config)
        env_manager = SimpleEnvManager(env, FloatPropertiesChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=temp_dir,
            run_id=run_id,
            model_path=temp_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=temp_dir,
            model_path=temp_dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )
        # Begin training
        tc.start_learning(env_manager)
        print(tc._get_measure_vals())
        if (
            success_threshold is not None
        ):  # For tests where we are just checking setup and not reward
            for mean_reward in tc._get_measure_vals().values():
                assert not math.isnan(mean_reward)
                assert mean_reward > success_threshold
def _check_environment_trains(env, config):
    """
    Run a short end-to-end training session against *env* and assert that
    every brain's mean reward exceeds 0.99.

    :param env: Simple test environment.
    :param config: YAML string with the trainer configuration.
    :raises AssertionError: if any mean reward is NaN or <= 0.99.
    """
    # Create controller and begin training.
    # NOTE: renamed the context variable from `dir` — it shadowed the builtin.
    with tempfile.TemporaryDirectory() as temp_dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        trainer_config = yaml.safe_load(config)
        env_manager = SimpleEnvManager(env)
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=temp_dir,
            run_id=run_id,
            model_path=temp_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=None,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=temp_dir,
            model_path=temp_dir,
            run_id=run_id,
            meta_curriculum=None,
            train=True,
            training_seed=seed,
            fast_simulation=True,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )
        # Begin training
        tc.start_learning(env_manager)
        print(tc._get_measure_vals())
        # Fix: iterate .values() — the brain name key was never used.
        for mean_reward in tc._get_measure_vals().values():
            assert not math.isnan(mean_reward)
            assert mean_reward > 0.99
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        model_path = f"./models/{options.run_id}"
        # Optional path to a previous run's model to initialize weights from.
        maybe_init_path = (
            f"./models/{options.initialize_from}" if options.initialize_from else None
        )
        summaries_dir = "./summaries"
        port = options.base_port
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        # Fails early if the output directories conflict with resume/force flags.
        handle_existing_directories(
            model_path, summaries_dir, options.resume, options.force, maybe_init_path
        )
        tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)
        # No env binary means the Unity Editor is the environment; use its port.
        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            width=options.width,
            height=options.height,
            quality_level=options.quality_level,
            time_scale=options.time_scale,
            target_frame_rate=options.target_frame_rate,
            capture_frame_rate=options.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        # NOTE: this TrainerFactory/TrainerController version takes positional
        # args; the order below must not change.
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            not options.inference,
            options.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            not options.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always shut down env subprocesses and flush timing data.
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
def run_training(sub_id: int, run_seed: int, run_options: Dict[str, Any],
                 process_queue: Queue) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training (docopt-style dict).
    """
    # Docker Parameters — docopt encodes "not given" as the string "None".
    docker_target_name = (run_options["--docker-target-name"]
                          if run_options["--docker-target-name"] != "None"
                          else None)
    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (run_options["--curriculum"]
                         if run_options["--curriculum"] != "None" else None)
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (run_options["--sampler"]
                         if run_options["--sampler"] != "None" else None)
    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(run_id=run_id,
                                                         sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        # Rebase every path onto the mounted docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id)
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name)
    trainer_config = load_config(trainer_config_path)
    # Each sub-run gets its own port range (sub_id * num_envs offset).
    env_factory = create_environment_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs),
        list([str(x) for t in run_options.items()
              for x in t]),  # NOTE passes all arguments to Unity
    )
    env = SubprocessEnvManager(env_factory, num_envs)
    # Curriculum needs the live env manager, so it is created after it.
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters)
    # Create controller and begin training.
    # NOTE: positional args must match this TrainerController version exactly.
    tc = TrainerController(
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        load_model,
        train_model,
        keep_checkpoints,
        lesson,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training (blocks until training completes or is interrupted).
    tc.start_learning(env, trainer_config)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.

    NOTE(review): this is an experimental meta-learning variant — it first
    extracts initial weights, then repeatedly retrains from those weights in a
    loop, carrying the trained weights across iterations. It contains
    hard-coded local paths and debug overrides that look like development
    scaffolding, not production code.
    """
    # NOTE(review): run id is force-overridden here — any user-supplied
    # --run-id is discarded. Looks like a debugging override; confirm.
    options.checkpoint_settings.run_id = "test8"
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (os.path.join(base_path,
                                        checkpoint_settings.initialize_from)
                           if checkpoint_settings.initialize_from else None)
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json"))
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        if env_settings.env_path is None:
            port = None
        # Begin training
        # NOTE(review): env path is hard-coded to a local Windows build —
        # this overrides whatever was passed on the command line.
        env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(
                run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)
        # Temporarily cap max_steps at 10 so the initial session only
        # initializes weights; the real value is restored further down.
        max_steps = options.behaviors['Brain'].max_steps
        options.behaviors['Brain'].max_steps = 10
        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=0)
        # Force a constant LR schedule for the weight-initialization pass.
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
    try:
        # Get inital weights
        tc.init_weights(env_manager)
        inital_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
    # Restore the real step budget for the meta-learning iterations.
    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        # NOTE(review): random_sample() yields values in [0, 1), so
        # `sample > 1` can never be true — the Carry Object branch is
        # effectively disabled; presumably intentional during debugging.
        if (sample > 1):
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
        # Rebuild env, curriculum, samplers and trainer for this iteration.
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(
                run_logs_dir),  # Unity environment requires absolute path
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)
        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=step)
        # Linearly anneal lr / beta / epsilon over the meta-update budget.
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate = 0.0005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.beta = 0.005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.epsilon = 0.2 * (
                1 - counter / max_meta_updates)
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.
            learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            # Get inital weights
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Inital weights: " + str(inital_weights[8]))
            weights_after_train = tc.start_learning(env_manager,
                                                    inital_weights)
            print(tc.trainers['Brain'].optimizer)
            # weights_after_train = tc.weights
            # print("Trained weights: " + str(weights_after_train[8]))
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
            # print(weights_after_train)
            # equal = []
            # for i, weight in enumerate(tc.weights):
            #    equal.append(np.array_equal(inital_weights[i], weights_after_train[i]))
            # print(all(equal))
        finally:
            # NOTE(review): if start_learning raises before assignment,
            # weights_after_train is unbound here and this finally block
            # itself raises NameError — confirm whether that is acceptable.
            print(len(weights_after_train), len(inital_weights))
            # Carry the trained weights into the next meta-iteration.
            for i, weight in enumerate(weights_after_train):
                inital_weights[i] = weights_after_train[i]
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
def run_training(run_seed: int, options: RunOptions) -> None: """ Launches training session. :param options: parsed command line arguments :param run_seed: Random seed used for training. :param run_options: Command line arguments for training. """ # Recognize and use docker volume if one is passed as an argument if not options.docker_target_name: model_path = f"./models/{options.run_id}" summaries_dir = "./summaries" else: model_path = f"/{options.docker_target_name}/models/{options.run_id}" summaries_dir = f"/{options.docker_target_name}/summaries" port = options.base_port # Configure CSV, Tensorboard Writers and StatsReporter # We assume reward and episode length are needed in the CSV. csv_writer = CSVWriter( summaries_dir, required_fields=[ "Environment/Cumulative Reward", "Environment/Episode Length" ], ) tb_writer = TensorboardWriter(summaries_dir) StatsReporter.add_writer(tb_writer) StatsReporter.add_writer(csv_writer) if options.env_path is None: port = UnityEnvironment.DEFAULT_EDITOR_PORT env_factory = create_environment_factory( options.env_path, options.docker_target_name, options.no_graphics, run_seed, port, options.env_args, ) engine_config = EngineConfig( options.width, options.height, options.quality_level, options.time_scale, options.target_frame_rate, ) env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs) maybe_meta_curriculum = try_create_meta_curriculum( options.curriculum_config, env_manager, options.lesson) sampler_manager, resampling_interval = create_sampler_manager( options.sampler_config, run_seed) trainer_factory = TrainerFactory( options.trainer_config, summaries_dir, options.run_id, model_path, options.keep_checkpoints, options.train_model, options.load_model, run_seed, maybe_meta_curriculum, options.multi_gpu, ) # Create controller and begin training. 
tc = TrainerController( trainer_factory, model_path, summaries_dir, options.run_id, options.save_freq, maybe_meta_curriculum, options.train_model, run_seed, sampler_manager, resampling_interval, ) # Begin training try: tc.start_learning(env_manager) finally: env_manager.close()
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Constructing a TrainerController must forward training_seed to numpy and tensorflow."""
    expected_seed = 27
    ctor_args = ('', '', '1', 1, None, True, False, False, None, {},
                 expected_seed)
    TrainerController(*ctor_args)
    numpy_random_seed.assert_called_with(expected_seed)
    tensorflow_set_seed.assert_called_with(expected_seed)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure Tensorboard Writers and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)
        # No env binary means the Unity Editor is the environment; let it pick the port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always shut down env subprocesses and persist run metadata.
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)
        # No env binary means the Unity Editor is the environment; let it pick the port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        # NOTE: positional args must match this TrainerFactory version exactly.
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
        )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always shut down env subprocesses and persist run metadata.
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    model_path = f"./models/{options.run_id}"
    summaries_dir = "./summaries"
    port = options.base_port
    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=[
            "Environment/Cumulative Reward", "Environment/Episode Length"
        ],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)
    if options.env_path is None:
        port = 5004  # This is the in Editor Training Port
    env_factory = create_environment_factory(options.env_path,
                                             options.no_graphics, run_seed,
                                             port, options.env_args,
                                             options.env_id, options.n_steps)
    env_manager = SubprocessEnvManager(env_factory=env_factory,
                                       n_env=options.num_envs)
    # Curriculum needs the live env manager, so it is created after it.
    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum_config, env_manager, options.lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_config, run_seed)
    # NOTE: positional args must match this TrainerFactory version exactly.
    trainer_factory = TrainerFactory(
        options.trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(trainer_factory=trainer_factory,
                           model_path=model_path,
                           summaries_dir=summaries_dir,
                           run_id=options.run_id,
                           save_freq=options.save_freq,
                           meta_curriculum=maybe_meta_curriculum,
                           train=options.train_model,
                           training_seed=run_seed,
                           sampler_manager=sampler_manager,
                           resampling_interval=resampling_interval,
                           n_steps=options.n_steps)
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always shut down env subprocesses, even on error/interrupt.
        env_manager.close()
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.

    :param sub_id: Unique id for this training session (offsets ports/paths).
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    :param process_queue: Queue used to send a launch signal back to main.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument;
    # all input/output paths are then rooted at the mounted volume.
    if not options.docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Each concurrent session gets a disjoint port range: one port per env.
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        options.base_port + (sub_id * options.num_envs),
        options.env_args,
    )
    env = SubprocessEnvManager(env_factory, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env.reset_parameters, run_seed
    )
    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    tc.start_learning(env)
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.

    :param sub_id: Unique id for this training session (offsets ports/paths).
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    :param process_queue: Queue used to send a launch signal back to main.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument;
    # all input/output paths are then rooted at the mounted volume.
    if not options.docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Each concurrent session gets a disjoint port range: one port per env.
    port = options.base_port + (sub_id * options.num_envs)
    if options.env_path is None:
        port = 5004  # This is the in-Editor Training Port
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, run_seed
    )
    trainer_factory = TrainerFactory(
        trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training; always tear down the subprocess envs, even on failure.
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
def run_training(
    sub_id: int,
    run_seed: int,
    run_options: Dict[str, Any],
    process_queue: Queue,
    # NOTE(review): original annotation was `None`, but the body treats this as
    # an optional factory-creating callable (`if ... is None` / called with five
    # args) — annotated as a forward-ref string to avoid needing a typing import.
    inject_create_environment_factory: "Optional[Callable]",
) -> None:
    """
    Launches training session.

    :param sub_id: Unique id for this training session (offsets ports/paths).
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training (docopt-style dict).
    :param process_queue: Queue used to send a launch signal back to main.
    :param inject_create_environment_factory: optional replacement for
        create_environment_factory (e.g. for tests); None uses the default.
    """
    # Docker Parameters
    docker_target_name = (
        run_options["--docker-target-name"]
        if run_options["--docker-target-name"] != "None"
        else None
    )
    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (
        run_options["--curriculum"] if run_options["--curriculum"] != "None" else None
    )
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    multi_gpu = run_options["--multi-gpu"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (
        run_options["--sampler"] if run_options["--sampler"] != "None" else None
    )
    # Recognize and use docker volume if one is passed as an argument;
    # all input/output paths are then rooted at the mounted volume.
    if not docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Use the injected factory when provided (same argument contract),
    # otherwise the default one. Ports are offset so sessions don't collide.
    if inject_create_environment_factory is None:
        env_factory = create_environment_factory(
            env_path,
            docker_target_name,
            no_graphics,
            run_seed,
            base_port + (sub_id * num_envs),
        )
    else:
        env_factory = inject_create_environment_factory(
            env_path,
            docker_target_name,
            no_graphics,
            run_seed,
            base_port + (sub_id * num_envs),
        )
    # HACK for debug use SimpleEnvManager
    if num_envs > 1:
        # create a mock env for parsing examples (kill internal)
        mock_env = env_factory(9999)
        # from minerl_to_mlagent_wrapper import MineRLToMLAgentWrapper
        # MineRLToMLAgentWrapper.set_wrappers_for_pretraining(mock_env.brain_names[0], mock_env)
        # close inner minerl environment; fall back to plain close() when the
        # env does not expose a private _envs mapping
        try:
            for k, v in mock_env._envs.items():
                for e in v:
                    e.unwrapped.close()
        except AttributeError:
            mock_env.close()
        env = SubprocessEnvManager(env_factory, num_envs)
    else:
        # Single env: run it in-process instead of via subprocesses.
        env = env_factory(0)
        env = SimpleEnvManager(env)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters, run_seed
    )
    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        keep_checkpoints,
        train_model,
        load_model,
        run_seed,
        maybe_meta_curriculum,
        multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        train_model,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    tc.start_learning(env)
def run_training(sub_id: int, run_seed: int, run_options, process_queue):
    """
    Launches training session.

    :param sub_id: Unique id for this training session (offsets ports/paths).
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training (docopt-style dict).
    :param process_queue: Queue used to send a launch signal back to main.
    """
    # Docker Parameters
    docker_target_name = (run_options['--docker-target-name']
                          if run_options['--docker-target-name'] != 'None' else None)

    # General parameters; 'None' string sentinels map to Python None.
    env_path = (run_options['--env']
                if run_options['--env'] != 'None' else None)
    run_id = run_options['--run-id']
    load_model = run_options['--load']
    train_model = run_options['--train']
    save_freq = int(run_options['--save-freq'])
    keep_checkpoints = int(run_options['--keep-checkpoints'])
    base_port = int(run_options['--base-port'])
    num_envs = int(run_options['--num-envs'])
    curriculum_folder = (run_options['--curriculum']
                         if run_options['--curriculum'] != 'None' else None)
    lesson = int(run_options['--lesson'])
    fast_simulation = not bool(run_options['--slow'])
    no_graphics = run_options['--no-graphics']
    trainer_config_path = run_options['<trainer-config-path>']

    # Recognize and use docker volume if one is passed as an argument;
    # all input/output paths are then rooted at the mounted volume.
    if not docker_target_name:
        model_path = './models/{run_id}-{sub_id}'.format(run_id=run_id,
                                                         sub_id=sub_id)
        summaries_dir = './summaries'
    else:
        trainer_config_path = \
            '/{docker_target_name}/{trainer_config_path}'.format(
                docker_target_name=docker_target_name,
                trainer_config_path=trainer_config_path)
        if curriculum_folder is not None:
            curriculum_folder = \
                '/{docker_target_name}/{curriculum_folder}'.format(
                    docker_target_name=docker_target_name,
                    curriculum_folder=curriculum_folder)
        model_path = '/{docker_target_name}/models/{run_id}-{sub_id}'.format(
            docker_target_name=docker_target_name,
            run_id=run_id,
            sub_id=sub_id)
        summaries_dir = '/{docker_target_name}/summaries'.format(
            docker_target_name=docker_target_name)

    trainer_config = load_config(trainer_config_path)
    # Each concurrent session gets a disjoint port range: one port per env.
    env_factory = create_environment_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs)
    )
    env = SubprocessUnityEnvironment(env_factory, num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)

    # Create controller and begin training.
    tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
                           save_freq, maybe_meta_curriculum,
                           load_model, train_model,
                           keep_checkpoints, lesson,
                           env.external_brains, run_seed, fast_simulation)

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env, trainer_config)
def test_initialization(mock_communicator, mock_launcher):
    """TrainerController wires the mocked environment and exposes its brain."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=True, visual_inputs=1
    )
    controller = TrainerController(
        ' ', ' ', 1, None, True, True, False, 1, 1, 1, 1, '',
        "tests/test_mlagents.trainers.py", False
    )
    # The mock communicator advertises a single brain named 'RealFakeBrain'.
    assert controller.env.brain_names[0] == 'RealFakeBrain'
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches training session.

    :param run_seed: Random seed used for training.
    :param num_areas: Number of training areas to instantiate
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states in case of resume
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # In case of initialization, set full init_path for all behaviors
        elif checkpoint_settings.maybe_init_path is not None:
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = register_stats_writer_plugins(options)
        for sw in stats_writers:
            StatsReporter.add_writer(sw)

        # No env binary means in-Editor training; the port is unused then.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training; always tear down the subprocess envs, even on failure.
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        # Persist run artifacts regardless of how training ended.
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
                             dummy_offline_bc_config, dummy_online_bc_config,
                             dummy_bad_config):
    """
    _initialize_trainers builds the right trainer type per config, and raises
    UnityEnvironmentException for an unknown trainer name.

    NOTE(review): dummy_offline_bc_config is requested but never used below.
    """
    # Patch both yaml.load and the trainer_controller module's open() so
    # _load_config reads the mocked config instead of touching disk.
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1,
                                   1, 1, 1, '',
                                   "tests/test_mlagents.trainers.py", False)

            # Test for PPO trainer
            mock_load.return_value = dummy_config
            config = tc._load_config()
            tf.reset_default_graph()  # fresh TF graph per trainer init
            tc._initialize_trainers(config)
            assert (len(tc.trainers) == 1)
            assert (isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))

            # Test for Online Behavior Cloning Trainer
            mock_load.return_value = dummy_online_bc_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert (isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer))

            # Test for proper exception when trainer name is incorrect
            mock_load.return_value = dummy_bad_config
            config = tc._load_config()
            tf.reset_default_graph()
            with pytest.raises(UnityEnvironmentException):
                tc._initialize_trainers(config)