    def test_step_takes_steps_for_all_non_waiting_envs(self):
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
        manager = SubprocessEnvManager(mock_env_factory, 3)
        manager.step_queue = Mock()
        manager.step_queue.get_nowait.side_effect = [
            EnvironmentResponse("step", 0, StepResponse(0, None)),
            EnvironmentResponse("step", 1, StepResponse(1, None)),
            EmptyQueue(),
        ]
        step_mock = Mock()
        last_steps = [Mock(), Mock(), Mock()]
        manager.env_workers[0].previous_step = last_steps[0]
        manager.env_workers[1].previous_step = last_steps[1]
        manager.env_workers[2].previous_step = last_steps[2]
        # Worker 2 is still waiting on its last step, so only workers 0 and 1 should be stepped
        manager.env_workers[2].waiting = True
        manager._take_step = Mock(return_value=step_mock)
        res = manager.step()
        for i, env in enumerate(manager.env_workers):
            if i < 2:
                env.send.assert_called_with("step", step_mock)
                manager.step_queue.get_nowait.assert_called()
                # Check that the "last steps" are set to the value returned for each step
                self.assertEqual(
                    manager.env_workers[i].previous_step.current_all_brain_info, i
                )
                self.assertEqual(
                    manager.env_workers[i].previous_step.previous_all_brain_info,
                    last_steps[i].current_all_brain_info,
                )
        assert res == [
            manager.env_workers[0].previous_step,
            manager.env_workers[1].previous_step,
        ]
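The tests in this listing rely on MockEnvWorker, mock_env_factory, and the EnvironmentResponse/StepResponse types defined elsewhere in the test module and in ml-agents. A minimal sketch of what the two test helpers might look like, inferred only from how they are used above (all names and fields here are assumptions, not the project's exact definitions):

from unittest.mock import Mock


def mock_env_factory(worker_id: int):
    # Hypothetical stand-in: the tests only need a callable the manager can hand
    # to each worker, so a bare Mock is enough here.
    return Mock()


class MockEnvWorker:
    # Hypothetical worker handle: send/recv are Mocks so the tests can assert on
    # what the manager sent, and recv returns a canned EnvironmentResponse.
    def __init__(self, worker_id, resp=None):
        self.worker_id = worker_id
        self.process = None
        self.conn = None
        self.send = Mock()
        self.recv = Mock(return_value=resp)
        self.waiting = False
        self.previous_step = None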
Example #2
    def advance(self, env: SubprocessEnvManager) -> int:
        with hierarchical_timer("env_step"):
            time_start_step = time()
            new_step_infos = env.step()
            delta_time_step = time() - time_start_step

        for step_info in new_step_infos:
            for brain_name, trainer in self.trainers.items():
                if brain_name in self.trainer_metrics:
                    self.trainer_metrics[brain_name].add_delta_step(
                        delta_time_step)
                trainer.add_experiences(
                    step_info.previous_all_brain_info,
                    step_info.current_all_brain_info,
                    step_info.brain_name_to_action_info[brain_name].outputs,
                )
                trainer.process_experiences(step_info.previous_all_brain_info,
                                            step_info.current_all_brain_info)
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(
                    delta_time_step)
            if self.train_model and trainer.get_step <= trainer.get_max_steps:
                trainer.increment_step(len(new_step_infos))
                if trainer.is_ready_update():
                    # Perform gradient descent with experience buffer
                    with hierarchical_timer("update_policy"):
                        trainer.update_policy()
                    env.set_policy(brain_name, trainer.policy)
        return len(new_step_infos)
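advance() returns the number of new step infos it processed. A rough sketch (a hypothetical driver method and step budget, not the controller's actual loop) of how it might be called repeatedly:

    def train_loop(self, env: SubprocessEnvManager, max_steps: int) -> None:
        # Hypothetical driver: keep advancing the environment manager until the
        # global step budget is exhausted; advance() reports how many new steps
        # were collected across all workers.
        global_step = 0
        while global_step < max_steps:
            global_step += self.advance(env)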
Example #3
    def test_reset_passes_reset_params(self):
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id)
        )
        manager = SubprocessEnvManager(mock_env_factory, 1)
        params = {"test": "params"}
        manager.reset(params, False)
        manager.env_workers[0].send.assert_called_with("reset", (params, False, None))
Example #4
    def test_reset_collects_results_from_all_envs(self):
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id))
        manager = SubprocessEnvManager(mock_env_factory, 4)

        params = {"test": "params"}
        res = manager.reset(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with("reset", (params, True, None))
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_brain_info, i)
        assert res == list(
            map(lambda ew: ew.previous_step, manager.env_workers))
Example #5
    def test_environments_are_created(self):
        SubprocessEnvManager.create_worker = MagicMock()
        env = SubprocessEnvManager(mock_env_factory, 2)
        # Creates two processes
        env.create_worker.assert_has_calls([
            mock.call(0, env.step_queue, mock_env_factory),
            mock.call(1, env.step_queue, mock_env_factory),
        ])
        self.assertEqual(len(env.env_workers), 2)
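The tests above patch out SubprocessEnvManager.create_worker, which in the real manager starts one subprocess per environment and keeps a handle holding its pipe connection. A rough, hedged sketch of the shape of that mechanism (EnvWorkerHandle, _worker_loop, and create_worker_sketch are illustrative names, not the library's API):

from multiprocessing import Pipe, Process, Queue
from multiprocessing.connection import Connection
from typing import Callable, NamedTuple


class EnvWorkerHandle(NamedTuple):
    # Hypothetical per-worker record; the real manager keeps something similar
    # (the tests above replace it with MockEnvWorker).
    process: Process
    worker_id: int
    conn: Connection


def _worker_loop(conn: Connection, step_queue: Queue, env_factory: Callable, worker_id: int) -> None:
    # Hypothetical child-process body: build the environment in the subprocess
    # and service "step"/"reset"/"close" commands arriving over the pipe.
    env = env_factory(worker_id)
    ...


def create_worker_sketch(worker_id: int, step_queue: Queue, env_factory: Callable) -> EnvWorkerHandle:
    # Open a duplex pipe, start the worker subprocess, and return a handle the
    # manager can send()/recv() commands through.
    parent_conn, child_conn = Pipe()
    proc = Process(target=_worker_loop, args=(child_conn, step_queue, env_factory, worker_id))
    proc.start()
    return EnvWorkerHandle(proc, worker_id, parent_conn)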
Example #6
def run_training(sub_id: int, run_seed: int, run_options: Dict[str, Any],
                 process_queue: Queue) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    # Docker Parameters
    docker_target_name = (run_options["--docker-target-name"]
                          if run_options["--docker-target-name"] != "None" else
                          None)

    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (run_options["--curriculum"]
                         if run_options["--curriculum"] != "None" else None)
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (run_options["--sampler"]
                         if run_options["--sampler"] != "None" else None)

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(run_id=run_id,
                                                         sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name,
            run_id=run_id,
            sub_id=sub_id)
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name)

    trainer_config = load_config(trainer_config_path)
    env_factory = create_environment_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs),
        list([str(x) for t in run_options.items()
              for x in t]),  # NOTE passes all arguments to Unity
    )
    env = SubprocessEnvManager(env_factory, num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters)

    # Create controller and begin training.
    tc = TrainerController(
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        load_model,
        train_model,
        keep_checkpoints,
        lesson,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env, trainer_config)
Example #7
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    port = options.base_port + (sub_id * options.num_envs)
    if options.env_path is None:
        port = 5004  # This is the in-Editor training port
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, run_seed
    )
    trainer_factory = TrainerFactory(
        trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
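This variant passes an EngineConfig to SubprocessEnvManager. A minimal sketch of the shape implied by the call above (a NamedTuple with exactly these five fields is an assumption based on the arguments passed, not necessarily the library's full definition):

from typing import NamedTuple


class EngineConfig(NamedTuple):
    # Fields inferred from the positional arguments used above.
    width: int
    height: int
    quality_level: int
    time_scale: float
    target_frame_rate: int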
Example #8
def run_training(
    sub_id: int,
    run_seed: int,
    run_options: Dict[str, Any],
    process_queue: Queue,
    inject_create_environment_factory: Optional[Callable] = None,
) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    :param inject_create_environment_factory: Optional override for
        create_environment_factory, e.g. to inject a custom factory in tests.
    """
    # Docker Parameters
    docker_target_name = (
        run_options["--docker-target-name"]
        if run_options["--docker-target-name"] != "None"
        else None
    )

    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (
        run_options["--curriculum"] if run_options["--curriculum"] != "None" else None
    )
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    multi_gpu = run_options["--multi-gpu"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (
        run_options["--sampler"] if run_options["--sampler"] != "None" else None
    )

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name
        )

    trainer_config = load_config(trainer_config_path)
    if inject_create_environment_factory is None:
        env_factory = create_environment_factory(
            env_path,
            docker_target_name,
            no_graphics,
            run_seed,
            base_port + (sub_id * num_envs),
        )
    else:
        env_factory = inject_create_environment_factory(
            env_path,
            docker_target_name,
            no_graphics,
            run_seed,
            base_port + (sub_id * num_envs),
        )
    
    # HACK: for debugging, fall back to SimpleEnvManager when only one environment is requested
    if num_envs > 1:
        # Create a throwaway env for parsing examples, then close its internals
        mock_env = env_factory(9999)
        # from minerl_to_mlagent_wrapper import MineRLToMLAgentWrapper
        # MineRLToMLAgentWrapper.set_wrappers_for_pretraining(mock_env.brain_names[0], mock_env)
        # Close the inner MineRL environments
        try:
            for k, v in mock_env._envs.items():
                for e in v:
                    e.unwrapped.close()
        except AttributeError:
            mock_env.close()
        env = SubprocessEnvManager(env_factory, num_envs)
    else:
        env = env_factory(0)
        env = SimpleEnvManager(env)

    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        keep_checkpoints,
        train_model,
        load_model,
        run_seed,
        maybe_meta_curriculum,
        multi_gpu,
    )

    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        train_model,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env)
Example #9
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    # Docker Parameters

    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder

    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )

    trainer_config = load_config(trainer_config_path)
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        options.base_port + (sub_id * options.num_envs),
        options.env_args,
    )
    env = SubprocessEnvManager(env_factory, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )

    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env)
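All of these run_training variants signal readiness through process_queue.put(True). A minimal sketch (launch_training_sessions, num_runs, and base_seed are assumed names, not the actual launcher) of how a parent process might dispatch several training sessions and wait for each environment to come up:

from multiprocessing import Process, Queue


def launch_training_sessions(options, num_runs: int, base_seed: int) -> None:
    # Hypothetical launcher: start one run_training subprocess per run and wait
    # for the "environment launched" signal before starting the next one.
    jobs = []
    for sub_id in range(num_runs):
        process_queue = Queue()
        p = Process(
            target=run_training,
            args=(sub_id, base_seed + sub_id, options, process_queue),
        )
        p.start()
        jobs.append(p)
        process_queue.get()  # blocks until run_training puts True
    for p in jobs:
        p.join()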