Example #1
 def _create_engine_channel(self):
     engine_channel = EngineConfigurationChannel()
     if self.train_mode:
         engine_config = EngineConfig(80, 80, 1, 4.0, 30 * 4)
     else:
         engine_config = EngineConfig(1280, 720, 1, 1.0, 60)
     engine_channel.set_configuration(engine_config)
     return engine_channel
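
For context, a minimal sketch of how a channel built this way is typically attached to an environment. This assumes the standard mlagents_envs package layout and the five-field EngineConfig this snippet targets; the build path is hypothetical.

from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import (
    EngineConfig, EngineConfigurationChannel)

# Build the channel with the training-mode values used above.
engine_channel = EngineConfigurationChannel()
engine_channel.set_configuration(EngineConfig(80, 80, 1, 4.0, 30 * 4))
# Side channels must be registered at construction time so the configuration
# message is delivered during the initial handshake.
env = UnityEnvironment(
    file_name="path/to/build",  # hypothetical path
    side_channels=[engine_channel])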
Example #2
 def test_environments_are_created(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     env = SubprocessEnvManager(mock_env_factory,
                                EngineConfig.default_config(), 2)
     # Creates two processes
     env.create_worker.assert_has_calls([
         mock.call(0, env.step_queue, mock_env_factory,
                   EngineConfig.default_config()),
         mock.call(1, env.step_queue, mock_env_factory,
                   EngineConfig.default_config()),
     ])
     self.assertEqual(len(env.env_workers), 2)
 def test_step_takes_steps_for_all_non_waiting_envs(self):
     SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
         worker_id, EnvironmentResponse("step", worker_id, worker_id))
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 3)
     manager.step_queue = Mock()
     manager.step_queue.get_nowait.side_effect = [
         EnvironmentResponse("step", 0, StepResponse(0, None)),
         EnvironmentResponse("step", 1, StepResponse(1, None)),
         EmptyQueue(),
     ]
     step_mock = Mock()
     last_steps = [Mock(), Mock(), Mock()]
     manager.env_workers[0].previous_step = last_steps[0]
     manager.env_workers[1].previous_step = last_steps[1]
     manager.env_workers[2].previous_step = last_steps[2]
     manager.env_workers[2].waiting = True
     manager._take_step = Mock(return_value=step_mock)
     res = manager.step()
     for i, env in enumerate(manager.env_workers):
         if i < 2:
             env.send.assert_called_with("step", step_mock)
             manager.step_queue.get_nowait.assert_called()
             # Check that the "last steps" are set to the value returned for each step
             self.assertEqual(
                 manager.env_workers[i].previous_step.
                 current_all_brain_info, i)
     assert res == [
         manager.env_workers[0].previous_step,
         manager.env_workers[1].previous_step,
     ]
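
These tests reference create_worker_mock and MockEnvWorker, which are defined elsewhere in the test module. A plausible reconstruction inferred from the call sites (a sketch; the import path for EnvironmentResponse is assumed):

from unittest import mock

from mlagents.trainers.subprocess_env_manager import EnvironmentResponse

class MockEnvWorker:
    # Stand-in for the real env worker: exposes just the attributes the
    # tests above poke at (send/recv mocks, previous_step, waiting).
    def __init__(self, worker_id, resp=None):
        self.worker_id = worker_id
        self.process = None
        self.conn = None
        self.send = mock.Mock()
        self.recv = mock.Mock(return_value=resp)
        self.previous_step = None
        self.waiting = False

def create_worker_mock(worker_id, step_queue, env_factory, engine_config):
    # Mirrors the signature SubprocessEnvManager.create_worker is patched with.
    return MockEnvWorker(worker_id,
                         EnvironmentResponse("reset", worker_id, worker_id))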
def make_chickAI_unity_env(options):
    """
    Build ChickAI UnityEnvironment from command line options.
    """
    engine_config = EngineConfig(
        width=options.width,
        height=options.height,
        quality_level=options.quality_level,
        time_scale=options.time_scale,
        target_frame_rate=options.target_frame_rate,
        capture_frame_rate=options.capture_frame_rate,
    )
    env_args = _build_chickAI_env_args(
        input_resolution=options.input_resolution,
        episode_steps=options.episode_steps,
        video_1_path=options.video1,
        video_2_path=options.video2,
        log_dir=options.log_dir,
        test_mode=options.test_mode)
    # Set up FloatPropertiesChannel to receive auxiliary agent information.
    agent_info_channel = FloatPropertiesChannel()
    unity_env = make_unity_env(env_path=options.env_path,
                               port=options.base_port,
                               seed=options.seed,
                               env_args=env_args,
                               engine_config=engine_config,
                               side_channels=[agent_info_channel])
    return unity_env, agent_info_channel
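
A sketch of a parser that would produce the options object this function reads; the flag defaults are illustrative assumptions, not the project's real defaults.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--env_path", default=None)
parser.add_argument("--base_port", type=int, default=5005)
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--width", type=int, default=80)
parser.add_argument("--height", type=int, default=80)
parser.add_argument("--quality_level", type=int, default=1)
parser.add_argument("--time_scale", type=float, default=20.0)
parser.add_argument("--target_frame_rate", type=int, default=-1)
parser.add_argument("--capture_frame_rate", type=int, default=60)
parser.add_argument("--input_resolution", type=int, default=64)
parser.add_argument("--episode_steps", type=int, default=1000)
parser.add_argument("--video1", default=None)
parser.add_argument("--video2", default=None)
parser.add_argument("--log_dir", default=None)
parser.add_argument("--test_mode", action="store_true")
options = parser.parse_args()

unity_env, agent_info_channel = make_chickAI_unity_env(options)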
Example #5
 def test_step_takes_steps_for_all_non_waiting_envs(self,
                                                    mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 3)
     manager.step_queue = Mock()
     manager.step_queue.get_nowait.side_effect = [
         EnvironmentResponse(EnvironmentCommand.STEP, 0,
                             StepResponse(0, None, {})),
         EnvironmentResponse(EnvironmentCommand.STEP, 1,
                             StepResponse(1, None, {})),
         EmptyQueue(),
     ]
     step_mock = Mock()
     last_steps = [Mock(), Mock(), Mock()]
     manager.env_workers[0].previous_step = last_steps[0]
     manager.env_workers[1].previous_step = last_steps[1]
     manager.env_workers[2].previous_step = last_steps[2]
     manager.env_workers[2].waiting = True
     manager._take_step = Mock(return_value=step_mock)
     res = manager._step()
     for i, env in enumerate(manager.env_workers):
         if i < 2:
             env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
             manager.step_queue.get_nowait.assert_called()
             # Check that the "last steps" are set to the value returned for each step
             self.assertEqual(
                 manager.env_workers[i].previous_step.
                 current_all_step_result, i)
     assert res == [
         manager.env_workers[0].previous_step,
         manager.env_workers[1].previous_step,
     ]
 def test_reset_passes_reset_params(self, mock_create_worker):
     mock_create_worker.side_effect = create_worker_mock
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 1)
     params = {"test": "params"}
     manager._reset_env(params)
     manager.env_workers[0].send.assert_called_with("reset", params)
    def test_advance(self, external_brains_mock, step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("step", worker_id, worker_id)
        )
        env_manager = SubprocessEnvManager(
            mock_env_factory, EngineConfig.default_config(), 3
        )
        external_brains_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: Mock()}
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
        step_mock.return_value = [step_info]
        env_manager.advance()

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
        env_manager.advance()
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
Example #8
def test_engine_configuration():
    sender = EngineConfigurationChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    config = EngineConfig.default_config()
    sender.set_configuration(config)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    received_data = receiver.get_and_clear_received_messages()
    assert len(received_data) == 5  # 5 different messages, one for each setting

    sent_time_scale = 4.5
    sender.set_configuration_parameters(time_scale=sent_time_scale)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
    message.read_int32()
    time_scale = message.read_float32()
    assert time_scale == sent_time_scale

    with pytest.raises(UnitySideChannelException):
        sender.set_configuration_parameters(width=None, height=42)

    with pytest.raises(UnityCommunicationException):
        # try to send data to the EngineConfigurationChannel
        sender.set_configuration_parameters(time_scale=sent_time_scale)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
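
The read pattern at the end of the test (an int32 tag, then a float32 value) suggests a small decoding helper; a sketch, assuming IncomingMessage is importable from mlagents_envs.side_channel:

from typing import Tuple

from mlagents_envs.side_channel import IncomingMessage

def decode_engine_setting(raw: bytes) -> Tuple[int, float]:
    # Each engine-configuration message carries an int32 tag identifying the
    # setting, followed by its float32 value.
    msg = IncomingMessage(raw)
    return msg.read_int32(), msg.read_float32()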
 def test_reset_passes_reset_params(self):
     SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
         worker_id, EnvironmentResponse("reset", worker_id, worker_id))
     manager = SubprocessEnvManager(mock_env_factory,
                                    EngineConfig.default_config(), 1)
     params = {"test": "params"}
     manager.reset(params)
     manager.env_workers[0].send.assert_called_with("reset", params)
Example #10
    def create_engine_config_side_channel(self) -> EngineConfigurationChannel:

        if self.play or self.inference:
            engine_configuration = EngineConfig(
                width=self.WINDOW_WIDTH.play,
                height=self.WINDOW_HEIGHT.play,
                quality_level=self.QUALITY_LEVEL.play,
                time_scale=self.TIMESCALE.play,
                target_frame_rate=self.TARGET_FRAME_RATE.play,
            )
        else:
            engine_configuration = EngineConfig(
                width=self.WINDOW_WIDTH.train,
                height=self.WINDOW_HEIGHT.train,
                quality_level=self.QUALITY_LEVEL.train,
                time_scale=self.TIMESCALE.train,
                target_frame_rate=self.TARGET_FRAME_RATE.train,
            )
        engine_configuration_channel = EngineConfigurationChannel()
        engine_configuration_channel.set_configuration(engine_configuration)
        return engine_configuration_channel
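
The .train/.play attribute pairs above imply per-mode settings containers; a hypothetical equivalent (the real class and its values live elsewhere in that codebase):

from typing import NamedTuple

class TrainPlaySetting(NamedTuple):
    train: float
    play: float

# Illustrative values only.
WINDOW_WIDTH = TrainPlaySetting(train=80, play=1280)
WINDOW_HEIGHT = TrainPlaySetting(train=80, play=720)
TIMESCALE = TrainPlaySetting(train=20.0, play=1.0)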
Example #11
def _make_unity_env(
        env_path: Optional[str] = None,
        port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
        seed: int = -1,
        env_args: Optional[List[str]] = None,
        engine_config: Optional[EngineConfig] = None,
        side_channels: Optional[List[SideChannel]] = None) -> UnityEnvironment:
    """
    Create a UnityEnvironment.
    """
    # Use Unity Editor if env file is not provided.
    if env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    else:
        launch_string = UnityEnvironment.validate_environment_path(env_path)
        if launch_string is None:
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
            )
        logger.info(f"Starting environment from {env_path}.")

    # Configure Unity Engine.
    if engine_config is None:
        engine_config = EngineConfig.default_config()

    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)

    if side_channels is None:
        side_channels = [engine_configuration_channel]
    else:
        side_channels.append(engine_configuration_channel)

    # Find an available port to connect to Unity environment.
    while True:
        try:
            env = UnityEnvironment(
                file_name=env_path,
                seed=seed,
                base_port=port,
                args=env_args,
                side_channels=side_channels,
            )
        except UnityWorkerInUseException:
            logger.debug(f"port {port} in use.")
            port += 1
        else:
            logger.info(f"Connected to environment using port {port}.")
            break

    return env
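
A sketch of calling this helper, relying only on the signature above; the build path and engine values are hypothetical. The retry loop works because UnityWorkerInUseException signals that the probed port is taken, so incrementing and retrying eventually finds a free one.

from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig

config = EngineConfig(width=1280, height=720, quality_level=5,
                      time_scale=1.0, target_frame_rate=60,
                      capture_frame_rate=60)
env = _make_unity_env(
    env_path="builds/MyEnv",  # hypothetical build path
    seed=42,
    engine_config=config)
env.reset()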
Example #12
def test_subprocess_env_raises_errors(num_envs):
    def failing_env_factory(worker_id, config):
        import time

        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response.  We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
        time.sleep(0.1)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(failing_env_factory,
                                       EngineConfig.default_config(), num_envs)
    with pytest.raises(UnityEnvironmentException):
        env_manager.reset()
    env_manager.close()
    def test_reset_collects_results_from_all_envs(self):
        SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
            worker_id, EnvironmentResponse("reset", worker_id, worker_id))
        manager = SubprocessEnvManager(mock_env_factory,
                                       EngineConfig.default_config(), 4)

        params = {"test": "params"}
        res = manager.reset(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with("reset", (params))
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_brain_info, i)
        assert res == list(
            map(lambda ew: ew.previous_step, manager.env_workers))
Example #14
    def test_reset_collects_results_from_all_envs(self, mock_create_worker):
        mock_create_worker.side_effect = create_worker_mock
        manager = SubprocessEnvManager(mock_env_factory,
                                       EngineConfig.default_config(), 4)

        params = {"test": "params"}
        res = manager._reset_env(params)
        for i, env in enumerate(manager.env_workers):
            env.send.assert_called_with(EnvironmentCommand.RESET, params)
            env.recv.assert_called()
            # Check that the "last steps" are set to the value returned for each step
            self.assertEqual(
                manager.env_workers[i].previous_step.current_all_step_result,
                i)
        assert res == list(
            map(lambda ew: ew.previous_step, manager.env_workers))
def test_subprocess_env_endtoend(num_envs):
    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    trainer_config = generate_config(PPO_CONFIG)
    # Run PPO using env_manager
    _check_environment_trains(
        simple_env_factory(0, []),
        trainer_config,
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(val > 0.99
               for val in StatsReporter.writers[0].get_last_rewards().values())
    env_manager.close()
Example #16
 def __init__(
     self,
     env_path: Optional[str] = None,
     imprint_video: Optional[str] = None,
     test_video: Optional[str] = None,
     log_dir: Optional[str] = None,
     input_resolution: int = 64,
     episode_steps: int = 1000,
     seed: int = 0,
     test_mode: bool = False,
     base_port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
     time_scale: int = 20,
     capture_frame_rate: int = 60,
     width: int = 80,
     height: int = 80,
     use_visual: bool = True,
     **kwargs,
 ):
     engine_config = EngineConfig(
         width=width,
         height=height,
         quality_level=5,
         time_scale=time_scale,
         target_frame_rate=-1,
         capture_frame_rate=capture_frame_rate,
     )
     env_args = _build_chickAI_env_args(input_resolution=input_resolution,
                                        episode_steps=episode_steps,
                                        imprint_video=imprint_video,
                                        test_video=test_video,
                                        log_dir=log_dir,
                                        test_mode=test_mode)
     agent_info_channel = FloatPropertiesChannel()
     unity_env = _make_unity_env(env_path=env_path,
                                 port=base_port,
                                 seed=seed,
                                 env_args=env_args,
                                 engine_config=engine_config,
                                 side_channels=[agent_info_channel])
     env = UnityToGymWrapper(unity_env,
                             flatten_branched=True,
                             use_visual=use_visual)
     super().__init__(env)
     self.env = env
     self.agent_info_channel = agent_info_channel
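
Assuming this __init__ belongs to a gym.Wrapper subclass (called ChickAIEnv here for illustration), a standard old-style Gym rollout against it would look like:

env = ChickAIEnv(
    env_path="builds/chickai",          # hypothetical paths
    imprint_video="videos/imprint.mp4")
obs = env.reset()
done = False
while not done:
    # UnityToGymWrapper follows the classic 4-tuple Gym step API.
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()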
Example #17
def test_subprocess_failing_step(num_envs):
    def failing_step_env_factory(_worker_id, _config):
        env = UnexpectedExceptionEnvironment(["1D"],
                                             use_discrete=True,
                                             to_raise=CustomTestOnlyException)
        return env

    env_manager = SubprocessEnvManager(failing_step_env_factory,
                                       EngineConfig.default_config())
    # Expect the exception raised to be routed back up to the top level.
    with pytest.raises(CustomTestOnlyException):
        check_environment_trains(
            failing_step_env_factory(0, []),
            {"1D": ppo_dummy_config()},
            env_manager=env_manager,
            success_threshold=None,
        )
    env_manager.close()
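
CustomTestOnlyException is presumably a sentinel type defined alongside the test, something like:

class CustomTestOnlyException(Exception):
    # A type no library code raises, so pytest.raises above can only pass if
    # the error from the subprocess was genuinely forwarded to this process.
    pass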
Example #18
def test_subprocess_env_endtoend(num_envs):
    def simple_env_factory(worker_id, config):
        env = SimpleEnvironment(["1D"], use_discrete=True)
        return env

    env_manager = SubprocessEnvManager(simple_env_factory,
                                       EngineConfig.default_config(), num_envs)
    # Run PPO using env_manager
    check_environment_trains(
        simple_env_factory(0, []),
        {"1D": ppo_dummy_config()},
        env_manager=env_manager,
        success_threshold=None,
    )
    # Note we can't check the env's rewards directly (since they're in separate processes) so we
    # check the StatsReporter's debug stat writer's last reward.
    assert isinstance(StatsReporter.writers[0], DebugWriter)
    assert all(val > 0.7
               for val in StatsReporter.writers[0].get_last_rewards().values())
    env_manager.close()
Example #19
    def test_advance(self, mock_create_worker, training_behaviors_mock,
                     step_mock):
        brain_name = "testbrain"
        action_info_dict = {brain_name: MagicMock()}
        mock_create_worker.side_effect = create_worker_mock
        env_manager = SubprocessEnvManager(mock_env_factory,
                                           EngineConfig.default_config(), 3)
        training_behaviors_mock.return_value = [brain_name]
        agent_manager_mock = mock.Mock()
        mock_policy = mock.Mock()
        agent_manager_mock.policy_queue.get_nowait.side_effect = [
            mock_policy,
            mock_policy,
            AgentManagerQueue.Empty(),
        ]
        env_manager.set_agent_manager(brain_name, agent_manager_mock)

        step_info_dict = {brain_name: (Mock(), Mock())}
        env_stats = {
            "averaged": (1.0, StatsAggregationMethod.AVERAGE),
            "most_recent": (2.0, StatsAggregationMethod.MOST_RECENT),
        }
        step_info = EnvironmentStep(step_info_dict, 0, action_info_dict,
                                    env_stats)
        step_mock.return_value = [step_info]
        env_manager.process_steps(env_manager.get_steps())

        # Test add_experiences
        env_manager._step.assert_called_once()

        agent_manager_mock.add_experiences.assert_called_once_with(
            step_info.current_all_step_result[brain_name][0],
            step_info.current_all_step_result[brain_name][1],
            0,
            step_info.brain_name_to_action_info[brain_name],
        )

        # Test policy queue
        assert env_manager.policies[brain_name] == mock_policy
        assert agent_manager_mock.policy == mock_policy
Example #20
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
    else:
        model_path = f"/{options.docker_target_name}/models/{options.run_id}"
        summaries_dir = f"/{options.docker_target_name}/summaries"
    port = options.base_port

    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=[
            "Environment/Cumulative Reward", "Environment/Episode Length"
        ],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)

    if options.env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum_config, env_manager, options.lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_config, run_seed)
    trainer_factory = TrainerFactory(
        options.trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id,
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
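
For reference, the five positional arguments passed to EngineConfig above map onto a NamedTuple shaped roughly like the sketch below; the field order is inferred from the call, and the default_config values are an assumption.

from typing import NamedTuple

class EngineConfig(NamedTuple):
    width: int
    height: int
    quality_level: int
    time_scale: float
    target_frame_rate: int

    @staticmethod
    def default_config() -> "EngineConfig":
        # Assumed defaults: small window, fast time scale, uncapped frame rate.
        return EngineConfig(80, 80, 1, 20.0, -1)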
Example #21
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    with hierarchical_timer("run_training.setup"):
        model_path = f"./models/{options.run_id}"
        maybe_init_path = (
            f"./models/{options.initialize_from}" if options.initialize_from else None
        )
        summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        handle_existing_directories(
            model_path, summaries_dir, options.resume, options.force, maybe_init_path
        )
        tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            width=options.width,
            height=options.height,
            quality_level=options.quality_level,
            time_scale=options.time_scale,
            target_frame_rate=options.target_frame_rate,
            capture_frame_rate=options.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            not options.inference,
            options.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            not options.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
Example #22
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )

        # Configure Tensorboard Writers and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Example #23
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """

    options.checkpoint_settings.run_id = "test8"

    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (os.path.join(base_path,
                                        checkpoint_settings.initialize_from)
                           if checkpoint_settings.initialize_from else None)
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json"))
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

    engine_config = EngineConfig(
        width=engine_settings.width,
        height=engine_settings.height,
        quality_level=engine_settings.quality_level,
        time_scale=engine_settings.time_scale,
        target_frame_rate=engine_settings.target_frame_rate,
        capture_frame_rate=engine_settings.capture_frame_rate,
    )
    if env_settings.env_path is None:
        port = None
    # Begin training

    env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
    env_factory = create_environment_factory(
        env_settings.env_path,
        engine_settings.no_graphics,
        run_seed,
        port,
        env_settings.env_args,
        os.path.abspath(
            run_logs_dir),  # Unity environment requires absolute path
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       env_settings.num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum, env_manager, restore=checkpoint_settings.resume)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.parameter_randomization, run_seed)
    max_steps = options.behaviors['Brain'].max_steps
    options.behaviors['Brain'].max_steps = 10

    trainer_factory = TrainerFactory(options,
                                     write_path,
                                     not checkpoint_settings.inference,
                                     checkpoint_settings.resume,
                                     run_seed,
                                     maybe_init_path,
                                     maybe_meta_curriculum,
                                     False,
                                     total_steps=0)
    trainer_factory.trainer_config[
        'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        write_path,
        checkpoint_settings.run_id,
        maybe_meta_curriculum,
        not checkpoint_settings.inference,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    try:
        # Get initial weights
        tc.init_weights(env_manager)
        initial_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        # NOTE: random_sample() returns values in [0, 1), so this branch never
        # triggers and every meta-update uses the Find Target stage.
        if sample > 1:
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(
                run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)

        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum,
            env_manager,
            restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)

        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=step)

        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate = 0.0005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.beta = 0.005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.epsilon = 0.2 * (
                1 - counter / max_meta_updates)
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.
            learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            # Get initial weights
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Initial weights: " + str(initial_weights[8]))
            weights_after_train = tc.start_learning(env_manager,
                                                    initial_weights)

            print(tc.trainers['Brain'].optimizer)

            # weights_after_train = tc.weights
            # print("Trained weights: " + str(weights_after_train[8]))
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
            # print(weights_after_train)
            # equal = []
            # for i, weight in enumerate(tc.weights):
            #     equal.append(np.array_equal(initial_weights[i], weights_after_train[i]))
            # print(all(equal))
        finally:
            print(len(weights_after_train), len(initial_weights))
            for i, weight in enumerate(weights_after_train):
                initial_weights[i] = weights_after_train[i]
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
Example #24
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Example #25
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    run_options: RunOptions,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(restricted_loads(pickled_env_factory))
    env_parameters = EnvironmentParametersChannel()

    engine_config = EngineConfig(
        width=run_options.engine_settings.width,
        height=run_options.engine_settings.height,
        quality_level=run_options.engine_settings.quality_level,
        time_scale=run_options.engine_settings.time_scale,
        target_frame_rate=run_options.engine_settings.target_frame_rate,
        capture_frame_rate=run_options.engine_settings.capture_frame_rate,
    )
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)

    stats_channel = StatsSideChannel()
    training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
    if worker_id == 0:
        training_analytics_channel = TrainingAnalyticsSideChannel()
    env: Optional[UnityEnvironment] = None
    # Set log level. On some platforms, the logger isn't shared with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    try:
        side_channels = [env_parameters, engine_configuration_channel, stats_channel]
        if training_analytics_channel is not None:
            side_channels.append(training_analytics_channel)

        env = env_factory(worker_id, side_channels)
        if (
            not env.academy_capabilities
            or not env.academy_capabilities.trainingAnalytics
        ):
            # Make sure we don't try to send training analytics if the environment doesn't know how to process
            # them. This wouldn't be catastrophic, but would result in unknown SideChannel UUIDs being used.
            training_analytics_channel = None
        if training_analytics_channel:
            training_analytics_channel.environment_initialized(run_options)

        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.agent_ids) > 0:
                        env.set_actions(brain_name, action_info.env_action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent of the other
                # worker processes and the "main" process, so after we send
                # back the root timer we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(
                    all_step_result, get_timer_root(), env_stats
                )
                step_queue.put(
                    EnvironmentResponse(
                        EnvironmentCommand.STEP, worker_id, step_response
                    )
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
                for k, v in req.payload.items():
                    if isinstance(v, ParameterRandomizationSettings):
                        v.apply(k, env_parameters)
            elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
                behavior_name, trainer_config = req.payload
                if training_analytics_channel:
                    training_analytics_channel.training_started(
                        behavior_name, trainer_config
                    )
            elif req.cmd == EnvironmentCommand.RESET:
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    except Exception as ex:
        logger.exception(
            f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
        )
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
        parent_conn.close()
        step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
        step_queue.close()
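
The request/response protocol the worker speaks can be reconstructed from its dispatch logic; a sketch (the real definitions live in mlagents.trainers.subprocess_env_manager, and the enum values here are assumptions):

import enum
from typing import Any, NamedTuple

class EnvironmentCommand(enum.Enum):
    STEP = 1
    BEHAVIOR_SPECS = 2
    ENVIRONMENT_PARAMETERS = 3
    RESET = 4
    CLOSE = 5
    ENV_EXITED = 6
    CLOSED = 7
    TRAINING_STARTED = 8

class EnvironmentRequest(NamedTuple):
    cmd: EnvironmentCommand
    payload: Any = None

class EnvironmentResponse(NamedTuple):
    cmd: EnvironmentCommand
    worker_id: int
    payload: Any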
Example #26
def run_training(sub_id: int, run_seed: int, options: CommandLineOptions,
                 process_queue: Queue) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(run_id=options.run_id,
                                                         sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name)
    trainer_config = load_config(trainer_config_path)
    port = options.base_port + (sub_id * options.num_envs)

    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=[
            "Environment/Cumulative Reward", "Environment/Episode Length"
        ],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)

    if options.env_path is None:
        port = 5004  # This is the in-Editor training port
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder,
                                                       env_manager,
                                                       options.lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, run_seed)
    trainer_factory = TrainerFactory(
        trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
Example #27
    def apply_config(self):
        # set FloatProperties
        grid_size_x = self.config.get("grid_size_x")
        if not isinstance(grid_size_x, list) or len(grid_size_x) != 2:
            raise ValueError(
                "The provided grid_size_x parameter is not a [min, max] "
                "list. Please correct this.")
        grid_size_y = self.config.get("grid_size_y")
        if not isinstance(grid_size_y, list) or len(grid_size_y) != 2:
            raise ValueError(
                "The provided grid_size_y parameter is not a [min, max] "
                "list. Please correct this.")

        vis_obs_size = self.config.get("vis_obs_size")
        if not isinstance(vis_obs_size, list) or len(vis_obs_size) != 2:
            raise ValueError(
                "The provided vis_obs_size parameter is not a [min, max] "
                "list. Please correct this.")

        base_size_x = self.config.get("base_size_x")
        if not isinstance(base_size_x, list) or len(base_size_x) != 2:
            raise ValueError(
                "The provided base_size_x parameter is not a [min, max] "
                "list. Please correct this.")
        base_size_y = self.config.get("base_size_y")
        if not isinstance(base_size_y, list) or len(base_size_y) != 2:
            raise ValueError(
                "The provided base_size_y parameter is not a [min, max] "
                "list. Please correct this.")
        num_per_base_type = self.config.get("num_per_base_type")
        if not isinstance(num_per_base_type,
                          list) or len(num_per_base_type) != 2:
            raise ValueError(
                "The provided num_per_base_type parameter is not a "
                "[min, max] list. Please correct this.")

        num_per_item = self.config.get("num_per_item")
        if not isinstance(num_per_item, list) or len(num_per_item) != 2:
            raise ValueError(
                "The provided num_per_item parameter is not a [min, max] "
                "list. Please correct this.")

        color_pool = self.config.get("color_pool")
        if not isinstance(color_pool, list):
            raise ValueError(
                "The provided color_pool parameter is not of type list. "
                "Please correct this.")

        camera_type = self.config.get("camera_type")
        # .get avoids a KeyError when the camera type is unknown or unset.
        camera_type_f: float = CAMERA_TYPES.get(camera_type) or 0.0

        # set properties in reset channel
        self.env_param_channel.set_float_parameter("minGridSizeX",
                                                   grid_size_x[0])
        self.env_param_channel.set_float_parameter("maxGridSizeX",
                                                   grid_size_x[1])
        self.env_param_channel.set_float_parameter("minGridSizeY",
                                                   grid_size_y[0])
        self.env_param_channel.set_float_parameter("maxGridSizeY",
                                                   grid_size_y[1])
        self.env_param_channel.set_float_parameter("cameraType", camera_type_f)
        # area settings
        # check if num train areas should be set
        if self.is_already_initialized:
            print("You're trying to change the number of "
                  "train areas, during runtime. This is only possible at "
                  "initialization.")
        else:
            self.env_param_channel.set_float_parameter(
                "numTrainAreas", self.config.get("num_train_areas"))

        self.env_param_channel.set_float_parameter(
            "numBaseTypesToUse", self.config.get("num_base_types"))
        self.env_param_channel.set_float_parameter("numberPerBaseTypeMax",
                                                   num_per_base_type[1])
        self.env_param_channel.set_float_parameter("numberPerBaseTypeMin",
                                                   num_per_base_type[0])
        self.env_param_channel.set_float_parameter("baseSizeXMax",
                                                   base_size_x[1])
        self.env_param_channel.set_float_parameter("baseSizeXMin",
                                                   base_size_x[0])
        self.env_param_channel.set_float_parameter("baseSizeZMax",
                                                   base_size_y[1])
        self.env_param_channel.set_float_parameter("baseSizeZMin",
                                                   base_size_y[0])
        self.env_param_channel.set_float_parameter(
            "baseInCornersOnly",
            1 if self.config.get("base_in_corners_only") else 0)
        self.env_param_channel.set_float_parameter(
            "boxesVanish", 1 if self.config.get("boxes_vanish") else 0)
        self.env_param_channel.set_float_parameter(
            "boxesNeedDrop", 1 if self.config.get("boxes_need_drop") else 0)
        self.env_param_channel.set_float_parameter(
            "sparseReward", 1 if self.config.get("sparse_reward_only") else 0)
        # color settings
        self.env_param_channel.set_float_parameter(
            "noBaseFillColor",
            1 if self.config.get("no_base_fill_color") else 0)
        self.env_param_channel.set_float_parameter(
            "brighterBases", 1 if self.config.get("brighter_bases") else 0)
        self.env_param_channel.set_float_parameter(
            "full_base_line", 1 if self.config.get("fullBaseLine") else 0)
        # item settings
        self.env_param_channel.set_float_parameter(
            "numItemTypesToUse", self.config.get("num_item_types"))
        self.env_param_channel.set_float_parameter("numberPerItemTypeMax",
                                                   num_per_item[1])
        self.env_param_channel.set_float_parameter("numberPerItemTypeMin",
                                                   num_per_item[0])
        # general settings
        self.env_param_channel.set_float_parameter(
            "noDisplay", 1 if self.config.get("no_display") else 0)
        self.env_param_channel.set_float_parameter("visObsWidth",
                                                   vis_obs_size[0])
        self.env_param_channel.set_float_parameter("visObsHeight",
                                                   vis_obs_size[1])
        self.env_param_channel.set_float_parameter(
            "useVisual", 1 if self.config.get("use_visual")
            and not self.config.get("use_object_property_camera") else 0)
        self.env_param_channel.set_float_parameter(
            "useRayPerception",
            1 if self.config.get("use_ray_perception") else 0)
        self.env_param_channel.set_float_parameter(
            "useObjectPropertyCamera",
            1 if self.config.get("use_object_property_camera") else 0)
        self.env_param_channel.set_float_parameter(
            "maxSteps", self.config.get("max_steps"))
        self.env_param_channel.set_float_parameter(
            "taskLevel", self.config.get("task_level"))

        # Read the engine config dict and build the engine configuration.
        engine_config_dict = self.config.get("engine_config")
        engine_config = EngineConfig(
            width=engine_config_dict.get("window_width"),
            height=engine_config_dict.get("window_height"),
            quality_level=engine_config_dict.get("quality_level"),
            time_scale=engine_config_dict.get("sim_speed"),
            target_frame_rate=engine_config_dict.get("target_frame_rate"),
            capture_frame_rate=60)
        self.engine_channel.set_configuration(engine_config)

        # set list properties
        self.color_pool_channel.set_property("colorPool",
                                             self.config.get("color_pool"))
        self.is_already_initialized = True
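
The repeated min/max checks in apply_config could be collapsed into one helper; a sketch of that refactor (the helper name is hypothetical):

def _require_min_max_pair(config: dict, key: str) -> list:
    # Fetch config[key] and verify it is a [min, max] list of length two.
    value = config.get(key)
    if not isinstance(value, list) or len(value) != 2:
        raise ValueError(f"The provided {key} parameter is not a "
                         "[min, max] list. Please correct this.")
    return value

# Usage inside apply_config:
#     grid_size_x = _require_min_max_pair(self.config, "grid_size_x")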
Example #28
def run_training_aai(run_seed: int, options: RunOptionsAAI) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: training parameters
    """
    with hierarchical_timer("run_training.setup"):
        # Recognize and use docker volume if one is passed as an argument
        # if not options.docker_target_name:
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
        # else:
        #     model_path = f"/{options.docker_target_name}/models/{options.run_id}"
        #     summaries_dir = f"/{options.docker_target_name}/summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(summaries_dir)
        gauge_write = GaugeWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)

        if options.env_path is None:
            port = AnimalAIEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory_aai(
            options.env_path,
            # options.docker_target_name,
            run_seed,
            port,
            options.n_arenas_per_env,
            options.arena_config,
            options.resolution,
        )
        if options.train_model:
            engine_config = EngineConfig(
                options.width,
                options.height,
                AnimalAIEnvironment.QUALITY_LEVEL.train,
                AnimalAIEnvironment.TIMESCALE.train,
                AnimalAIEnvironment.TARGET_FRAME_RATE.train,
            )
        else:
            engine_config = EngineConfig(
                AnimalAIEnvironment.WINDOW_WIDTH.play,
                AnimalAIEnvironment.WINDOW_HEIGHT.play,
                AnimalAIEnvironment.QUALITY_LEVEL.play,
                AnimalAIEnvironment.TIMESCALE.play,
                AnimalAIEnvironment.TARGET_FRAME_RATE.play,
            )
        env_manager = SubprocessEnvManagerAAI(env_factory, engine_config,
                                              options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson)
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            options.train_model,
            options.load_model,
            run_seed,
            maybe_meta_curriculum,
            # options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerControllerAAI(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            options.train_model,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)