示例#1
0
def test_globaltrainingstatus(tmpdir):
    """Round-trip a GlobalTrainingStatus value through a JSON file on disk.

    Stores a lesson number for "Category1", saves the state to a file under
    ``tmpdir``, inspects the serialized JSON, reloads it, and finally checks
    that lookups for an unknown category or an unknown status key give
    ``None``.
    """

    class FakeStatusType(Enum):
        # Enum member used below as a status key that was never stored.
        NOTAREALKEY = "notarealkey"

    category = "Category1"
    lesson_key = StatusType.LESSON_NUM
    state_file = os.path.join(tmpdir, "test.json")

    GlobalTrainingStatus.set_parameter_state(category, lesson_key, 3)
    GlobalTrainingStatus.save_state(state_file)

    with open(state_file) as handle:
        saved = json.load(handle)

    # The file is keyed by category, then by the enum's value.
    assert category in saved
    assert lesson_key.value in saved[category]
    assert saved[category][lesson_key.value] == 3
    assert "metadata" in saved

    GlobalTrainingStatus.load_state(state_file)
    assert GlobalTrainingStatus.get_parameter_state(category, lesson_key) == 3

    # Unknown categories and unknown status types (keys) both yield None.
    missing_category = GlobalTrainingStatus.get_parameter_state(
        "Category3", lesson_key
    )
    missing_key = GlobalTrainingStatus.get_parameter_state(
        category, FakeStatusType.NOTAREALKEY
    )
    assert missing_category is None
    assert missing_key is None
示例#2
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches a meta-learning training session.

    A first controller is built only to obtain the starting network weights.
    Afterwards ``max_meta_updates`` training runs are executed in sequence,
    each on a freshly created environment manager; the weights returned by one
    run are passed as the starting weights of the next.

    NOTE: the run id, the environment executable paths and the behavior name
    ``'Brain'`` are hard-coded below and override what ``options`` carries.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments. Mutated in place: the run
        id and the environment path are overwritten, and the 'Brain'
        ``max_steps`` is temporarily lowered while the initial weights are
        fetched.
    """

    # NOTE(review): hard-coded run id replaces the one supplied in options.
    options.checkpoint_settings.run_id = "test8"

    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (os.path.join(base_path,
                                        checkpoint_settings.initialize_from)
                           if checkpoint_settings.initialize_from else None)
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json"))
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

    engine_config = EngineConfig(
        width=engine_settings.width,
        height=engine_settings.height,
        quality_level=engine_settings.quality_level,
        time_scale=engine_settings.time_scale,
        target_frame_rate=engine_settings.target_frame_rate,
        capture_frame_rate=engine_settings.capture_frame_rate,
    )
    # The port decision is taken on the caller-supplied env path, before that
    # path is replaced by the hard-coded executable below.
    if env_settings.env_path is None:
        port = None

    # Environment used only to obtain the initial weights.
    env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
    env_factory = create_environment_factory(
        env_settings.env_path,
        engine_settings.no_graphics,
        run_seed,
        port,
        env_settings.env_args,
        os.path.abspath(
            run_logs_dir),  # Unity environment requires absolute path
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       env_settings.num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum, env_manager, restore=checkpoint_settings.resume)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.parameter_randomization, run_seed)
    # Remember the configured step budget; it is lowered to 10 for the
    # weight-initialization controller and restored before the meta loop.
    max_steps = options.behaviors['Brain'].max_steps
    options.behaviors['Brain'].max_steps = 10

    trainer_factory = TrainerFactory(options,
                                     write_path,
                                     not checkpoint_settings.inference,
                                     checkpoint_settings.resume,
                                     run_seed,
                                     maybe_init_path,
                                     maybe_meta_curriculum,
                                     False,
                                     total_steps=0)
    trainer_factory.trainer_config[
        'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

    # Create controller and fetch the starting weights.
    tc = TrainerController(
        trainer_factory,
        write_path,
        checkpoint_settings.run_id,
        maybe_meta_curriculum,
        not checkpoint_settings.inference,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    try:
        # Get initial weights
        tc.init_weights(env_manager)
        initial_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    max_meta_updates = 200
    for counter in range(max_meta_updates):
        sample = np.random.random_sample()
        # NOTE(review): random_sample() draws from [0.0, 1.0), so this
        # condition never holds and the Carry Object stage is never selected.
        # Confirm whether the threshold is intentional before changing it.
        if sample > 1:
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(
                run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)

        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum,
            env_manager,
            restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)

        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=step)

        # Learning rate, beta and epsilon are scaled down linearly with the
        # meta-update index.
        hyperparameters = trainer_factory.trainer_config[
            'Brain'].hyperparameters
        decay = 1 - counter / max_meta_updates
        hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        hyperparameters.learning_rate = 0.0005 * decay
        hyperparameters.beta = 0.005 * decay
        hyperparameters.epsilon = 0.2 * decay
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            hyperparameters.learning_rate,
            hyperparameters.beta,
            hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            # Train starting from the weights carried over so far.
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Inital weights: " + str(initial_weights[8]))
            weights_after_train = tc.start_learning(env_manager,
                                                    initial_weights)

            print(tc.trainers['Brain'].optimizer)

            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))

            # Carry the trained weights into the next meta update. This is
            # done on the success path only: if start_learning raised,
            # weights_after_train would be unbound and touching it during
            # cleanup would mask the real error and skip closing the
            # environment.
            print(len(weights_after_train), len(initial_weights))
            for i, weight in enumerate(weights_after_train):
                initial_weights[i] = weight
        finally:
            # Always release the environment and persist run artifacts.
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
示例#3
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Set up and run one training session.

    Output directories, stats writers, the environment manager, the trainer
    factory and the controller are all created inside the
    ``run_training.setup`` timer. Learning then starts; the environment is
    closed and the run options, timing tree and training status are written
    whether or not learning raises.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        resuming = checkpoint_settings.resume

        # All output for this run lives under results/<run_id>.
        results_root = "results"
        run_dir = os.path.join(results_root, checkpoint_settings.run_id)
        init_from = checkpoint_settings.initialize_from
        if init_from is not None:
            init_dir = os.path.join(results_root, init_from)
        else:
            init_dir = None
        logs_dir = os.path.join(run_dir, "run_logs")

        # No port is passed along when no environment path was supplied.
        port: Optional[int] = (
            None if env_settings.env_path is None else env_settings.base_port
        )

        # Check if directory exists
        validate_existing_directories(
            run_dir, resuming, checkpoint_settings.force, init_dir
        )
        # Make run logs directory
        os.makedirs(logs_dir, exist_ok=True)
        # Load any needed states
        if resuming:
            status_file = os.path.join(logs_dir, "training_status.json")
            GlobalTrainingStatus.load_state(status_file)

        # Configure Tensorboard Writers and StatsReporter
        stat_writers = (
            TensorboardWriter(run_dir, clear_past_data=not resuming),
            GaugeWriter(),
            ConsoleWriter(),
        )
        for writer in stat_writers:
            StatsReporter.add_writer(writer)

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=resuming
        )

        is_training = not checkpoint_settings.inference
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=run_dir,
            train_model=is_training,
            load_model=resuming,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=init_dir,
            multi_gpu=False,
        )
        # Create controller and begin training.
        controller = TrainerController(
            trainer_factory,
            run_dir,
            checkpoint_settings.run_id,
            env_parameter_manager,
            is_training,
            run_seed,
        )

    # Begin training
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(run_dir, options)
        write_timing_tree(logs_dir)
        write_training_status(logs_dir)
示例#4
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Set up and run one training session with CSV and Tensorboard reporting.

    Output directories, stats writers, the environment manager, the optional
    meta curriculum, the trainer factory and the controller are created inside
    the ``run_training.setup`` timer. Learning then starts; the environment is
    closed and the run options, timing tree and training status are written
    whether or not learning raises.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        resuming = checkpoint_settings.resume

        # All output for this run lives under results/<run_id>.
        results_root = "results"
        run_dir = os.path.join(results_root, checkpoint_settings.run_id)
        init_from = checkpoint_settings.initialize_from
        if init_from:
            init_dir = os.path.join(results_root, init_from)
        else:
            init_dir = None
        logs_dir = os.path.join(run_dir, "run_logs")

        # No port is passed along when no environment path was supplied.
        port: Optional[int] = (
            None if env_settings.env_path is None else env_settings.base_port
        )

        # Check if directory exists
        handle_existing_directories(
            run_dir, resuming, checkpoint_settings.force, init_dir
        )
        # Make run logs directory
        os.makedirs(logs_dir, exist_ok=True)
        # Load any needed states
        if resuming:
            status_file = os.path.join(logs_dir, "training_status.json")
            GlobalTrainingStatus.load_state(status_file)

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_fields = [
            "Environment/Cumulative Reward",
            "Environment/Episode Length",
        ]
        csv_writer = CSVWriter(run_dir, required_fields=csv_fields)
        tb_writer = TensorboardWriter(run_dir, clear_past_data=not resuming)
        # Registration order: Tensorboard, CSV, gauge, console.
        for writer in (tb_writer, csv_writer, GaugeWriter(), ConsoleWriter()):
            StatsReporter.add_writer(writer)

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=resuming
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)

        is_training = not checkpoint_settings.inference
        trainer_factory = TrainerFactory(
            options.behaviors,
            run_dir,
            is_training,
            resuming,
            run_seed,
            init_dir,
            maybe_meta_curriculum,
            False,
        )
        # Create controller and begin training.
        controller = TrainerController(
            trainer_factory,
            run_dir,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            is_training,
            run_seed,
        )

    # Begin training
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(run_dir, options)
        write_timing_tree(logs_dir)
        write_training_status(logs_dir)
示例#5
0
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Set up and run one training session.

    Torch configuration, output directories, stats writers, the environment
    manager, the trainer factory and the controller are prepared inside the
    ``run_training.setup`` timer. Learning then starts; the environment is
    closed and the run options, timing tree and training status are written
    whether or not learning raises.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    :param num_areas: Number of training areas to instantiate.
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        logs_dir = checkpoint_settings.run_logs_dir
        output_dir = checkpoint_settings.write_path
        init_dir = checkpoint_settings.maybe_init_path
        resuming = checkpoint_settings.resume

        # No port is passed along when no environment path was supplied.
        port: Optional[int] = (
            None if env_settings.env_path is None else env_settings.base_port
        )

        # Check if directory exists
        validate_existing_directories(
            output_dir, resuming, checkpoint_settings.force, init_dir
        )
        # Make run logs directory
        os.makedirs(logs_dir, exist_ok=True)

        if resuming:
            # Load any needed states in case of resume
            status_file = os.path.join(logs_dir, "training_status.json")
            GlobalTrainingStatus.load_state(status_file)
        elif init_dir is not None:
            # In case of initialization, set full init_path for all behaviors
            setup_init_path(options.behaviors, init_dir)

        # Configure Tensorboard Writers and StatsReporter
        for writer in register_stats_writer_plugins(options):
            StatsReporter.add_writer(writer)

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=resuming
        )

        is_training = not checkpoint_settings.inference
        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=output_dir,
            train_model=is_training,
            load_model=resuming,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=init_dir,
            multi_gpu=False,
        )
        # Create controller and begin training.
        controller = TrainerController(
            trainer_factory,
            output_dir,
            checkpoint_settings.run_id,
            env_parameter_manager,
            is_training,
            run_seed,
        )

    # Begin training
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(output_dir, options)
        write_timing_tree(logs_dir)
        write_training_status(logs_dir)