Example #1
import attr
import pytest

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
)

from mlagents.trainers.settings import NetworkSettings

from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import check_environment_trains

BRAIN_NAME = "1D"

PPO_TORCH_CONFIG = ppo_dummy_config()
SAC_TORCH_CONFIG = sac_dummy_config()


@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_ppo(action_size):
    env = SimpleEnvironment([BRAIN_NAME],
                            action_sizes=action_size,
                            step_size=0.8)
    new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
    new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters,
                                  batch_size=64,
                                  buffer_size=1024)
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
Example #2
def dummy_config():
    return attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
Example #3
def dummy_config():
    return sac_dummy_config()
Example #4
import attr
import numpy as np

from mlagents.trainers.settings import (
    EncoderType,
    FrameworkType,
)
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (
    EnvironmentParametersChannel,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous

from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config

PPO_TF_CONFIG = attr.evolve(ppo_dummy_config(),
                            framework=FrameworkType.TENSORFLOW)
SAC_TF_CONFIG = attr.evolve(sac_dummy_config(),
                            framework=FrameworkType.TENSORFLOW)

BRAIN_NAME = "1D"


# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list of all final rewards for each brain individually,
# so that final rewards can be processed differently for different algorithms.
# Custom reward processors should be built within the test function and passed
# to _check_environment_trains. The default is the mean of the last 5 final rewards.
def default_reward_processor(rewards, last_n_rewards=5):
    rewards_to_use = rewards[-last_n_rewards:]
    # For debugging tests
    print(f"Last {last_n_rewards} rewards:", rewards_to_use)
    return np.array(rewards_to_use, dtype=np.float32).mean()
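
To make the comment above concrete, here is a minimal, hypothetical sketch of a custom reward processor built inside a test and handed to the training check. It assumes the check helper accepts a reward_processor keyword argument; the import paths, environment arguments, and max_steps value are borrowed from Examples #1 and #5, and the median-based processor and test name are invented for illustration.

# Hypothetical sketch (not part of the original test file). Relies on
# BRAIN_NAME, PPO_TF_CONFIG, attr, and np defined earlier in this example;
# the two imports below follow the paths used in Examples #1 and #5.
from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.tests.check_env_trains import check_environment_trains


def median_reward_processor(rewards, last_n_rewards=5):
    # Median of the last N final rewards; less sensitive to a single bad
    # episode than the default mean.
    return float(np.median(np.array(rewards[-last_n_rewards:], dtype=np.float32)))


def test_simple_ppo_median_reward():
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.8)
    config = attr.evolve(PPO_TF_CONFIG, max_steps=10000)
    # Assumes check_environment_trains accepts a `reward_processor` kwarg.
    check_environment_trains(
        env, {BRAIN_NAME: config}, reward_processor=median_reward_processor
    )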
Example #5

import attr
import pytest

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
)

from mlagents.trainers.settings import NetworkSettings, FrameworkType

from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import check_environment_trains

BRAIN_NAME = "1D"

PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)


@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_ppo(action_size):
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
    new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
Example #6
def dummy_config():
    return attr.evolve(sac_dummy_config(), framework=FrameworkType.TENSORFLOW)
Example #7
def sac_config():
    return RunOptions(behaviors={"test_brain": sac_dummy_config()})
Example #8
    assert rsig_result.unscaled_reward.shape == (BATCH_SIZE,)


def reward_signal_update(optimizer, reward_signal_name):
    buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec)
    feed_dict = optimizer.reward_signals[reward_signal_name].prepare_update(
        optimizer.policy, buffer.make_mini_batch(0, 10), 2
    )
    out = optimizer.policy._execute_model(
        feed_dict, optimizer.reward_signals[reward_signal_name].update_dict
    )
    assert type(out) is dict


@pytest.mark.parametrize(
    "trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)
def test_gail_cc(trainer_config, gail_dummy_config):  # noqa: F811
    trainer_config.behavioral_cloning = BehavioralCloningSettings(
        demo_path=CONTINUOUS_DEMO_PATH
    )
    optimizer = create_optimizer_mock(
        trainer_config, gail_dummy_config, False, False, False
    )
    reward_signal_eval(optimizer, "gail")
    reward_signal_update(optimizer, "gail")


@pytest.mark.parametrize(
    "trainer_config", [ppo_dummy_config(), sac_dummy_config()], ids=["ppo", "sac"]
)