Example #1
def test_autoresolving_proxy_attribute():
    """
    Tests auto-resolving proxy attributes like critic (see :py:class:`maze.api.utils._ATTRIBUTE_PROXIES` for more
    info).
    """

    cartpole_env_factory = lambda: GymMazeEnv(env=gym.make("CartPole-v0"))

    _, _, critic_composer, _, _ = _get_cartpole_setup_components()
    alg_config = A2CAlgorithmConfig(n_epochs=1,
                                    epoch_length=25,
                                    patience=15,
                                    critic_burn_in_epochs=0,
                                    n_rollout_steps=100,
                                    lr=0.0005,
                                    gamma=0.98,
                                    gae_lambda=1.0,
                                    policy_loss_coef=1.0,
                                    value_loss_coef=0.5,
                                    entropy_coef=0.00025,
                                    max_grad_norm=0.0,
                                    device='cpu',
                                    rollout_evaluator=RolloutEvaluator(
                                        eval_env=SequentialVectorEnv(
                                            [cartpole_env_factory]),
                                        n_episodes=1,
                                        model_selection=None,
                                        deterministic=True))
    default_overrides = {
        "runner.normalization_samples": 1,
        "runner.concurrency": 1
    }

    rc = run_context.RunContext(env=cartpole_env_factory,
                                silent=True,
                                algorithm=alg_config,
                                critic=critic_composer,
                                runner="dev",
                                overrides=default_overrides)
    rc.train(n_epochs=1)
    assert isinstance(rc._runners[RunMode.TRAINING][0].model_composer.critic,
                      TorchSharedStateCritic)

    rc = run_context.RunContext(env=cartpole_env_factory,
                                silent=True,
                                algorithm=alg_config,
                                critic="template_state",
                                runner="dev",
                                overrides=default_overrides)
    rc.train(n_epochs=1)
    assert isinstance(rc._runners[RunMode.TRAINING][0].model_composer.critic,
                      TorchStepStateCritic)
Example #2
def test_evaluation():
    """
    Tests evaluation.
    """

    # Test with ES: No rollout evaluator in config.
    rc = run_context.RunContext(
        env=lambda: GymMazeEnv(env=gym.make("CartPole-v0")),
        silent=True,
        configuration="test",
        overrides={
            "runner.normalization_samples": 1,
            "runner.shared_noise_table_size": 10
        })
    rc.train(1)
    stats = rc.evaluate(n_episodes=5)
    assert len(stats) == 1
    assert stats[0][(BaseEnvEvents.reward, "episode_count", None)] in (5, 6)

    # Test with A2C: Partially specified rollout evaluator in config.
    rc = run_context.RunContext(
        env=lambda: GymMazeEnv(env=gym.make("CartPole-v0")),
        silent=True,
        algorithm="a2c",
        configuration="test",
        overrides={"runner.concurrency": 1})
    rc.train(1)
    stats = rc.evaluate(n_episodes=2)
    assert len(stats) == 1
    assert stats[0][(BaseEnvEvents.reward, "episode_count", None)] in (2, 3)

    # Test with A2C and instantiated RolloutEvaluator.
    rc = run_context.RunContext(
        env=lambda: GymMazeEnv(env=gym.make("CartPole-v0")),
        silent=True,
        algorithm="a2c",
        configuration="test",
        overrides={
            "runner.concurrency": 1,
            "algorithm.rollout_evaluator": RolloutEvaluator(
                eval_env=SequentialVectorEnv([lambda: GymMazeEnv("CartPole-v0")]),
                n_episodes=1,
                model_selection=None,
                deterministic=True)
        })
    rc.train(1)
    stats = rc.evaluate(n_episodes=5)
    assert len(stats) == 1
    assert stats[0][(BaseEnvEvents.reward, "episode_count", None)] in (1, 2)
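A note on the assertions above: when an instantiated RolloutEvaluator is supplied via overrides, its n_episodes=1 takes precedence over the n_episodes=5 passed to evaluate(), which is why the expected episode count drops to (1, 2). Statistics are read back with (event, name, group) keys; the following is a minimal sketch reusing the last rc, and the key layout beyond episode_count is an assumption:

stats = rc.evaluate(n_episodes=5)
episode_count = stats[0][(BaseEnvEvents.reward, "episode_count", None)]
print(f"evaluated {episode_count} episode(s)")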
Example #3
def test_inconsistency_identification_type_3() -> None:
    """
    Tests identification of inconsistency due to derived config group.
    """

    es_dev_runner_config = {
        'state_dict_dump_file': 'state_dict.pt',
        'spaces_config_dump_file': 'spaces_config.pkl',
        'normalization_samples': 10000,
        '_target_': 'maze.train.trainers.es.ESDevRunner',
        'n_eval_rollouts': 10,
        'shared_noise_table_size': 100000000
    }
    a2c_alg_config = A2CAlgorithmConfig(
        n_epochs=1,
        epoch_length=25,
        patience=15,
        critic_burn_in_epochs=0,
        n_rollout_steps=100,
        lr=0.0005,
        gamma=0.98,
        gae_lambda=1.0,
        policy_loss_coef=1.0,
        value_loss_coef=0.5,
        entropy_coef=0.00025,
        max_grad_norm=0.0,
        device='cpu',
        rollout_evaluator=RolloutEvaluator(eval_env=SequentialVectorEnv(
            [lambda: GymMazeEnv(env="CartPole-v0")]),
                                           n_episodes=1,
                                           model_selection=None,
                                           deterministic=True))
    default_overrides = {
        "runner.normalization_samples": 1,
        "runner.concurrency": 1
    }

    rc = run_context.RunContext(algorithm=a2c_alg_config,
                                env=lambda: GymMazeEnv(env="CartPole-v0"),
                                silent=True,
                                runner="dev",
                                overrides=default_overrides)
    rc.train(1)

    rc = run_context.RunContext(env=lambda: GymMazeEnv(env="CartPole-v0"),
                                runner=es_dev_runner_config,
                                silent=True,
                                overrides=default_overrides)
    rc.train(1)
Example #4
def test_template_model_composer() -> None:
    """
    Tests behaviour with TemplateModelComposer.
    """

    default_overrides = {
        "algorithm": "es",
        "runner": "dev",
        "runner.normalization_samples": 1,
        "runner.n_eval_rollouts": 1,
        "runner.shared_noise_table_size": 10
    }

    run_context.RunContext(silent=True,
                           model="vector_obs",
                           overrides=default_overrides).train(1)

    # Plug in invalid policy.
    with pytest.raises(TypeError):
        run_context.RunContext(silent=True,
                               model="vector_obs",
                               overrides=default_overrides,
                               policy="random_policy").train(1)

    # Specify valid policy directly.
    run_context.RunContext(
        silent=True,
        model="vector_obs",
        overrides=default_overrides,
        policy={
            '_target_': 'maze.perception.models.policies.ProbabilisticPolicyComposer'
        }).train(1)

    # Specify valid policy via overrides.
    run_context.RunContext(
        overrides={
            **default_overrides,
            'model.policy._target_': 'maze.perception.models.policies.ProbabilisticPolicyComposer'
        }).train(1)

    # Specify invalid policy target.
    with pytest.raises(ValueError):
        run_context.RunContext(
            silent=True,
            model="vector_obs",
            overrides={
                **default_overrides, 'model.policy._target_': 'x'
            }).train(1)
Example #5
def test_multirun():
    """
    Tests multirun capabilities.
    """

    with pytest.raises(BaseException):
        rc = run_context.RunContext(env=lambda: GymMazeEnv('CartPole-v0'),
                                    silent=True,
                                    algorithm="ppo",
                                    overrides={
                                        "runner.normalization_samples": 1,
                                        "runner.concurrency": 1,
                                        "algorithm.lr": "0.0001,0.0005,0.001"
                                    },
                                    configuration="test",
                                    multirun=False)
        rc.train(n_epochs=1)

    with pytest.raises(TypeError):
        rc = run_context.RunContext(env=lambda: GymMazeEnv('CartPole-v0'),
                                    silent=True,
                                    algorithm="ppo",
                                    overrides={
                                        "runner.normalization_samples": 1,
                                        "runner.concurrency": 1,
                                        "algorithm.lr":
                                        [0.0001, 0.0005, 0.001]
                                    },
                                    configuration="test",
                                    multirun=False)
        rc.train(n_epochs=1)

    rc = run_context.RunContext(env=lambda: GymMazeEnv('CartPole-v0'),
                                silent=True,
                                algorithm="ppo",
                                overrides={
                                    "runner.normalization_samples": 1,
                                    "runner.concurrency": 1,
                                    "algorithm.lr": [0.0001, 0.0005, 0.001]
                                },
                                configuration="test",
                                multirun=True)
    rc.train(n_epochs=1)

    assert len(rc.policy) == 3
    assert len(rc.run_dir) == 3
    assert len(rc.config[RunMode.TRAINING]) == 3
    assert len(rc.env_factory) == 3
    assert len(rc.evaluate()) == 3
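After a multirun, every RunContext attribute fans out into one entry per swept configuration, as the assertions above show. A minimal sketch pairing runs with the swept learning rates, assuming Hydra preserves the order given in the override:

# Each entry corresponds to one lr value from the sweep (ordering is an assumption).
for lr, run_dir in zip([0.0001, 0.0005, 0.001], rc.run_dir):
    print(f"lr={lr}: artifacts in {run_dir}")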
Example #6
def test_inconsistency_identification_type_1() -> None:
    """
    Tests identification of inconsistency due to specified elements being incompatible with the run mode.
    """

    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(runner="parallel").train(1)
Example #7
def test_manual_rollout() -> None:
    """
    Test manual rollout via control loop.
    """

    env_factory = lambda: GymMazeEnv('CartPole-v0')
    rc = run_context.RunContext(env=env_factory, silent=True)
    rc.train(n_epochs=1)

    env = env_factory()
    obs = env.reset()
    for _ in range(2):
        action = rc.compute_action(obs)
        obs, reward, done, info = env.step(action)
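Extending the two-step loop into a full episode only requires checking the done flag. A minimal sketch reusing env_factory and rc from the example above, and the four-tuple Gym step signature it already relies on:

env = env_factory()
obs = env.reset()
done, total_reward = False, 0.0
while not done:
    action = rc.compute_action(obs)  # trained policy picks the action
    obs, reward, done, info = env.step(action)
    total_reward += reward
print(f"episode reward: {total_reward}")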
Example #8
def test_multiple_runs() -> None:
    """
    Tests behaviour with execution of multiple subsequent runs.
    """

    rc = run_context.RunContext(silent=True,
                                overrides={
                                    "algorithm": "es",
                                    "runner": "dev",
                                    "runner.normalization_samples": 1,
                                    "runner.n_eval_rollouts": 1,
                                    "runner.shared_noise_table_size": 10
                                })
    rc.train(n_epochs=1)
    rc.train(n_epochs=1)
Example #9
def test_env_type():
    """
    Tests whether environment is correctly wrapped.
    """

    rc = run_context.RunContext(silent=True,
                                overrides={
                                    "runner.normalization_samples": 1,
                                    "runner.shared_noise_table_size": 10
                                })
    rc.train(1)
    env = rc.env_factory()

    assert isinstance(env, MazeEnv)
    assert isinstance(env, LogStatsWrapper)
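Because the factory returns an already-wrapped environment, it can feed a manual rollout directly. A minimal sketch reusing rc and env from the example above:

obs = env.reset()
action = rc.compute_action(obs)  # wrapped observations feed the trained policy as-is
obs, reward, done, info = env.step(action)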
Example #10
def test_experiment():
    """
    Tests whether experiments are correctly loaded.
    """

    rc = run_context.RunContext(env=lambda: GymMazeEnv('CartPole-v0'),
                                silent=True,
                                overrides={
                                    "runner.normalization_samples": 1,
                                    "runner.concurrency": 1
                                },
                                experiment="cartpole_ppo_wrappers")
    rc.train(1)

    assert isinstance(rc._runners[RunMode.TRAINING][0]._trainer, PPO)
    assert rc._runners[RunMode.TRAINING][0]._cfg.algorithm.lr == 0.0001
Example #11
def test_standalone_training(alg: str, runner: str) -> None:
    """
    Tests whether instantiation and standalone training run with all supported algorithms.
    :param alg: Algorithm to train with.
    :param runner: Runner configuration module name.
    """

    overrides = {
        "env.name": "CartPole-v0",
        "runner.normalization_samples": "1"
    }

    run_context.RunContext(algorithm=alg,
                           overrides=overrides,
                           silent=True,
                           runner=runner,
                           configuration="test").train(n_epochs=1)
Example #12
def test_inconsistency_identification_type_2_b() -> None:
    """
    Tests identification of inconsistency due to codependent components.
    """

    _, _, _, a2c_alg_config, default_overrides = _generate_inconsistency_type_2_configs()

    rc = run_context.RunContext(algorithm=a2c_alg_config,
                                silent=True,
                                overrides={
                                    **default_overrides,
                                    "runner.concurrency": 2,
                                    "runner.eval_concurrency": 1,
                                },
                                runner="local")
    rc.train(1)
Example #13
def test_overrides() -> None:
    """
    Tests setting of values via overrides dictionary.
    """

    _, policy_composer, _, _, _ = _get_cartpole_setup_components()
    gym_env_name = "CartPole-v0"

    rc = run_context.RunContext(algorithm="a2c",
                                overrides={
                                    "env.name": gym_env_name,
                                    "model.policy": policy_composer,
                                    "runner.normalization_samples": 1,
                                    "runner.concurrency": 1
                                },
                                silent=True)
    rc.train(n_epochs=1)

    train_network = rc.policy.networks[0]
    env = rc.env_factory()
    assert isinstance(env.core_env, GymCoreEnv)
    assert env.core_env.env.unwrapped.spec.id == gym_env_name
    assert isinstance(train_network, FlattenConcatPolicyNet)
    assert train_network.hidden_units == [222, 222]
Example #14
def test_inconsistency_identification_type_2_a() -> None:
    """
    Tests identification of inconsistency due to codependent components.
    """

    es_dev_runner_config, a2c_dev_runner_config, invalid_a2c_dev_runner_config, a2c_alg_config, default_overrides = \
        _generate_inconsistency_type_2_configs()

    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               runner=es_dev_runner_config,
                               silent=True,
                               overrides=default_overrides)

    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm=a2c_alg_config,
                               runner=es_dev_runner_config,
                               silent=True,
                               overrides=default_overrides)

    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="es",
                               runner=a2c_dev_runner_config,
                               silent=True,
                               overrides=default_overrides)

    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               runner=invalid_a2c_dev_runner_config,
                               silent=True,
                               overrides=default_overrides)

    rc = run_context.RunContext(algorithm="es",
                                runner=es_dev_runner_config,
                                silent=True,
                                overrides=default_overrides)
    rc.train(1)

    rc = run_context.RunContext(algorithm="a2c",
                                runner=a2c_dev_runner_config,
                                silent=True,
                                overrides=default_overrides)
    rc.train(1)
Example #15
def train(n_epochs: int) -> int:
    """
    Trains agent in pure Python.

    :param n_epochs: Number of epochs to train.

    :return: 0 if successful.

    """

    # Environment setup
    # -----------------

    env = cartpole_env_factory()

    # Algorithm setup
    # ---------------

    algorithm_config = A2CAlgorithmConfig(
        n_epochs=5,
        epoch_length=25,
        patience=15,
        critic_burn_in_epochs=0,
        n_rollout_steps=100,
        lr=0.0005,
        gamma=0.98,
        gae_lambda=1.0,
        policy_loss_coef=1.0,
        value_loss_coef=0.5,
        entropy_coef=0.00025,
        max_grad_norm=0.0,
        device='cpu',
        rollout_evaluator=RolloutEvaluator(
            eval_env=SequentialVectorEnv([cartpole_env_factory]),
            n_episodes=1,
            model_selection=None,
            deterministic=True
        )
    )

    # Custom model setup
    # ------------------

    # Policy customization
    # ^^^^^^^^^^^^^^^^^^^^

    # Policy network.
    policy_net = CartpolePolicyNet(
        obs_shapes={'observation': env.observation_space.spaces['observation'].shape},
        action_logit_shapes={'action': (env.action_space.spaces['action'].n,)}
    )
    policy_networks = [policy_net]

    # Policy distribution.
    distribution_mapper = DistributionMapper(action_space=env.action_space, distribution_mapper_config={})

    # Policy composer.
    policy_composer = ProbabilisticPolicyComposer(
        action_spaces_dict=env.action_spaces_dict,
        observation_spaces_dict=env.observation_spaces_dict,
        # Derive distribution from environment's action space.
        distribution_mapper=distribution_mapper,
        networks=policy_networks,
        # We have only one agent and network, thus this is an empty list.
        substeps_with_separate_agent_nets=[],
        # We have only one step and one agent.
        agent_counts_dict={0: 1}
    )

    # Critic customization
    # ^^^^^^^^^^^^^^^^^^^^

    # Value networks.
    value_networks = {
        0: TorchModelBlock(
            in_keys='observation', out_keys='value',
            in_shapes=env.observation_space.spaces['observation'].shape,
            in_num_dims=[2],
            out_num_dims=2,
            net=CartpoleValueNet({'observation': env.observation_space.spaces['observation'].shape})
        )
    }

    # Critic composer.
    critic_composer = SharedStateCriticComposer(
        observation_spaces_dict=env.observation_spaces_dict,
        agent_counts_dict={0: 1},
        networks=value_networks,
        stack_observations=True
    )

    # Training
    # ^^^^^^^^

    rc = run_context.RunContext(
        env=cartpole_env_factory,
        algorithm=algorithm_config,
        policy=policy_composer,
        critic=critic_composer,
        runner="dev"
    )
    rc.train(n_epochs=n_epochs)

    # Distributed training
    # ^^^^^^^^^^^^^^^^^^^^

    algorithm_config.rollout_evaluator.eval_env = SubprocVectorEnv([cartpole_env_factory])
    rc = run_context.RunContext(
        env=cartpole_env_factory,
        algorithm=algorithm_config,
        policy=policy_composer,
        critic=critic_composer,
        runner="local"
    )
    rc.train(n_epochs=n_epochs)

    # Evaluation
    # ^^^^^^^^^^

    print("-----------------")
    evaluator = RolloutEvaluator(
        eval_env=LogStatsWrapper.wrap(cartpole_env_factory(), logging_prefix="eval"),
        n_episodes=1,
        model_selection=None
    )
    evaluator.evaluate(rc.policy)

    return 0
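A minimal entry point for the script above; since train() returns 0 on success, it doubles as a process exit code:

import sys

if __name__ == "__main__":
    # Assumes cartpole_env_factory and the network classes used by train()
    # are defined or imported in this module, as in the original script.
    sys.exit(train(n_epochs=1))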
Example #16
def test_inconsistency_identification_type_4_invalid() -> None:
    """
    Tests identification of inconsistency due to specification of super- and subcomponents.
    """

    model_composer, policy_composer, _, _, _ = _get_cartpole_setup_components()
    model_policy_target = "maze.perception.models.policies.ProbabilisticPolicyComposer"

    # With nesting level > 1. Both parent and child in overrides.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               overrides={
                                   "policy": policy_composer,
                                   "model.policy._target_": model_policy_target
                               })

    # With nesting level > 1, parent as explicit argument, child in overrides with full path.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(
            algorithm="a2c",
            env=lambda: GymMazeEnv(env="CartPole-v0"),
            silent=True,
            policy=policy_composer,
            overrides={"model.policy._target_": model_policy_target})

    # With nesting level > 1, parent and child both in overrides with proxy path.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               overrides={
                                   "policy": policy_composer,
                                   "policy._target_": model_policy_target
                               })

    # With override referencing explicit argument.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               model=model_composer,
                               overrides={"model.policy": policy_composer})

    # With override referencing explicit argument via proxy.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               model=model_composer,
                               overrides={"policy": policy_composer})

    # With explicit argument referencing other explicit argument.
    with pytest.raises(run_context.InvalidSpecificationError):
        run_context.RunContext(algorithm="a2c",
                               env=lambda: GymMazeEnv(env="CartPole-v0"),
                               silent=True,
                               model=model_composer,
                               policy=policy_composer)
Example #17
def test_inconsistency_identification_type_4_valid() -> None:
    """
    Tests identification of inconsistency due to specification of super- and subcomponents.
    """

    _, policy_composer, _, _, _ = _get_cartpole_setup_components()
    model_policy_target = "maze.perception.models.policies.ProbabilisticPolicyComposer"
    model_dictconfig = {
        '_target_': 'maze.perception.models.custom_model_composer.CustomModelComposer',
        'distribution_mapper_config': [{
            'action_space': 'gym.spaces.Box',
            'distribution': 'maze.distributions.squashed_gaussian.SquashedGaussianProbabilityDistribution'
        }],
        'policy': {
            '_target_': 'maze.perception.models.policies.ProbabilisticPolicyComposer',
            'networks': [{
                '_target_': 'maze.perception.models.built_in.flatten_concat.FlattenConcatPolicyNet',
                'non_lin': 'torch.nn.Tanh',
                'hidden_units': [222, 222]
            }],
            'substeps_with_separate_agent_nets': [],
            'agent_counts_dict': {0: 1}
        },
        'critic': {
            '_target_': 'maze.perception.models.critics.StateCriticComposer',
            'networks': [{
                '_target_': 'maze.perception.models.built_in.flatten_concat.FlattenConcatStateValueNet',
                'non_lin': 'torch.nn.Tanh',
                'hidden_units': [256, 256]
            }]
        }
    }
    default_overrides = {"runner.concurrency": 1}

    # With DictConfig parent (legal).
    rc = run_context.RunContext(algorithm="a2c",
                                env=lambda: GymMazeEnv(env="CartPole-v0"),
                                silent=True,
                                model=model_dictconfig,
                                policy=policy_composer,
                                runner="dev",
                                overrides=default_overrides,
                                configuration="test")
    rc.train(1)
    assert rc.policy.networks[0].hidden_units == [222, 222]

    # With config module name parent.
    rc = run_context.RunContext(algorithm="a2c",
                                env=lambda: GymMazeEnv(env="CartPole-v0"),
                                silent=True,
                                model="flatten_concat",
                                policy=policy_composer,
                                runner="dev",
                                overrides=default_overrides,
                                configuration="test")
    rc.train(1)
    assert rc.policy.networks[0].hidden_units == [222, 222]

    # With config module name parent and aliased child override.
    rc = run_context.RunContext(algorithm="a2c",
                                env=lambda: GymMazeEnv(env="CartPole-v0"),
                                silent=True,
                                policy=model_dictconfig["policy"],
                                runner="dev",
                                overrides={
                                    "policy._target_": model_policy_target,
                                    **default_overrides
                                },
                                configuration="test")
    rc.train(1)
    assert rc.policy.networks[0].hidden_units == [222, 222]

    # With config module name parent and non-aliased child override.
    rc = run_context.RunContext(
        algorithm="a2c",
        env=lambda: GymMazeEnv(env="CartPole-v0"),
        silent=True,
        policy=model_dictconfig["policy"],
        runner="dev",
        overrides={"model.policy._target_": model_policy_target},
        configuration="test")
    rc.train(1)
    assert rc.policy.networks[0].hidden_units == [222, 222]