Example #1
def rollout_agent(environment, base_agent, extender, num_steps, td_k=True):
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    environment = GymEnvironment(environment, SEED)
    agent = extender.default(
        environment,
        base_agent_name=base_agent,
        num_steps=num_steps,
        num_samples=2,
        num_iter=2,
        num_epochs=2,
        td_k=td_k,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent,
                   environment,
                   num_episodes=NUM_EPISODES,
                   max_steps=MAX_STEPS,
                   render=False)
    agent.logger.delete_directory()  # Cleanup directory.
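The test snippets above and below assume module-level imports and constants that are not shown here (SEED, NUM_EPISODES, MAX_STEPS, TARGET_UPDATE_FREQUENCY, GAMMA, plus the rllib training helpers). A minimal preamble, with placeholder values chosen purely for illustration, could look like this:

import numpy as np
import torch

from rllib.environment import GymEnvironment
from rllib.util.training.agent_training import evaluate_agent, train_agent

SEED = 0  # placeholder values; the real ones live in the original test modules
NUM_EPISODES = 10
MAX_STEPS = 25
TARGET_UPDATE_FREQUENCY = 4
GAMMA = 0.99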
Example #2
def test_tabular_interaction(agent, policy):
    LEARNING_RATE = 0.1
    environment = EasyGridWorld()

    critic = TabularQFunction(num_states=environment.num_states,
                              num_actions=environment.num_actions)
    policy = policy(critic, 0.1)
    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss

    agent = agent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )

    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.
Example #3
def test_policies(environment, policy, batch_size):
    environment = GymEnvironment(environment, SEED)

    critic = NNQFunction(
        dim_state=environment.dim_observation,
        dim_action=environment.dim_action,
        num_states=environment.num_states,
        num_actions=environment.num_actions,
        layers=LAYERS,
        tau=TARGET_UPDATE_TAU,
    )

    policy = policy(critic, 0.1)

    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss
    agent = SARSAAgent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        batch_size=batch_size,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.
Example #4
def train(agent, environment, args):
    """Train agent."""
    train_agent(
        agent=agent,
        environment=environment,
        num_episodes=args.num_train,
        max_steps=args.max_steps,
        eval_frequency=args.eval_frequency,
        print_frequency=args.print_frequency,
        render=args.render_train,
    )
Example #5
def rollout_agent(environment, agent):
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    environment = GymEnvironment(environment, SEED)
    agent = agent.default(environment, num_iter=2, num_epochs=2)
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.
Example #6
    def train_student_on_task(self,
                              student,
                              training_task,
                              action_limit,
                              eval_task_params=None,
                              pretrain=False):
        train_agent(student,
                    environment=training_task,
                    callbacks=[my_callback],
                    plot_flag=False,
                    callback_frequency=1)

        # Return a placeholder trajectory plus the globally collected rewards and done flags.
        global rews, dones
        return (None,), rews, dones
Example #7
def train_and_evaluate(agent,
                       environment,
                       params,
                       plot_callbacks=None,
                       save_milestones=None):
    """Train and evaluate agent on environment."""
    # %% Train agent.
    agent.logger.save_hparams(params.toDict())
    with gpytorch.settings.fast_computations(), \
            gpytorch.settings.fast_pred_var(), \
            gpytorch.settings.fast_pred_samples(), \
            gpytorch.settings.memory_efficient():
        train_agent(
            agent,
            environment,
            num_episodes=params.train_episodes,
            max_steps=params.environment_max_steps,
            plot_flag=params.plot_train_results,
            callback_frequency=1,
            print_frequency=params.print_frequency,
            save_milestones=save_milestones,
            render=params.render_train,
            callbacks=plot_callbacks,
        )
    agent.logger.export_to_json()  # Save statistics.

    # %% Test agent.
    metrics = dict()
    evaluate_agent(
        agent,
        environment,
        num_episodes=params.test_episodes,
        max_steps=params.environment_max_steps,
        render=params.render_test,
    )

    returns = np.mean(
        agent.logger.get("environment_return")[-params.test_episodes:])
    metrics.update({"test/test_env_returns": returns})
    returns = np.mean(
        agent.logger.get("environment_return")[:-params.test_episodes])
    metrics.update({"test/train_env_returns": returns})

    agent.logger.log_hparams(params.toDict(), metrics)
Example #8
def main(args):
    """Run experiment."""
    set_random_seed(args.seed)
    env_config = parse_config_file(args.env_config_file)

    environment = GymEnvironment(
        env_config["name"], ctrl_cost_weight=env_config["action_cost"], seed=args.seed
    )
    reward_model = environment.env.reward_model()
    if args.exploration == "optimistic":
        dynamical_model = HallucinatedModel.default(environment, beta=args.beta)
        environment.add_wrapper(HallucinationWrapper)
    else:
        dynamical_model = TransformedModel.default(environment)
    kwargs = parse_config_file(args.agent_config_file)

    agent = getattr(
        importlib.import_module("rllib.agent"), f"{args.agent}Agent"
    ).default(
        environment=environment,
        dynamical_model=dynamical_model,
        reward_model=reward_model,
        thompson_sampling=args.exploration == "thompson",
        **kwargs,
    )
    train_agent(
        agent=agent,
        environment=environment,
        max_steps=env_config["max_steps"],
        num_episodes=args.train_episodes,
        render=args.render,
        print_frequency=1,
    )

    evaluate_agent(
        agent=agent,
        environment=environment,
        max_steps=env_config["max_steps"],
        num_episodes=args.test_episodes,
    )
Example #9
def test_tabular_interaction(agent, policy):
    LEARNING_RATE = 0.1
    environment = EasyGridWorld()

    critic = TabularQFunction(num_states=environment.num_states,
                              num_actions=environment.num_actions)
    policy = policy(critic, 0.1)
    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss
    memory = ExperienceReplay(max_len=MEMORY_MAX_SIZE)

    agent = agent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        memory=memory,
        batch_size=BATCH_SIZE,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )

    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.

    torch.testing.assert_allclose(
        critic.table.shape,
        torch.Size([environment.num_actions, environment.num_states]),
    )
Example #10
import numpy as np
import torch.optim

from rllib.agent import DPGAgent, TD3Agent  # noqa: F401
from rllib.dataset import ExperienceReplay, PrioritizedExperienceReplay  # noqa: F401
from rllib.environment import GymEnvironment
from rllib.util.parameter_decay import ExponentialDecay
from rllib.util.training.agent_training import evaluate_agent, train_agent

ENVIRONMENT = ["MountainCarContinuous-v0", "Pendulum-v0"][0]
NUM_EPISODES = 25
MAX_STEPS = 2500
GAMMA = 0.99
EPS_START = 1.0
EPS_END = 0.1
EPS_DECAY = 1e6
SEED = 0

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)
noise = ExponentialDecay(EPS_START, EPS_END, EPS_DECAY)
agent = DPGAgent.default(environment, exploration_noise=noise, gamma=GAMMA)

train_agent(agent,
            environment,
            num_episodes=NUM_EPISODES,
            max_steps=MAX_STEPS,
            render=True)
evaluate_agent(agent, environment, 1, MAX_STEPS)
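The TD3Agent import is kept with # noqa: F401, which suggests the script is meant to be switched between DPG and TD3 by editing a single line. Assuming TD3Agent.default accepts the same keyword arguments as DPGAgent.default, the swap would be:

agent = TD3Agent.default(environment, exploration_noise=noise, gamma=GAMMA)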
Example #11
import numpy as np
import torch.optim

from rllib.agent import SACAgent
from rllib.dataset import ExperienceReplay, PrioritizedExperienceReplay  # noqa: F401
from rllib.environment import GymEnvironment
from rllib.util.training.agent_training import evaluate_agent, train_agent

ENVIRONMENT = ["MountainCarContinuous-v0", "Pendulum-v0"][1]
NUM_EPISODES = 40
MAX_STEPS = 1000
GAMMA = 0.99
SEED = 1

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)

agent = SACAgent.default(environment, eta=1.0, regularization=True, gamma=GAMMA)

train_agent(
    agent,
    environment,
    num_episodes=NUM_EPISODES,
    max_steps=MAX_STEPS,
    print_frequency=1,
    render=True,
)
evaluate_agent(agent, environment, num_episodes=1, max_steps=MAX_STEPS)
Example #12
"""Working example of REPS."""
import numpy as np
import torch

from rllib.agent import REPSAgent
from rllib.environment import GymEnvironment
from rllib.util.training.agent_training import evaluate_agent, train_agent

ETA = 1.0
NUM_EPISODES = 100

GAMMA = 1
SEED = 0
ENVIRONMENT = "CartPole-v0"
MAX_STEPS = 200

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)

agent = REPSAgent.default(environment,
                          epsilon=ETA,
                          regularization=True,
                          gamma=GAMMA)
train_agent(agent,
            environment,
            num_episodes=NUM_EPISODES,
            max_steps=MAX_STEPS + 1)
evaluate_agent(agent, environment, num_episodes=1, max_steps=MAX_STEPS + 1)