from ai_traineree.agents.sac import SACAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

import numpy as np
import pylab as plt


data_logger = TensorboardLogger()
env_name = 'BipedalWalker-v3'
task: TaskType = GymTask(env_name)
config = {
    'warm_up': 500,            # steps collected before learning starts
    'batch_size': 200,
    'update_freq': 30,         # environment steps between learning updates
    "number_updates": 1,       # gradient updates per learning step
    "gamma": 0.99,             # discount factor
    "critic_lr": 1e-3,
    "actor_lr": 2e-3,
    "alpha": 0.2,              # entropy temperature
    "tau": 0.01,               # soft target-update coefficient
    "max_grad_norm_alpha": 1.0,
    "max_grad_norm_actor": 10.0,
    "max_grad_norm_critic": 10.0,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100), **config)

env_runner = EnvRunner(task, agent, max_iterations=10000, data_logger=data_logger)
# env_runner.interact_episode(render=True)
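# The snippet ends before training is started; a typical invocation, following the
# pattern of the other examples (reward_goal and max_episodes are illustrative values):
scores = env_runner.run(reward_goal=80, max_episodes=2000, eps_decay=0.99, force_new=True)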
Example #2
# This example omits its imports; the ones below are assumed
# (the ai_traineree module paths are best guesses and may differ between versions):
# from collections import defaultdict
# import pylab as plt
# from pettingzoo.sisl import multiwalker_v7
# from ai_traineree.loggers import TensorboardLogger
# from ai_traineree.multi_agent.maddpg import MADDPGAgent
# from ai_traineree.tasks import PettingZooTask
# MultiAgentCycleEnvRunner is used below as well.
env = multiwalker_v7.env()
ma_task = PettingZooTask(env)
ma_task.reset()

state_size = int(ma_task.state_size[0])
action_size = int(ma_task.action_size.shape[0])
agent_number = ma_task.num_agents
config = {
    'device': 'cuda',
    'update_freq': 20,
    'batch_size': 200,
    'agent_names': env.agents,
    'hidden_layers': (500, 300, 100),
}
ma_agent = MADDPGAgent(state_size, action_size, agent_number, **config)
data_logger = TensorboardLogger(log_dir="runs/Multiwalkers-MADDPG")
# data_logger = None

env_runner = MultiAgentCycleEnvRunner(ma_task, ma_agent, max_iterations=9000, data_logger=data_logger)
scores = env_runner.run(reward_goal=20, max_episodes=50, eps_decay=0.99, log_episode_freq=1, force_new=True)

parsed_scores = defaultdict(list)
summed_score = []
for score in scores:
    summed_score.append(0)
    for name, value in score.items():
        parsed_scores[name].append(value)
        summed_score[-1] += value

# plot the scores
fig = plt.figure()
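# The source is cut off here; following the plotting pattern used in the other examples,
# the per-episode summed scores could be drawn like this (assumed completion):
ax = fig.add_subplot(111)
plt.plot(range(len(summed_score)), summed_score)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()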
Example #3
import pylab as plt

from ai_traineree.agents.ppo import PPOAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType


env_name = 'LunarLanderContinuous-v2'
data_logger = TensorboardLogger()
task: TaskType = GymTask(env_name)
config = {
    'rollout_length': 60,      # steps collected per rollout before an update
    'batch_size': 60,
    "number_updates": 1,

    "using_gae": False,  # Default is True
    "ppo_ratio_clip": 0.2,     # PPO surrogate clipping epsilon
    "entropy_weight": 0.0005,  # entropy bonus coefficient
    "gamma": 0.99,             # discount factor
    "action_scale": 1,
    "max_grad_norm_actor": 3.0,
    "max_grad_norm_critic": 5.0,
    "critic_lr": 0.001,
    "actor_lr": 0.0004,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100, 50), **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)
# env_runner.interact_episode(0, render=True)
scores = env_runner.run(reward_goal=80, max_episodes=2000, eps_decay=0.99, force_new=True, checkpoint_every=20)
Example #4
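# This example also omits its imports; presumably something like the following
# (module paths and the prison environment version are assumptions):
# from collections import defaultdict
# from pettingzoo.butterfly import prison_v3 as prison
# from ai_traineree.loggers import TensorboardLogger
# from ai_traineree.multi_agent.iql import IQLAgents
# from ai_traineree.tasks import PettingZooTask
# MultiAgentCycleEnvRunner is used below as well.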
env = prison.env(vector_observation=True)
ma_task = PettingZooTask(env)
ma_task.reset()

state_size = ma_task.state_size
action_size = ma_task.action_size.n
agent_number = ma_task.num_agents
config = {
    'device': 'cpu',
    'update_freq': 10,
    'batch_size': 200,
    'agent_names': env.agents,
}
ma_agent = IQLAgents(state_size, action_size, agent_number, **config)
data_logger = TensorboardLogger(log_dir="runs/Prison-IQL")

env_runner = MultiAgentCycleEnvRunner(ma_task,
                                      ma_agent,
                                      max_iterations=9000,
                                      data_logger=data_logger)
scores = env_runner.run(reward_goal=20,
                        max_episodes=50,
                        eps_decay=0.95,
                        log_episode_freq=1,
                        force_new=True)

parsed_scores = defaultdict(list)
summed_score = []
for score in scores:
    summed_score.append(0)
    # the loop body is cut off in the source; completed to match the multiwalker example above
    for name, value in score.items():
        parsed_scores[name].append(value)
        summed_score[-1] += value
Example #5
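# A small hyperparameter sweep: each n_steps setting is trained with several seeds.
# The snippet uses torch, Agent and EnvRunner without importing them; presumably
# something along these lines (the exact agent class is an assumption):
# import torch
# from ai_traineree.agents.dqn import DQNAgent as Agent
# from ai_traineree.env_runner import EnvRunner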
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from typing import Any, Dict
from pprint import pprint

config_default = {'hidden_layers': (50, 50)}
config_updates = [{'n_steps': n} for n in range(1, 11)]

task = GymTask("CartPole-v1")
seeds = [32167, 1, 999, 2833700, 13]

for idx, config_update in enumerate(config_updates):
    config: Dict[str, Any] = config_default.copy()
    config.update(config_update)

    for seed in seeds:
        config['seed'] = seed
        pprint(config)
        torch.manual_seed(config['seed'])
        agent = Agent(task.state_size, task.action_size, **config)

        data_logger = TensorboardLogger(
            log_dir=f'runs/MultiExp-{task.name}-i{idx}-s{seed}')
        env_runner = EnvRunner(task, agent, data_logger=data_logger)
        env_runner.seed(seed)
        env_runner.run(reward_goal=99999,
                       max_episodes=500,
                       eps_decay=0.95,
                       force_new=True)
        data_logger.close()
Example #6
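# This Rainbow example relies on definitions that are not shown here: device, env_name,
# task (with an actual_state_size attribute), network_fn(...), agent_state_tranform(...),
# plus imports of datetime, RainbowAgent, EnvRunner and TensorboardLogger.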
config = {
    'device': device,
    "update_freq": 50,
    "number_updates": 5,
    "batch_size": 200,
    "buffer_size": 1e4,
    "warm_up": 100,
    "lr": 1e-4,
    "pre_network_fn": lambda in_features: network_fn(in_features, 300, device),
    "hidden_layers": None,
    "state_transform": agent_state_tranform,
}
state_size = task.actual_state_size
agent = RainbowAgent(state_size, task.action_size, **config)
data_logger = TensorboardLogger(
    f'runs/{env_name}_{agent.name}_{datetime.datetime.now().strftime("%b%d_%H-%M-%S")}'
)
env_runner = EnvRunner(task,
                       agent,
                       max_iterations=10000,
                       data_logger=data_logger)

scores = env_runner.run(reward_goal=1000,
                        max_episodes=1000,
                        log_every=1,
                        eps_start=0.99,
                        gif_every_episodes=100,
                        force_new=True)
env_runner.interact_episode(render=True)

# plot the scores
Example #7
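# The beginning of this example is missing; it assumes env_name, task, seed, pylab (plt),
# datetime, EnvRunner, TensorboardLogger and an Agent import are defined above
# (the alpha/tau/alpha_lr entries suggest an SAC-style agent).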
config = {
    'number_updates': 1,
    'hidden_layers': (100, 100),
    'actor_lr': 5e-4,
    'critic_lr': 5e-4,
    'alpha_lr': 3e-5,
    'tau': 0.02,
    "alpha": 0.2,
    'action_scale': 2,
    'action_min': -2,
    'action_max': 2,
    'seed': seed,
}
agent = Agent(task.state_size, task.action_size, **config)

log_dir = f"runs/{env_name}_{agent.name}-{datetime.datetime.now().isoformat()[:-7]}"
data_logger = TensorboardLogger(log_dir=log_dir)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)
scores = env_runner.run(reward_goal=30,
                        max_episodes=500,
                        eps_end=0.01,
                        eps_decay=0.95,
                        force_new=True)
env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
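# show() is not in the source but is needed to display the figure when run as a plain script:
plt.show()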