# Example: Soft Actor-Critic (SAC) on BipedalWalker-v3.
from ai_traineree.agents.sac import SACAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType
import numpy as np
import pylab as plt

data_logger = TensorboardLogger()
env_name = 'BipedalWalker-v3'
task: TaskType = GymTask(env_name)

config = {
    'warm_up': 500,
    'batch_size': 200,
    'update_freq': 30,
    "number_updates": 1,
    "gamma": 0.99,
    "critic_lr": 1e-3,
    "actor_lr": 2e-3,
    "alpha": 0.2,
    "tau": 0.01,
    "max_grad_norm_alpha": 1.0,
    "max_grad_norm_actor": 10.0,
    "max_grad_norm_critic": 10.0,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100), **config)
env_runner = EnvRunner(task, agent, max_iterations=10000, data_logger=data_logger)
# env_runner.interact_episode(render=True)
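# A minimal sketch of launching training, assuming the same EnvRunner.run interface
# used in the other examples; the values below are illustrative (reward_goal=300 is
# BipedalWalker's conventional solve threshold, the remaining parameters are assumptions).
scores = env_runner.run(reward_goal=300, max_episodes=2000, eps_decay=0.99, force_new=True)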
# Example: MADDPG on PettingZoo's multi-agent multiwalker environment.
from collections import defaultdict

import pylab as plt
from pettingzoo.sisl import multiwalker_v7

from ai_traineree.loggers import TensorboardLogger
# MADDPGAgent, MultiAgentCycleEnvRunner and PettingZooTask are assumed to be
# imported from ai_traineree; the exact module paths depend on the installed version.

env = multiwalker_v7.env()
ma_task = PettingZooTask(env)
ma_task.reset()

state_size = int(ma_task.state_size[0])
action_size = int(ma_task.action_size.shape[0])
agent_number = ma_task.num_agents

config = {
    'device': 'cuda',
    'update_freq': 20,
    'batch_size': 200,
    'agent_names': env.agents,
    'hidden_layers': (500, 300, 100),
}
ma_agent = MADDPGAgent(state_size, action_size, agent_number, **config)

data_logger = TensorboardLogger(log_dir="runs/Multiwalkers-MADDPG")
# data_logger = None
env_runner = MultiAgentCycleEnvRunner(ma_task, ma_agent, max_iterations=9000, data_logger=data_logger)
scores = env_runner.run(reward_goal=20, max_episodes=50, eps_decay=0.99, log_episode_freq=1, force_new=True)

# Aggregate per-agent scores and the per-episode sum.
parsed_scores = defaultdict(list)
summed_score = []
for score in scores:
    summed_score.append(0)
    for name, value in score.items():
        parsed_scores[name].append(value)
        summed_score[-1] += value

# plot the scores
fig = plt.figure()
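# A minimal sketch for finishing the plot started above: one line per agent plus the
# per-episode sum. The labels and layout are assumptions, not the library's own code.
ax = fig.add_subplot(111)
for name, values in parsed_scores.items():
    ax.plot(range(len(values)), values, label=name)
ax.plot(range(len(summed_score)), summed_score, label='sum')
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.legend()
plt.show()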
# Example: PPO on LunarLanderContinuous-v2.
import pylab as plt

from ai_traineree.agents.ppo import PPOAgent as Agent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLanderContinuous-v2'
data_logger = TensorboardLogger()
task: TaskType = GymTask(env_name)

config = {
    'rollout_length': 60,
    'batch_size': 60,
    "number_updates": 1,
    "using_gae": False,  # Default is True
    "ppo_ratio_clip": 0.2,
    "entropy_weight": 0.0005,
    "gamma": 0.99,
    "action_scale": 1,
    "max_grad_norm_actor": 3.0,
    "max_grad_norm_critic": 5.0,
    "critic_lr": 0.001,
    "actor_lr": 0.0004,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(100, 100, 50), **config)
env_runner = EnvRunner(task, agent, data_logger=data_logger)
# env_runner.interact_episode(0, render=True)
scores = env_runner.run(80, 2000, eps_decay=0.99, force_new=True, checkpoint_every=20)
# Example: Independent Q-Learning (IQL) agents on PettingZoo's prison environment.
from collections import defaultdict

from ai_traineree.loggers import TensorboardLogger
# `prison` is assumed to be imported from PettingZoo, and IQLAgents,
# MultiAgentCycleEnvRunner and PettingZooTask from ai_traineree; the exact module
# paths depend on the installed versions.

env = prison.env(vector_observation=True)
ma_task = PettingZooTask(env)
ma_task.reset()

state_size = ma_task.state_size
action_size = ma_task.action_size.n
agent_number = ma_task.num_agents

config = {
    'device': 'cpu',
    'update_freq': 10,
    'batch_size': 200,
    'agent_names': env.agents,
}
ma_agent = IQLAgents(state_size, action_size, agent_number, **config)

data_logger = TensorboardLogger(log_dir="runs/Prison-IQL")
env_runner = MultiAgentCycleEnvRunner(ma_task, ma_agent, max_iterations=9000, data_logger=data_logger)
scores = env_runner.run(reward_goal=20, max_episodes=50, eps_decay=0.95, log_episode_freq=1, force_new=True)

# Aggregate per-agent scores and the per-episode sum, as in the multiwalker example.
parsed_scores = defaultdict(list)
summed_score = []
for score in scores:
    summed_score.append(0)
    for name, value in score.items():
        parsed_scores[name].append(value)
        summed_score[-1] += value
# Example: repeated experiments sweeping `n_steps` across several seeds on CartPole-v1.
from pprint import pprint
from typing import Any, Dict

import torch

from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
from ai_traineree.tasks import GymTask
# `Agent` is assumed to be imported from ai_traineree.agents; the concrete agent
# class is not specified in this fragment.

config_default = {'hidden_layers': (50, 50)}
config_updates = [{'n_steps': n} for n in range(1, 11)]

task = GymTask("CartPole-v1")
seeds = [32167, 1, 999, 2833700, 13]

for idx, config_update in enumerate(config_updates):
    config: Dict[str, Any] = config_default.copy()
    config.update(config_update)

    for seed in seeds:
        config['seed'] = seed
        pprint(config)
        torch.manual_seed(config['seed'])

        agent = Agent(task.state_size, task.action_size, **config)
        data_logger = TensorboardLogger(log_dir=f'runs/MultiExp-{task.name}-i{idx}-s{seed}')
        env_runner = EnvRunner(task, agent, data_logger=data_logger)
        env_runner.seed(seed)
        env_runner.run(reward_goal=99999, max_episodes=500, eps_decay=0.95, force_new=True)
        data_logger.close()
# Rainbow DQN example (fragment): `device`, `network_fn`, `agent_state_tranform`,
# `task` and `env_name` are assumed to be defined earlier in the original script.
import datetime

from ai_traineree.env_runner import EnvRunner
from ai_traineree.loggers import TensorboardLogger
# RainbowAgent is assumed to be imported from ai_traineree's agents package.

config = {
    'device': device,
    "update_freq": 50,
    "number_updates": 5,
    "batch_size": 200,
    "buffer_size": 1e4,
    "warm_up": 100,
    "lr": 1e-4,
    "pre_network_fn": lambda in_features: network_fn(in_features, 300, device),
    "hidden_layers": None,
    "state_transform": agent_state_tranform,
}
state_size = task.actual_state_size
agent = RainbowAgent(state_size, task.action_size, **config)

data_logger = TensorboardLogger(
    f'runs/{env_name}_{agent.name}_{datetime.datetime.now().strftime("%b%d_%H-%M-%S")}'
)
env_runner = EnvRunner(task, agent, max_iterations=10000, data_logger=data_logger)
scores = env_runner.run(
    reward_goal=1000, max_episodes=1000, log_every=1, eps_start=0.99,
    gif_every_episodes=100, force_new=True,
)
env_runner.interact_episode(render=True)

# plot the scores
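# A minimal plotting sketch following the same matplotlib pattern as the other
# examples; assumes `scores` holds one value per episode and pylab is available.
import pylab as plt

fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show()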
# Fragment: imports, `Agent`, `task`, `env_name`, `seed`, and the opening of the
# config dict are defined earlier in the original script and not shown here.
config = {
    'number_updates': 1,
    'hidden_layers': (100, 100),
    'actor_lr': 5e-4,
    'critic_lr': 5e-4,
    'alpha_lr': 3e-5,
    'tau': 0.02,
    "alpha": 0.2,
    'action_scale': 2,
    'action_min': -2,
    'action_max': 2,
    'seed': seed,
}
agent = Agent(task.state_size, task.action_size, **config)

log_dir = f"runs/{env_name}_{agent.name}-{datetime.datetime.now().isoformat()[:-7]}"
data_logger = TensorboardLogger(log_dir=log_dir)
env_runner = EnvRunner(task, agent, data_logger=data_logger, seed=seed)
scores = env_runner.run(reward_goal=30, max_episodes=500, eps_end=0.01, eps_decay=0.95, force_new=True)
env_runner.interact_episode(0, render=True)
data_logger.close()

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
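# A small follow-up sketch: save and display the figure; the file name is an
# assumption (it reuses `env_name` from earlier in the script).
plt.savefig(f'{env_name}_scores.png')
plt.show()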