def test_agents_can_play_games_of_different_dimensions():
    config.num_episodes_to_run = 10
    config.hyperparameters["DQN_Agents"]["batch_size"] = 3
    AGENTS = [
        A2C, A3C, PPO, DDQN, DQN_With_Fixed_Q_Targets,
        DDQN_With_Prioritised_Experience_Replay, DQN
    ]
    config.environment = gym.make("CartPole-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [SAC, TD3, PPO, DDPG]
    config.environment = gym.make("MountainCarContinuous-v0")
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()

    AGENTS = [DDQN, SNN_HRL]
    config.environment = Four_Rooms_Environment(
        15,
        15,
        stochastic_actions_probability=0.25,
        random_start_user_place=True,
        random_goal_place=False)
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        assert agent.agent_name in results.keys()
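
These tests read from a module-level config object that is built before they run. A minimal sketch of what that setup could look like, assuming the Config class from the Deep-Reinforcement-Learning-Algorithms-with-PyTorch library and using placeholder hyperparameter values (not the ones the actual tests use):

# Hypothetical setup of the shared `config` object used by the tests above.
# Attribute and key names follow the library's conventions; the values are placeholders.
import gym
from utilities.data_structures.Config import Config  # assumed import path

config = Config()
config.seed = 1
config.num_episodes_to_run = 10
config.environment = gym.make("CartPole-v0")
config.use_GPU = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = False
config.hyperparameters = {
    "DQN_Agents": {
        "learning_rate": 0.01,
        "batch_size": 64,
        "buffer_size": 40000,
        "discount_rate": 0.99,
        "linear_hidden_units": [30, 10],
        "final_layer_activation": "None",
        "update_every_n_steps": 1,
        "gradient_clipping_norm": 5,
        # ... plus whichever further keys each DQN variant expects
    },
    # ... plus "Actor_Critic_Agents" / "Policy_Gradient_Agents" entries for the other agents
}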

def run_train(args):
    # Save the run arguments alongside the results for reproducibility
    save_path = os.path.join(args.logdir, args.agent + '@cps' + str(args.stage))
    args.save_path = init_trial_path(save_path)
    with open(os.path.join(args.save_path, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)
    print(args)
    config = init_config(args)
    agent = get_agent(args.agent)
    trainer = Trainer(config, [agent])
    results = trainer.run_games_for_agents()
    agent_name = agent.agent_name
    # Per-episode rewards for this agent's first (and only) run
    res_age = np.array(results[agent_name][0][0])
    # Score = average of the best episode reward and the mean of the last 10 episodes
    score = (max(res_age) + np.mean(res_age[-10:])) / 2
    # First episode indices where the reward exceeds 0 and 100 respectively (-1 if never reached)
    converge_step_1 = np.where(res_age > 0)[0]
    converge_step_2 = np.where(res_age > 100)[0]
    converge_step_1 = converge_step_1[0] if len(converge_step_1) > 0 else -1
    converge_step_2 = converge_step_2[0] if len(converge_step_2) > 0 else -1
    print("Train Done")
    print(f"Agent={agent_name}, Score={score}, Path={args.save_path}")
    print(f"Final Reward={np.mean(res_age[-10:])}, Converge Step (>0)={converge_step_1}, Converge Step (>100)={converge_step_2}")
    agent = trainer.trained_agents[0][0]
    return agent
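
A hypothetical command-line wrapper around run_train; the flag names mirror the attributes the function reads (args.logdir, args.agent, args.stage), and any extra flags that init_config expects would be added here as well:

# Hypothetical CLI entry point for run_train; flag names are assumptions
# derived from the attributes accessed in the function above.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--logdir", type=str, default="logs")
    parser.add_argument("--agent", type=str, default="DQN")
    parser.add_argument("--stage", type=int, default=1)
    args = parser.parse_args()
    trained_agent = run_train(args)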
Example no. 3
def test_agent_solve_bit_flipping_game():
    AGENTS = [DQN]
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, agent_results)
def test_agent_solve_bit_flipping_game():
    AGENTS = [
        PPO, DDQN, DQN_With_Fixed_Q_Targets,
        DDQN_With_Prioritised_Experience_Replay, DQN, DQN_HER
    ]
    trainer = Trainer(config, AGENTS)
    results = trainer.run_games_for_agents()
    for agent in AGENTS:
        agent_results = results[agent.agent_name]
        agent_results = np.max(agent_results[0][1][50:])
        assert agent_results >= 0.0, "Failed for {} -- score {}".format(
            agent.agent_name, agent_results)
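
Both tests above index into the dictionary returned by run_games_for_agents, which maps each agent name to a list of runs, with the per-episode scores at index 0 and the rolling scores at index 1 of each run. A small helper sketch, assuming that layout (the function itself is illustrative, not part of the library):

import numpy as np

def best_rolling_score(results, agent_name, warmup_episodes=50):
    # results[agent_name][0] is the first run; [0] = per-episode scores, [1] = rolling scores
    rolling_scores = np.array(results[agent_name][0][1])
    return np.max(rolling_scores[warmup_episodes:])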
Example no. 5
            "batch_norm": False,
            "buffer_size": 1000000,
            "tau": 0.005,
            "gradient_clipping_norm": 5,
            "initialiser": "Xavier"
        },
        "min_steps_before_learning": 400,
        "batch_size": 64,
        "discount_rate": 0.99,
        "mu": 0.0,  #for O-H noise
        "theta": 0.15,  #for O-H noise
        "sigma": 0.25,  #for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "automatically_tune_entropy_hyperparameter": True,
        "entropy_term_weight": None,
        "add_extra_noise": False,
        "do_evaluation_iterations": True
    }
}

if __name__ == "__main__":
    # Set test to True (and point dictPath at the saved network) to evaluate a saved model instead of training
    test = False
    dictPath = None
    AGENTS = [SAC_Discrete]
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents(test, dictPath)
Example no. 6
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [400, 300],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "batch_size": 64,
        "discount_rate": 0.99,
        "mu": 0.0,  # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.2,  # for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "clip_rewards": False
    }
}

if __name__ == "__main__":
    AGENTS = [DDPG, HIRO]
    trainer = Trainer(config, AGENTS)
    trainer.run_games_for_agents()
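
The two truncated dictionaries above are fragments of config.hyperparameters. A minimal sketch of how such a fragment could be wired into a full Config before building the Trainer, assuming the library's usual import paths and its "Actor_Critic_Agents" grouping; values are taken from the fragment above where visible, with placeholders for the truncated Actor keys:

# Hypothetical wiring of the hyperparameter fragment above into a Config object.
# Import paths and the "Actor_Critic_Agents" group name are assumptions.
import gym
from utilities.data_structures.Config import Config
from agents.Trainer import Trainer
from agents.actor_critic_agents.DDPG import DDPG

config = Config()
config.seed = 1
config.environment = gym.make("MountainCarContinuous-v0")
config.num_episodes_to_run = 450
config.use_GPU = False
config.hyperparameters = {
    "Actor_Critic_Agents": {
        "Actor": {"learning_rate": 0.003, "linear_hidden_units": [400, 300],
                  "final_layer_activation": "TANH", "batch_norm": False,
                  "tau": 0.01, "gradient_clipping_norm": 5},
        "Critic": {"learning_rate": 0.01, "linear_hidden_units": [400, 300],
                   "final_layer_activation": "None", "batch_norm": False,
                   "buffer_size": 100000, "tau": 0.01, "gradient_clipping_norm": 5},
        "batch_size": 64,
        "discount_rate": 0.99,
        "mu": 0.0, "theta": 0.15, "sigma": 0.2,  # Ornstein-Uhlenbeck noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "clip_rewards": False
    }
}

trainer = Trainer(config, [DDPG])
trainer.run_games_for_agents()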