class TestMultiagentAtariPresets(unittest.TestCase):
    """Smoke tests for multiagent Atari presets on the 'pong_v2' environment."""

    def setUp(self):
        # Create and reset the environment so that ``last()`` can be called
        # on it by validate_preset.
        self.env = MultiagentAtariEnv('pong_v2', device='cpu')
        self.env.reset()

    def tearDown(self):
        # Remove the checkpoint written by validate_preset, if one exists.
        try:
            os.remove('test_preset.pt')
        except FileNotFoundError:
            pass

    def test_independent(self):
        """Build one DQN preset per agent and validate the combined preset."""
        # Reuse the fixture environment (already reset in setUp) instead of
        # constructing a second environment that is never reset.
        env = self.env
        presets = {
            agent_id: dqn.device('cpu').env(env.subenvs[agent_id]).build()
            for agent_id in env.agents
        }
        self.validate_preset(
            IndependentMultiagentPreset('independent', 'cpu', presets), env)

    def validate_preset(self, preset, env):
        """Check that agents built from ``preset`` can act on ``env``.

        Exercises the training agent, the test agent, and a test agent
        rebuilt from a preset that was saved to and loaded from disk.

        Args:
            preset: The multiagent preset under test.
            env: An environment that has already been reset; its ``last()``
                state is fed to each agent.
        """
        # normal agent
        agent = preset.agent(writer=DummyWriter(), train_steps=100000)
        agent.act(env.last())
        # test agent
        test_agent = preset.test_agent()
        test_agent.act(env.last())
        # test save/load
        preset.save('test_preset.pt')
        preset = torch.load('test_preset.pt')
        test_agent = preset.test_agent()
        test_agent.act(env.last())
示例#2
0
 def test_reset(self):
     """The state returned by reset() on 'pong_v1' is a clean first state."""
     initial_state = MultiagentAtariEnv('pong_v1', device='cpu').reset()
     expected_shape = (1, 84, 84)
     self.assertEqual(initial_state.observation.shape, expected_shape)
     self.assertEqual(initial_state.reward, 0)
     self.assertEqual(initial_state.done, False)
     self.assertEqual(initial_state.mask, 1.)
     # The first state is expected to belong to agent 'first_0'.
     self.assertEqual(initial_state['agent'], 'first_0')
示例#3
0
 def test_step_tensor(self):
     """Stepping 'pong_v1' with a tensor action returns the next agent's state."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     atari.reset()
     action = torch.tensor([0])
     next_state = atari.step(action)
     expected_shape = (1, 84, 84)
     self.assertEqual(next_state.observation.shape, expected_shape)
     self.assertEqual(next_state.reward, 0)
     self.assertEqual(next_state.done, False)
     self.assertEqual(next_state.mask, 1.)
     # After one step the returned state is expected to belong to 'second_0'.
     self.assertEqual(next_state['agent'], 'second_0')
示例#4
0
 def test_independent_cuda(self):
     """Validate an independent multiagent DQN preset built for the CUDA device."""
     atari = MultiagentAtariEnv('pong_v2', max_cycles=1000, device=CUDA)
     # One DQN preset per agent, each built against that agent's sub-environment.
     per_agent = {}
     for agent_id in atari.agents:
         per_agent[agent_id] = dqn.device(CUDA).env(atari.subenvs[agent_id]).build()
     combined = IndependentMultiagentPreset('independent', CUDA, per_agent)
     validate_multiagent(combined, atari)
 def test_independent(self):
     """Validate an independent multiagent DQN preset built for 'pong_v1' on CPU."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     # One DQN preset per agent, each built against that agent's sub-environment.
     per_agent = {}
     for agent_id in atari.agents:
         per_agent[agent_id] = dqn.device('cpu').env(atari.subenvs[agent_id]).build()
     combined = IndependentMultiagentPreset('independent', 'cpu', per_agent)
     self.validate_preset(combined, atari)
def main():
    """Parse command-line arguments and run a multiagent Atari benchmark.

    The positional ``env`` and ``agent`` arguments select the environment and
    the preset (looked up by name on ``multiagent_atari``). The experiment is
    then trained for ``--frames`` frames on ``--device``.
    """

    def parse_bool(text):
        """Convert a command-line string such as 'true' or '0' to a bool."""
        lowered = text.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y"):
            return True
        if lowered in ("0", "false", "f", "no", "n"):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got %r" % text
        )

    parser = argparse.ArgumentParser(description="Run an multiagent Atari benchmark.")
    parser.add_argument("env", help="Name of the Atari game (e.g. Pong).")
    parser.add_argument(
        "agent", help="Name of the agent (e.g. dqn). See presets for available agents."
    )
    parser.add_argument(
        "--device",
        default="cuda",
        help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).",
    )
    # argparse does not apply ``type`` to non-string defaults, so the default
    # must already be an int; ``40e6`` alone would leave args.frames a float.
    parser.add_argument(
        "--frames", type=int, default=int(40e6), help="The number of training frames."
    )
    # ``type=bool`` treats every non-empty string (including "False") as True.
    # Parse the value explicitly, and allow a bare ``--render`` to mean True.
    parser.add_argument(
        "--render",
        type=parse_bool,
        nargs="?",
        const=True,
        default=False,
        help="Render the environment.",
    )
    parser.add_argument(
        "--writer", default='tensorboard', help="The backend used for tracking experiment metrics."
    )
    args = parser.parse_args()

    env = MultiagentAtariEnv(args.env, device=args.device)
    agent_name = args.agent
    # Resolve the preset by name from the multiagent_atari module.
    agent = getattr(multiagent_atari, agent_name)
    experiment = MultiagentEnvExperiment(agent(device=args.device), env, write_loss=False, writer=args.writer)
    experiment.train(frames=args.frames)
def main():
    """Parse command-line arguments and watch a saved multiagent Atari model.

    Builds the environment named by ``env`` on ``--device`` and passes it,
    the model ``filename``, the ``--fps`` value and the ``--reload`` flag to
    ``watch``.
    """
    cli = argparse.ArgumentParser(description="Watch pretrained multiagent atari")

    # Positional arguments.
    cli.add_argument("env", help="Name of the Atari game (e.g. pong-v1)")
    cli.add_argument("filename", help="File where the model was saved.")

    # Optional arguments.
    device_help = "The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)"
    cli.add_argument("--device", default="cuda", help=device_help)
    cli.add_argument("--fps", default=30, type=int, help="Playback speed")
    cli.add_argument(
        "--reload",
        action="store_true",
        default=False,
        help="Reload the model from disk after every episode",
    )

    options = cli.parse_args()
    environment = MultiagentAtariEnv(options.env, device=options.device)
    watch(environment, options.filename, options.fps, options.reload)
示例#8
0
 def test_is_done(self):
     """Neither agent is reported as done right after a reset."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     atari.reset()
     for agent_id in ('first_0', 'second_0'):
         self.assertFalse(atari.is_done(agent_id))
示例#9
0
 def test_list_agents(self):
     """The 'pong_v1' environment lists its two agents in order."""
     listed = MultiagentAtariEnv('pong_v1', device='cpu').agents
     self.assertEqual(listed, ['first_0', 'second_0'])
示例#10
0
 def test_init(self):
     """Constructing several different games on CPU does not raise."""
     for game in ('pong_v1', 'mario_bros_v2', 'entombed_cooperative_v2'):
         MultiagentAtariEnv(game, device='cpu')
示例#11
0
 def test_action_spaces(self):
     """Both 'pong_v1' agents expose an action space with n == 18."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     spaces = atari.action_spaces
     for agent_id in ('first_0', 'second_0'):
         self.assertEqual(spaces[agent_id].n, 18)
示例#12
0
 def test_state_spaces(self):
     """Both 'pong_v1' agents expose a state space of shape (1, 84, 84)."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     spaces = atari.state_spaces
     for agent_id in ('first_0', 'second_0'):
         self.assertEqual(spaces[agent_id].shape, (1, 84, 84))
 def setUp(self):
     """Seed NumPy, torch and a 'space_invaders_v1' environment with 0."""
     seed = 0
     np.random.seed(seed)
     torch.manual_seed(seed)
     atari = MultiagentAtariEnv('space_invaders_v1', device='cpu')
     atari.seed(seed)
     self.env = atari
     # No experiment exists until a test creates one.
     self.experiment = None
示例#14
0
 def test_name(self):
     """The environment's name attribute matches the game it was built with."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     reported = atari.name
     self.assertEqual(reported, 'pong_v1')
 def setUp(self):
     """Create a 'pong_v2' CPU environment and reset it before each test."""
     atari = MultiagentAtariEnv('pong_v2', device='cpu')
     self.env = atari
     self.env.reset()
示例#16
0
 def test_agent_iter(self):
     """The first agent yielded by agent_iter() after a reset is 'first_0'."""
     atari = MultiagentAtariEnv('pong_v1', device='cpu')
     atari.reset()
     first_agent = next(iter(atari.agent_iter()))
     self.assertEqual(first_agent, 'first_0')
class TestMultiagentEnvExperiment(unittest.TestCase):
    """Tests for the multiagent experiment, run through MockExperiment."""

    def setUp(self):
        # Fix every seed so the recorded returns are reproducible.
        seed = 0
        np.random.seed(seed)
        torch.manual_seed(seed)
        atari = MultiagentAtariEnv('space_invaders_v1', device='cpu')
        atari.seed(seed)
        self.env = atari
        self.experiment = None

    def _make_experiment(self, **extra):
        """Build a quiet MockExperiment that never saves, plus ``extra`` kwargs."""
        return MockExperiment(self.make_preset(),
                              self.env,
                              quiet=True,
                              save_freq=float('inf'),
                              **extra)

    def test_adds_default_name(self):
        """Without a custom name, the writer label starts with the preset name."""
        label = self._make_experiment()._writer.label
        self.assertEqual(label, "independent_space_invaders_v1")

    def test_adds_custom_name(self):
        """A custom experiment name replaces the preset name in the label."""
        label = self._make_experiment(name='custom')._writer.label
        self.assertEqual(label, "custom_space_invaders_v1")

    def test_writes_training_returns(self):
        """Training writes the expected per-agent returns and frame counts."""
        experiment = self._make_experiment()
        experiment.train(episodes=3)
        frame_counts = [766, 1524, 2440, 3038]
        expected = {
            'evaluation/first_0/returns/frame': {
                'values': [465.0, 235.0, 735.0, 415.0],
                'steps': list(frame_counts),
            },
            'evaluation/second_0/returns/frame': {
                'values': [235.0, 465.0, 170.0, 295.0],
                'steps': list(frame_counts),
            },
        }
        self.assertEqual(experiment._writer.data, expected)

    def test_writes_test_returns(self):
        """Testing writes one mean and one std entry per agent at the same step."""
        experiment = self._make_experiment()
        experiment.train(episodes=3)
        # Discard what training wrote so only the test output is inspected.
        experiment._writer.data = {}
        experiment.test(episodes=3)
        expected_keys = [
            'evaluation/first_0/returns-test/mean',
            'evaluation/first_0/returns-test/std',
            'evaluation/second_0/returns-test/mean',
            'evaluation/second_0/returns-test/std',
        ]
        self.assertEqual(list(experiment._writer.data.keys()), expected_keys)
        reference_step = experiment._writer.data[expected_keys[0]]['steps'][0]
        for record in experiment._writer.data.values():
            self.assertEqual(len(record['values']), 1)
            self.assertGreaterEqual(record['values'][0], 0.0)
            self.assertEqual(len(record['steps']), 1)
            self.assertEqual(record['steps'][0], reference_step)

    def test_writes_loss(self):
        """The write_loss flag is forwarded to the writer unchanged."""
        enabled = self._make_experiment(write_loss=True)
        self.assertTrue(enabled._writer.write_loss)
        disabled = self._make_experiment(write_loss=False)
        self.assertFalse(disabled._writer.write_loss)

    def make_preset(self):
        """Return an independent multiagent preset with one CPU DQN per agent."""
        per_agent = {}
        for agent, env in self.env.subenvs.items():
            per_agent[agent] = dqn.device('cpu').env(env).build()
        return IndependentMultiagentPreset('independent', 'cpu', per_agent)