Example #1
def test_dqn_get_state():
    # Assign
    state_size, action_size = 3, 4
    init_config = {'lr': 0.1, 'gamma': 0.6}
    agent = DQNAgent(state_size, action_size, device='cpu', **init_config)

    # Act
    agent_state = agent.get_state()

    # Assert
    assert isinstance(agent_state, AgentState)
    assert agent_state.model == DQNAgent.name
    assert agent_state.state_space == state_size
    assert agent_state.action_space == action_size
    assert agent_state.config == agent._config
    assert agent_state.config['lr'] == 0.1
    assert agent_state.config['gamma'] == 0.6

    network_state = agent_state.network
    assert isinstance(network_state, NetworkState)
    assert {'net', 'target_net'} == set(network_state.net.keys())

    buffer_state = agent_state.buffer
    assert isinstance(buffer_state, BufferState)
    assert buffer_state.type == agent.buffer.type
    assert buffer_state.batch_size == agent.buffer.batch_size
    assert buffer_state.buffer_size == agent.buffer.buffer_size
Example #2
def test_serialize_network_state_actual():
    from ai_traineree.agents.dqn import DQNAgent

    agent = DQNAgent(10, 4)
    deterministic_interactions(agent, 30)
    network_state = agent.get_network_state()

    # Act
    ser = serialize(network_state)

    # Assert
    des = json.loads(ser)
    assert set(des['net'].keys()) == set(('target_net', 'net'))
Example #3
def test_dqn_seed():
    # Assign
    agent_0 = DQNAgent(4, 4, device='cpu')  # Reference
    agent_1 = DQNAgent(4, 4, device='cpu')
    agent_2 = copy.deepcopy(agent_1)

    # Act
    # Make sure agents have the same networks
    agent_nets = zip(agent_1.net.value_net.layers, agent_2.net.value_net.layers)
    agent_target_nets = zip(agent_1.target_net.value_net.layers, agent_2.target_net.value_net.layers)
    assert all([sum(sum(l1.weight - l2.weight)) == 0 for l1, l2 in agent_nets])
    assert all([sum(sum(l1.weight - l2.weight)) == 0 for l1, l2 in agent_target_nets])

    agent_0.seed(32167)
    actions_0 = deterministic_interactions(agent_0)
    agent_1.seed(0)
    actions_1 = deterministic_interactions(agent_1)
    agent_2.seed(0)
    actions_2 = deterministic_interactions(agent_2)

    # Assert
    assert any(a0 != a1 for (a0, a1) in zip(actions_0, actions_1))
    # All generated actions need to be identical
    for idx, (a1, a2) in enumerate(zip(actions_1, actions_2)):
        assert a1 == a2, f"Action mismatch on position {idx}: {a1} != {a2}"
Example #4
def test_agent_factory_dqn_agent_from_state_network_buffer_none():
    # Assign
    state_size, action_size = 10, 5
    agent = DQNAgent(state_size, action_size, device="cpu")
    state = agent.get_state()
    state.network = None
    state.buffer = None

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent.hparams == agent.hparams
Example #5
def test_serialize_agent_state_actual():
    from ai_traineree.agents.dqn import DQNAgent

    agent = DQNAgent(10, 4)
    deterministic_interactions(agent, 30)
    state = agent.get_state()

    # Act
    ser = serialize(state)

    # Assert
    des = json.loads(ser)
    assert des['model'] == DQNAgent.name
    assert len(des['buffer']['data']) == 30
    assert set(des['network']['net'].keys()) == set(('target_net', 'net'))
Example #6
def test_agent_factory_dqn_agent_from_state():
    # Assign
    state_size, action_size = 10, 5
    agent = DQNAgent(state_size, action_size, device="cpu")
    state = agent.get_state()

    # Act
    new_agent = AgentFactory.from_state(state)

    # Assert
    assert id(new_agent) != id(agent)
    assert new_agent == agent
    assert new_agent.name == DQNAgent.name
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
Example #7
def test_dqn_from_state_network_state_none():
    # Assign
    state_shape, action_size = 10, 3
    agent = DQNAgent(state_shape, action_size)
    agent_state = agent.get_state()
    agent_state.network = None

    # Act
    new_agent = DQNAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DQNAgent)
    assert new_agent.hparams == agent.hparams
    assert new_agent.buffer == agent.buffer
Example #8
    def __init__(self, state_size: int, action_size: int, num_agents: int,
                 **kwargs):
        """Independent Q-Learning

        A set of independent Q-Learning agents (a :py:class:`DQN <DQNAgent>` implementation) that are organized
        to work as a `Multi Agent` agent. These agents use the same defaults as the DQNAgent class.
        All keyword parameters are passed to each agent.

        Parameters:
            state_size (int): Dimensionality of the state.
            action_size (int): Dimensionality of the action.
            num_agents (int): Number of agents.

        Keyword Arguments:
            hidden_layers (tuple of ints): Shape for fully connected hidden layers.
            noise_scale (float): Default: 1.0. Noise amplitude.
            noise_sigma (float): Default: 0.5. Noise variance.
            actor_lr (float): Default: 0.001. Learning rate for actor network.
            gamma (float): Default: 0.99. Discount value.
            tau (float): Default: 0.02. Soft copy value.
            gradient_clip (optional float): Max norm for learning gradient. If None then no clip.
            batch_size (int): Number of samples per learning step.
            buffer_size (int): Number of previous samples to remember.
            warm_up (int): Number of samples to observe before learning starts.
            update_freq (int): How many samples between learning sessions.
            number_updates (int): How many learning cycles per learning session.

        """

        self.state_size: int = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.agent_names = kwargs.get("agent_names",
                                      map(str, range(self.num_agents)))

        kwargs['device'] = self._register_param(kwargs, "device", DEVICE)
        kwargs['hidden_layers'] = to_numbers_seq(
            self._register_param(kwargs, 'hidden_layers', (64, 64)))
        kwargs['gamma'] = float(self._register_param(kwargs, 'gamma', 0.99))
        kwargs['tau'] = float(self._register_param(kwargs, 'tau', 0.002))
        kwargs['gradient_clip'] = self._register_param(kwargs, 'gradient_clip')
        kwargs['batch_size'] = int(
            self._register_param(kwargs, 'batch_size', 64))
        kwargs['buffer_size'] = int(
            self._register_param(kwargs, 'buffer_size', int(1e6)))
        kwargs['warm_up'] = int(self._register_param(kwargs, 'warm_up', 0))
        kwargs['update_freq'] = int(
            self._register_param(kwargs, 'update_freq', 1))
        kwargs['number_updates'] = int(
            self._register_param(kwargs, 'number_updates', 1))

        self.agents: Dict[str, DQNAgent] = {
            agent_name: DQNAgent(state_size,
                                 action_size,
                                 name=agent_name,
                                 **kwargs)
            for agent_name in self.agent_names
        }

        self.reset()
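For context, here is a minimal instantiation sketch for the constructor above. The class name and import path are assumptions (written here as IQLAgents from ai_traineree.multi_agents.iql); only the constructor arguments and the per-name agents dictionary follow directly from the __init__ shown in Example #8.

# Hypothetical usage sketch for the independent Q-Learning multi-agent wrapper.
# NOTE: the class name `IQLAgents` and its module path are assumptions; the keyword
# arguments below mirror the ones registered in the __init__ defined above.
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.multi_agents.iql import IQLAgents

state_size, action_size, num_agents = 4, 2, 3
multi_agent = IQLAgents(
    state_size,
    action_size,
    num_agents,
    hidden_layers=(64, 64),  # shape of the fully connected hidden layers
    gamma=0.99,              # discount factor, forwarded to every DQNAgent
    batch_size=32,           # samples per learning step
    update_freq=1,           # samples between learning sessions
    device='cpu',
)

# Each sub-agent is a regular DQNAgent keyed by its name; default names are "0", "1", ...
assert set(multi_agent.agents.keys()) == {'0', '1', '2'}
assert all(isinstance(a, DQNAgent) for a in multi_agent.agents.values())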
Example #9
def test_runs_dqn():
    # Assign
    task = GymTask('CartPole-v1')
    agent = DQNAgent(task.state_size, task.action_size, device=DEVICE)
    env_runner = EnvRunner(task, agent, max_iterations=50)

    # Act
    env_runner.run(reward_goal=10, max_episodes=10, force_new=True)
Example #10
def test_dqn_from_state():
    # Assign
    state_shape, action_size = 10, 3
    agent = DQNAgent(state_shape, action_size)
    agent_state = agent.get_state()

    # Act
    new_agent = DQNAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DQNAgent)
    assert new_agent.hparams == agent.hparams
    assert all([torch.all(x == y) for (x, y) in zip(agent.net.parameters(), new_agent.net.parameters())])
    assert all([torch.all(x == y) for (x, y) in zip(agent.target_net.parameters(), new_agent.target_net.parameters())])
    assert new_agent.buffer == agent.buffer
Example #11
 def from_state(state: AgentState) -> AgentBase:
     if state.model == DQNAgent.name:
         return DQNAgent.from_state(state)
     elif state.model == PPOAgent.name:
         return PPOAgent.from_state(state)
     else:
         raise ValueError(
             f"Agent state contains unsupported model type: '{state.model}'"
         )
Example #12
def test_dqn_from_state_one_updated():
    # Assign
    state_shape, action_size = 10, 3
    agent = DQNAgent(state_shape, action_size)
    feed_agent(agent, 2*agent.batch_size)  # Feed 1
    agent_state = agent.get_state()
    feed_agent(agent, 100)  # Feed 2 - to make different

    # Act
    new_agent = DQNAgent.from_state(agent_state)

    # Assert
    assert id(agent) != id(new_agent)
    # assert new_agent == agent
    assert isinstance(new_agent, DQNAgent)
    assert new_agent.hparams == agent.hparams
    assert any([torch.any(x != y) for (x, y) in zip(agent.net.parameters(), new_agent.net.parameters())])
    assert any([torch.any(x != y) for (x, y) in zip(agent.target_net.parameters(), new_agent.target_net.parameters())])
    assert new_agent.buffer != agent.buffer
Example #13
 def from_state(state: AgentState) -> AgentBase:
     norm_model = state.model.upper()
     if norm_model == DQNAgent.name.upper():
         return DQNAgent.from_state(state)
     elif norm_model == PPOAgent.name.upper():
         return PPOAgent.from_state(state)
     elif norm_model == DDPGAgent.name.upper():
         return DDPGAgent.from_state(state)
     elif norm_model == RainbowAgent.name.upper():
         return RainbowAgent.from_state(state)
     else:
         raise ValueError(
             f"Agent state contains unsupported model type: {state.model}")
Example #14
def test_dqn_get_state_compare_different_agents():
    # Assign
    state_size, action_size = 3, 2
    agent_1 = DQNAgent(state_size, action_size, device='cpu', n_steps=1)
    agent_2 = DQNAgent(state_size, action_size, device='cpu', n_steps=2)

    # Act
    state_1 = agent_1.get_state()
    state_2 = agent_2.get_state()

    # Assert
    assert state_1 != state_2
    assert state_1.model == state_2.model
Example #15
config = {
    "update_freq": 10,
    "batch_size": 100,
    "warm_up": 100,
    "lr": 1e-4,
    "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform": agent_state_tranform,
}
agent = DQNAgent(state_size, task.action_size, **config)
env_runner = EnvRunner(task, agent, max_iterations=2000, writer=writer)

scores = env_runner.run(reward_goal=500,
                        max_episodes=1000,
                        log_every=1,
                        eps_start=0.99,
                        gif_every_episodes=100)
env_runner.interact_episode(render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
Example #16
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import pylab as plt

writer = SummaryWriter()

env_name = 'CartPole-v1'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, n_steps=5)
env_runner = EnvRunner(task, agent, writer=writer)

scores = env_runner.run(
    reward_goal=100,
    max_episodes=5000,
    eps_end=0.002,
    eps_decay=0.99,
    gif_every_episodes=500,
    force_new=True,
)
env_runner.interact_episode(1000, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
Example #17
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.env_runner import EnvRunner
from ai_traineree.tasks import GymTask

import pylab as plt

env_name = 'Breakout-ram-v0'
task = GymTask(env_name)
agent = DQNAgent(task.state_size, task.action_size, hidden_layers=(400, 300))
env_runner = EnvRunner(task, agent)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(reward_goal=5, max_episodes=5, log_every=1)
env_runner.interact_episode(100, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()
Example #18
import numpy as np
import pylab as plt

from ai_traineree.env_runner import EnvRunner
from ai_traineree.agents.dqn import DQNAgent
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLander-v2'
task: TaskType = GymTask(env_name)
config = {'batch_size': 64}
agent = DQNAgent(task.state_size, task.action_size, config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)
scores = env_runner.run(50, 800, eps_start=1.0, eps_end=0.05, eps_decay=0.995)
env_runner.interact_episode(0, render=True)

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()