def test_init_memory(self):
    # Stub out model compilation so no real Keras network is built.
    model = Sequential()
    when(model).compile(loss='mean_squared_error',
                        optimizer='Adam').thenReturn()

    # Mock an environment (spec'd on the project's FL class) with small
    # discrete observation and action spaces.
    environment = mock(
        {
            'observation_space': Discrete(8),
            'action_space': Discrete(3)
        }, FL)
    when(environment).reset().thenReturn(0)
    expect(environment, times=2).step(...).thenReturn((1, 10, True))

    configuration = mock(
        {
            'model': model,
            'memory_size': 2,
            'nb_iterations': 0,
            'training_params': [],
            'plot_training': False
        }, Configuration)

    test_agent = DQN(environment, configuration)

    # Constructing the agent should fill the replay memory, stepping the
    # environment exactly memory_size (= 2) times.
    verify(environment, times=2).step(...)
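For context, a constructor that satisfies this test must call environment.step once per replay-memory slot. A hedged sketch of such a pre-fill routine (the function name and transition format here are assumptions, not the project's actual code):

def init_memory(environment, memory_size, memory):
    # Fill the replay memory by stepping the environment once per slot.
    state = environment.reset()
    for _ in range(memory_size):
        action = environment.action_space.sample()
        next_state, reward, done = environment.step(action)
        memory.append((state, action, reward, next_state, done))
        state = environment.reset() if done else next_state
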
Example #2
def __init__(
        self,
        state_size,
        n_actions,
        args,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    super().__init__(state_size, n_actions, args, device=device)
    # Create the target network as a frozen copy of the policy network.
    self.target_net = DQN(state_size, n_actions,
                          layers=self.layers).to(self.device)
    self.target_net.load_state_dict(self.policy_net.state_dict())
    self.target_net.eval()
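The target network starts as a frozen copy of the policy network so it can provide stable bootstrap targets. During training it is typically re-synchronized at a fixed step interval; a minimal sketch, assuming a steps_done counter and target_update interval as in Example #3:

def maybe_sync_target(agent):
    # Assumed periodic sync: copy policy weights into the frozen target net.
    if agent.steps_done % agent.target_update == 0:
        agent.target_net.load_state_dict(agent.policy_net.state_dict())
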
Example #3
def __init__(
        self,
        state_size,
        n_actions,
        args,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    self.device = device

    # Exploration / exploitation params.
    self.steps_done = 0
    self.eps_threshold = 1
    self.eps_start = args.eps_start
    self.eps_end = args.eps_end
    self.eps_decay = args.eps_decay

    # RL params.
    self.target_update = args.target_update
    self.discount = args.discount

    # Env params.
    self.n_actions = n_actions
    self.state_size = state_size

    # Deep Q-network params.
    self.layers = args.layers
    self.batch_size = args.batch_size
    self.policy_net = DQN(state_size, n_actions,
                          layers=self.layers).to(self.device).float()
    self.target_net = None
    self.grad_clip = args.grad_clip

    # Select the optimizer by name; unrecognized names are rejected.
    if str(args.optimizer).lower() == 'adam':
        self.optimizer = optim.Adam(self.policy_net.parameters())
    elif str(args.optimizer).lower() == 'rmsprop':
        self.optimizer = optim.RMSprop(self.policy_net.parameters())
    else:
        raise NotImplementedError

    self.memory = ReplayMemory(args.replay_size)

    # Performance buffers.
    self.rewards_list = []
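The exploration fields above (eps_start, eps_end, eps_decay, steps_done) typically drive an epsilon-greedy action selector. A minimal sketch, assuming the common exponential-decay schedule; this function is not shown in the source and its exact form is an assumption:

import math
import random

import torch

def select_action(agent, state):
    # Anneal epsilon exponentially from eps_start toward eps_end
    # (assumed schedule, not necessarily the project's).
    agent.eps_threshold = agent.eps_end + (agent.eps_start - agent.eps_end) * \
        math.exp(-agent.steps_done / agent.eps_decay)
    agent.steps_done += 1
    if random.random() > agent.eps_threshold:
        with torch.no_grad():
            # Exploit: pick the action with the highest predicted Q-value.
            return agent.policy_net(state).argmax(dim=1).item()
    # Explore: pick a uniformly random action.
    return random.randrange(agent.n_actions)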