Example #1
File: dqn.py  Project: weidler/RLaSpa
    def update(self, state, action, reward, next_state, done) -> float:
        loss: torch.Tensor = super().update(state, action, reward, next_state, done)

        if self.total_steps_done % 100 == 0:
            update_agent_model(self.model, self.target_model)

        return loss
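Example #1 relies on update_agent_model, which is not reproduced in these snippets. A minimal sketch, assuming it performs a hard copy of the online network's parameters into the target network (the usual DDQN target sync):

import torch


def update_agent_model(current: torch.nn.Module, target: torch.nn.Module) -> None:
    # Hard-copy the online network's weights into the target network,
    # so the target lags the online model between periodic syncs.
    target.load_state_dict(current.state_dict())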
Example #2
def train(iterations: int, batch_size: int):
    state = env.reset()
    losses = []
    all_rewards = []
    episode_reward = 0
    for iteration in range(1, iterations + 1):
        epsilon = epsilon_calculator.value(time_step=iteration)
        action = current_model.act(state=state, epsilon=epsilon)

        next_state, reward, done, _ = env.step(action)
        memory.push(state, action, reward, next_state, done)

        state = next_state
        episode_reward += reward

        if done:
            state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0

        if len(memory) > batch_size:
            # once enough transitions have been stored, sample a batch and compute the TD loss
            loss = compute_td_loss(batch_size=batch_size, beta=args.beta)
            losses.append(loss.item())

        if iteration % 100 == 0:
            update_agent_model(current=current_model, target=target_model)

        if iteration % 200 == 0:
            print('Iteration: {0}'.format(iteration))
            print('Rewards: {0}'.format(all_rewards[-9:]))
    update_agent_model(current=current_model, target=target_model)
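compute_td_loss is called in the training loop above but not included here. The following is a minimal sketch of a double-DQN TD loss with prioritized-replay importance weights; the memory.sample / memory.update_priorities signatures and the use of the module-level current_model, target_model, optimizer and args are assumptions, not the project's actual API:

def compute_td_loss(batch_size: int, beta: float) -> torch.Tensor:
    # Assumed sampling API: transitions plus importance weights and tree indices.
    state, action, reward, next_state, done, weights, indices = memory.sample(batch_size, beta)

    state = torch.as_tensor(state, dtype=torch.float32)
    next_state = torch.as_tensor(next_state, dtype=torch.float32)
    action = torch.as_tensor(action, dtype=torch.int64)
    reward = torch.as_tensor(reward, dtype=torch.float32)
    done = torch.as_tensor(done, dtype=torch.float32)
    weights = torch.as_tensor(weights, dtype=torch.float32)

    # Q-value of the action actually taken.
    q_value = current_model(state).gather(1, action.unsqueeze(1)).squeeze(1)
    # Double-DQN target: the online network picks the action, the target network evaluates it.
    next_action = current_model(next_state).argmax(dim=1, keepdim=True)
    next_q_value = target_model(next_state).gather(1, next_action).squeeze(1).detach()
    expected_q_value = reward + args.gamma * next_q_value * (1 - done)

    td_error = q_value - expected_q_value
    loss = (weights * td_error.pow(2)).mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Refresh priorities with the new absolute TD errors (assumed API).
    memory.update_priorities(indices, td_error.abs().detach().numpy() + 1e-6)
    return loss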
Example #3
File: dqn.py  Project: weidler/RLaSpa
    def __init__(self, num_features: int, num_actions: int, eps_calculator: Schedule, memory_eps_calculator: Schedule,
                 memory_size: int = 10000, batch_size: int = 32, learning_rate: float = 2e-3, gamma: float = 0.99,
                 memory_delay: int = 5000, representation_network: torch.nn.Module = None) -> None:
        # configure parent parameters
        super().__init__(num_features, num_actions, eps_calculator, memory_eps_calculator, memory_size, batch_size,
                         learning_rate, gamma, memory_delay, representation_network)
        # the target model also needs the representation network; otherwise copying parameters over is non-trivial
        self.target_model = DQN(num_features=num_features, num_actions=num_actions,
                                representation_network=representation_network)
        update_agent_model(current=self.model, target=self.target_model)
Example #4
File: dqn.py  Project: weidler/RLaSpa
    def finish_training(self) -> None:
        super().finish_training()
        update_agent_model(self.model, self.target_model)
Example #5
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='DDQN agent execution')
    parser.add_argument('--env', type=str, metavar='E', default='CartPole-v0', help='GYM environment')
    parser.add_argument('--init_eps', type=float, metavar='I', default=1.0, help='Initial epsilon')
    parser.add_argument('--min_eps', type=float, metavar='M', default=0.01, help='Minimum epsilon')
    parser.add_argument('--eps_decay', type=int, metavar='D', default=5000, help='Epsilon decay')
    parser.add_argument('--gamma', type=float, metavar='G', default=0.99, help='Gamma')
    parser.add_argument('--memory_size', type=int, metavar='S', default=10000, help='Memory size')
    parser.add_argument('--alpha', type=float, metavar='A', default=0.8,
                        help='How much prioritization is used (0 - no prioritization, 1 - full prioritization)')
    parser.add_argument('--beta', type=float, metavar='B', default=0.8,
                        help='Degree to use importance weights (0 - no corrections, 1 - full correction)')
    parser.add_argument('--batch_size', type=int, metavar='BS', default=32, help='Batch size')
    parser.add_argument('--iterations', type=int, metavar='IT', default=30000, help='Training iterations')
    args = parser.parse_args()
    # Environment information extraction
    env = gym.make(args.env)
    number_of_observations = env.observation_space.shape[0]
    number_of_actions = env.action_space.n
    # Agent creation and configuration
    current_model = DQN(num_features=number_of_observations, num_actions=number_of_actions)
    target_model = DQN(num_features=number_of_observations, num_actions=number_of_actions)
    update_agent_model(current=current_model, target=target_model)
    optimizer = optim.Adam(current_model.parameters())
    memory = PrioritizedReplayMemory(capacity=args.memory_size, alpha=args.alpha)
    epsilon_calculator = ExponentialSchedule(initial_p=args.init_eps, min_p=args.min_eps, decay=args.eps_decay)
    # Training
    train(iterations=args.iterations, batch_size=args.batch_size)
    play(iterations=10000, render=True)
    save_model(target_model, "../../models/ddqn.model")
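ExponentialSchedule, used above to anneal epsilon during training, is also not reproduced in these snippets. A plausible minimal sketch, assuming an exponential decay from initial_p towards min_p; the project's exact formula may differ:

import math


class ExponentialSchedule:
    def __init__(self, initial_p: float, min_p: float, decay: int) -> None:
        self.initial_p = initial_p
        self.min_p = min_p
        self.decay = decay

    def value(self, time_step: int) -> float:
        # Decay epsilon exponentially towards the minimum value.
        return self.min_p + (self.initial_p - self.min_p) * math.exp(-time_step / self.decay)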