Example #1
import torch

from drl.deepq.train import TrainingHyperparameters, linear_increase, linear_decay
# PrioritizedReplayMemory is assumed to live in the same module as SimpleReplayMemory
# (see Example #2); the DuelingDQN and LearningModel imports are not shown in this example.
from drl.deepq.replay_memory import PrioritizedReplayMemory
from drl.openai.cartpole import CartPoleVisual

game_steps_per_step = 2
batch_per_game_step = 64
batch_size = game_steps_per_step * batch_per_game_step

w = 128
h = 64
t = 4
memory_size = 10000

hyperparams = TrainingHyperparameters(gamma=1,
                                      beta=linear_increase(0.05,
                                                           min_value=0.3,
                                                           max_value=1.),
                                      exploration_rate=linear_decay(
                                          0.05, max_value=0.8, min_value=0.01),
                                      batch_size=batch_size,
                                      game_steps_per_step=game_steps_per_step,
                                      copy_to_target_every=200,
                                      game_steps_per_epoch=1000)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using device %s' % device)

game = CartPoleVisual(w, h, t)
memory = PrioritizedReplayMemory(memory_size)
policy_net = DuelingDQN(w, h, t, len(game.actions)).to(device)

model = LearningModel(game=game,
                      memory=memory,
                      policy_net=policy_net,
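                      # the original snippet breaks off above; the remaining keyword
                      # arguments are assumptions modelled on the LearningModel call
                      # in Example #4, not part of this example
                      target_net=DuelingDQN(w, h, t, len(game.actions)).to(device),
                      optimizer=torch.optim.Adam(policy_net.parameters(), lr=1e-4),
                      device=device)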
Example #2
import torch

from drl.deepq.replay_memory import SimpleReplayMemory
from drl.deepq.train import TrainingHyperparameters, linear_increase, linear_decay, train, print_validation
from drl.openai.pong import Pong30Min

game_steps_per_step = 1
batch_per_game_step = 32
batch_size = game_steps_per_step * batch_per_game_step

w = h = 84
t = 4
memory_size = 100000

hyperparams = TrainingHyperparameters(
  gamma=0.99,
  beta=linear_increase(0.02),
  exploration_rate=linear_decay(0.006, max_value=1., min_value=0.02),
  batch_size=batch_size,
  game_steps_per_step=game_steps_per_step,
  copy_to_target_every=1000,
  game_steps_per_epoch=1000,
  init_memory_steps=10000,
  warmup_rounds=100
)


def create_game():
  return Pong30Min(w, h)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using device %s' % device)
Example #3
import torch

from drl.deepq.train import TrainingHyperparameters, linear_increase, linear_decay, train, print_validation
from drl.openai.pong import Pong

steps_to_train = 500000
episode_factor = 5
w = h = 84
t = 4
memory_size = 50000
game_steps_per_step = 4
batch_per_game_step = 32
batch_size = game_steps_per_step * batch_per_game_step
hyperparams = TrainingHyperparameters(
    gamma=0.99,
    beta=linear_increase(0.01 * episode_factor),
    exploration_rate=linear_decay(0.008 * episode_factor,
                                  max_value=1.,
                                  min_value=0.01),
    batch_size=batch_size,
    game_steps_per_step=game_steps_per_step,
    copy_to_target_every=1000,
    game_steps_per_epoch=1000 * episode_factor,
    multi_step_n=4,
    warmup_rounds=100,
    init_memory_steps=300,
    parallel_game_processes=2,
    max_batches_prefetch=6,
    states_on_device=True)


def create_game():
    return Pong(w, h, t, store_frames_as=torch.half)
Example #4
from datetime import datetime

import torch
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter

from drl.deepq.train import TrainingHyperparameters, linear_increase, linear_decay, train, print_validation
# PrioritizedReplayMemory is assumed to share a module with SimpleReplayMemory; the
# DQN_RBP, LearningModel, TrainingStatus, save_checkpoint, and create_game imports come
# from the surrounding project and are not shown in this example.
from drl.deepq.replay_memory import SimpleReplayMemory, PrioritizedReplayMemory


def train_with(device: torch.device, steps_to_train: int,
               game_steps_per_step: int, prio_memory: bool):
    episode_factor = 5
    w = h = 84
    t = 4
    memory_size = 50000
    batch_per_game_step = 32
    batch_size = game_steps_per_step * batch_per_game_step
    hyperparams = TrainingHyperparameters(
        gamma=0.99,
        beta=linear_increase(0.01 * episode_factor),
        exploration_rate=linear_decay(0.008 * episode_factor,
                                      max_value=1.,
                                      min_value=0.01),
        batch_size=batch_size,
        game_steps_per_step=game_steps_per_step,
        copy_to_target_every=1000,
        game_steps_per_epoch=1000 * episode_factor,
        multi_step_n=4,
        warmup_rounds=500,
        init_memory_steps=1000,
        parallel_game_processes=2,
        max_batches_prefetch=10,
        states_on_device=True)

    with create_game() as _game:
        strategy_name = 'floaton-steps%d-%s' % (game_steps_per_step, 'prm'
                                                if prio_memory else 'srm')
        if prio_memory:
            memory = PrioritizedReplayMemory(memory_size)
        else:
            memory = SimpleReplayMemory(memory_size)
        policy_net = DQN_RBP(w, h, t, len(_game.actions)).to(device)
        target_net = DQN_RBP(w, h, t, len(_game.actions)).to(device)
        optimizer = Adam(policy_net.parameters(), lr=1e-4)

        summary_writer = SummaryWriter(
            'runs/%s-%s-%s' %
            (_game.name, strategy_name, datetime.now().isoformat()))
        model = LearningModel(memory=memory,
                              policy_net=policy_net,
                              target_net=target_net,
                              input_dtype=torch.float,
                              optimizer=optimizer,
                              strategy_name=strategy_name,
                              game_name=_game.name,
                              device=device,
                              status=TrainingStatus(summary_writer))
    print('%s: Model prepared' % strategy_name)

    # %%
    train(model,
          create_game,
          hyperparams,
          steps_to_train // hyperparams.game_steps_per_epoch,
          save_every=0)
    save_checkpoint(model)

    with create_game() as game:
        print('Running validation of', strategy_name)
        print_validation(model, game, 5)
    print('%s completed' % strategy_name)
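
For reference, a hypothetical driver for train_with; the device selection mirrors the earlier examples, while the step budget and flag values below are illustrative assumptions only.

# Hypothetical invocation of train_with; steps_to_train, game_steps_per_step and
# prio_memory are illustrative values, not taken from the original example.
if __name__ == '__main__':
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Using device %s' % device)
    train_with(device, steps_to_train=500000, game_steps_per_step=4, prio_memory=True)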