示例#1
0
文件: train.py 项目: apourchot/DOOMRL
print("Starting the training.")
# Timestamp taken before the first epoch; it is not read inside this loop
# (presumably used for elapsed-time reporting further down -- confirm).
time_start = time()
for epoch in range(epochs):

    print("\nEpoch %d\n-------" % (epoch + 1))
    train_episodes_finished = 0

    # training
    print("Training...")
    train_scores = []
    losses = []
    game.new_episode()

    for learning_step in trange(learning_steps_per_epoch, leave=False):

        # One model step in training mode; whatever it returns is recorded
        # as the loss for this step.
        loss = model.step(training=True)
        losses.append(loss)
        if game.is_episode_finished():
            # Record the finished episode's total reward and start a new
            # episode so the remaining steps of the epoch keep training.
            score = game.get_total_reward()
            train_scores.append(score)
            game.new_episode()
            train_episodes_finished += 1

    train_scores = np.array(train_scores)
    losses = np.array(losses)
    print("%d training episodes played." % train_episodes_finished)
    # NOTE: sys.getsizeof is shallow -- it reports the size of the container
    # object only, not of the transitions stored inside it.
    print("Current size of the memory buffer:", sys.getsizeof(model.memory))
    if train_scores.size:
        print(
            "Results: mean score: %.1f +/- %.1f," %
            (train_scores.mean(), train_scores.std()),
            "min: %.1f," % train_scores.min(),
            "max: %.1f," % train_scores.max())
    else:
        # No episode ended within this epoch's step budget. Reducing an
        # empty array would give NaN for mean/std and raise ValueError for
        # min/max, aborting the run, so report the situation instead.
        print("Results: no training episode finished during this epoch.")
示例#2
0
文件: test.py 项目: apourchot/DOOMRL
            actions,
            file_name,
            ddqn=use_ddqn,
            parameter_exploration=use_parameter_exploration,
            gpu=use_gpu,
            loading=1)

# Evaluation pass: run the model for a fixed number of episodes with
# training disabled, then summarise the collected total rewards.
print("======================================")
print("Testing trained neural network.")

print("Testing...")
test_scores = []

for _episode in range(episodes_to_watch):

    game.new_episode()
    # Step the model (training off, showing on) until the game reports
    # that the episode has finished.
    while True:
        if game.is_episode_finished():
            break
        model.step(training=False, showing=True)

    # Pause for a second between episodes before reading the final score.
    sleep(1.0)
    score = game.get_total_reward()
    test_scores.append(score)
    print("Total score: ", score)

test_scores = np.array(test_scores)
print("%d test episodes played." % episodes_to_watch)

# Build the three summary fragments in the same order the statistics were
# originally evaluated, then print them space-separated on one line.
mean_fragment = "Results: mean: %.1f +/- %.1f," % (
    test_scores.mean(), test_scores.std())
min_fragment = "min: %.1f," % test_scores.min()
max_fragment = "max: %.1f," % test_scores.max()
print(mean_fragment, min_fragment, max_fragment)