Exemplo n.º 1
0
        # Epsilon-greedy action selection: with probability `epsilon` pick a
        # random direction from {-1, 0, 1}; otherwise ask the model.
        if random.random() < epsilon:
            direction = random.randint(-1, 1)  # both endpoints are inclusive
        else:
            # Prepend a leading axis of length 1 so the model receives a
            # batch containing this single stack of frames.
            batched_frames = last_frames.reshape((1,) + last_frames.shape)
            q_values = model.predict(batched_frames)
            # argmax returns a 0-based index; subtracting 1 maps it onto the
            # same -1..1 range as the random branch.
            # NOTE(review): assumes the model emits exactly three outputs.
            direction = np.argmax(q_values) - 1

        # Advance the environment by one step and accumulate its reward.
        reward = gameEnv.stepForwardWithReward(direction)
        total_reward += reward
        # Not read in this excerpt; presumably the enclosing loop's exit
        # condition -- confirm against the loop header.
        over = gameEnv.isOver()

        last_frames_new = gameEnv.getLastFrames(num_frames)
        # Reading .shape does not need a copy of the array.
        print('last_frames shape', last_frames_new.shape)

        # Record the transition as [state, next state, action, reward]. The
        # frames are copied so the stored entry does not alias the arrays
        # that keep being used by this loop.
        memoryPool.addMemory([
            last_frames.copy(),
            last_frames_new.copy(),
            direction,
            reward,
        ])
        last_frames = last_frames_new

        # Train on one batch taken from the memory pool and add the
        # resulting loss to the running total.
        X_batch, targets = memoryPool.getMemoryBatch(batch_size, model, gamma)
        loss += float(model.train_on_batch(X_batch, targets))

    # Save the model every `checkpoint_freq` episodes. A falsy frequency
    # (e.g. 0 or None) disables checkpointing and, thanks to short-circuit
    # evaluation, also keeps the modulo from being evaluated.
    checkpoint_due = checkpoint_freq and episode % checkpoint_freq == 0
    if checkpoint_due:
        model.save(f'dqn-{episode:08d}.model')

    # One progress line per episode; the episode number is shown 1-based
    # here, while the checkpoint file name above uses the raw index.
    print(
        f'Episode {episode + 1:5d}/{num_episode:5d} | '
        f'Loss {loss:8.4f} | Total Reward {total_reward:4d}'
    )