Exemplo n.º 1
0
def test_learn_play(d = 6, num_layers = 2, num_units = 100,
                    eps = 0.5, iters = 10000, draw=False,
                    tabular = True, batch=False, batch_epochs=10,
                    num_episodes = 10, episode_length = 100):
    """Learn a Q function on a ``No_Exit(d)`` game, report progress, then play it.

    A tabular or neural-network Q function is trained with ``Q_learn`` or
    ``Q_learn_batch``. During training the greedy policy is scored about ten
    times; the scores are printed, serialized for upload and plotted.
    Afterwards the greedy policy of the learned Q function is simulated for
    ``num_episodes`` episodes and each episode's reward is printed.

    Args:
        d: Passed to ``No_Exit``. When ``d <= 5`` the playback episodes are
            half as long as ``episode_length``.
        num_layers: Passed to ``NNQ`` (only used when ``tabular`` is false).
        num_units: Passed to ``NNQ`` (only used when ``tabular`` is false).
        eps: Currently not used by this function; kept so existing callers
            that pass it keep working.
        iters: Passed to the learner as ``iters``; also sets how often the
            policy is scored (every ``iters / 10`` iterations, or every
            iteration when ``iters <= 10``).
        draw: Passed to ``sim_episode`` during playback.
        tabular: Use ``TabularQ`` when true, ``NNQ`` otherwise.
        batch: Use ``Q_learn_batch`` when true, ``Q_learn`` otherwise. Also
            makes ``NNQ`` use ``batch_epochs`` epochs instead of 1.
        batch_epochs: ``epochs`` for ``NNQ`` when ``batch`` is true.
        num_episodes: Number of episodes passed to ``evaluate`` for each
            progress score, and number of playback episodes.
        episode_length: Episode length passed to ``evaluate`` and used for
            playback (halved when ``d <= 5``).

    Returns:
        The third element returned by ``sim_episode`` for the last playback
        episode, or ``None`` when ``num_episodes`` is 0.

    Note:
        ``Q_learn_batch`` is always called with ``episode_length=100`` and
        ``n_episodes=10``, independent of the arguments given here.
    """
    game = No_Exit(d)

    # Score the policy about ten times over the run; tiny runs are scored
    # on every iteration so the interval can never be zero.
    iters_per_value = 1 if iters <= 10 else int(iters / 10.0)
    scores = []

    # The parameter keeps the name `iter` (although it shadows the builtin)
    # in case the learner passes it by keyword.
    def interact(q, iter=0):
        if iter % iters_per_value == 0:
            # Only the first element of evaluate()'s result is recorded.
            score = evaluate(game, num_episodes, episode_length,
                             lambda s: greedy(q, s))[0]
            scores.append((iter, score))
            print('score', scores[-1], flush=True)

    if tabular:
        q = TabularQ(game.states, game.actions)
    else:
        q = NNQ(game.states, game.actions, game.state2vec, num_layers, num_units,
                epochs=batch_epochs if batch else 1)

    if batch:
        qf = Q_learn_batch(game, q, iters=iters, episode_length = 100, n_episodes=10,
                           interactive_fn=interact)
    else:
        qf = Q_learn(game, q, iters=iters, interactive_fn=interact)

    if scores:
        print('String to upload (incude quotes): "%s"'%toHex(pickle.dumps([tabular, batch, scores], 0).decode()))
        # Plot learning curve
        plot_points(np.array([s[0] for s in scores]),
                    np.array([s[1] for s in scores]))

    # Floor division keeps the step count an integer; `/` would hand a float
    # to sim_episode under Python 3.
    play_length = episode_length if d > 5 else episode_length // 2

    # Pre-bind the result so that num_episodes == 0 returns None instead of
    # raising UnboundLocalError at the return statement.
    animation = None
    for _ in range(num_episodes):
        reward, _ignored, animation = sim_episode(game, play_length,
                                                  lambda s: greedy(qf, s), draw=draw)
        print('Reward', reward)
    return animation
Exemplo n.º 2
0
def test_solve_play(d=6, draw=False,
                    num_episodes=10, episode_length=100):
    """Solve a ``TempSim`` game by value iteration and play the greedy policy.

    Args:
        d: Only selects the episode length: the full ``episode_length`` when
            ``d > 5``, half of it otherwise.
        draw: Passed to ``sim_episode``.
        num_episodes: Number of episodes to simulate.
        episode_length: Episode length before the optional halving.

    Prints the reward of every simulated episode; returns nothing.
    """
    game = TempSim()
    qf = value_iteration(game, TabularQ(game.states, game.actions))

    # Floor division keeps the step count an integer; `/` would hand a float
    # to sim_episode under Python 3.
    play_length = episode_length if d > 5 else episode_length // 2

    for _ in range(num_episodes):
        reward, _ignored = sim_episode(game, play_length,
                                       lambda s: greedy(qf, s), draw=draw)
        print('Reward', reward)
Exemplo n.º 3
0
def test_solve_play(d = 5, draw=False,
                    num_episodes = 10, episode_length = 100):
    """Solve a ``No_Exit(d)`` game by value iteration and play the greedy policy.

    Args:
        d: Passed to ``No_Exit``. When ``d <= 3`` the episodes are half as
            long as ``episode_length``.
        draw: Passed to ``sim_episode``.
        num_episodes: Number of episodes to simulate.
        episode_length: Episode length before the optional halving.

    Returns:
        The third element returned by ``sim_episode`` for the last episode,
        or ``None`` when ``num_episodes`` is 0. The reward of every episode
        is printed.
    """
    game = No_Exit(d)
    qf = value_iteration(game, TabularQ(game.states, game.actions))

    # Floor division keeps the step count an integer; `/` would hand a float
    # to sim_episode under Python 3.
    play_length = episode_length if d > 3 else episode_length // 2

    # Pre-bind the result so that num_episodes == 0 returns None instead of
    # raising UnboundLocalError at the return statement.
    animation = None
    for _ in range(num_episodes):
        reward, _ignored, animation = sim_episode(game, play_length,
                                                  lambda s: greedy(qf, s), draw=draw)
        print('Reward', reward)
    return animation
Exemplo n.º 4
0
def emulate(game, q, episode_length, num_episodes=1):
    """Simulate the greedy policy of ``q`` on ``game`` and print each reward.

    Runs ``num_episodes`` episodes of ``episode_length`` through
    ``sim_episode``, handing it ``print`` as ``interactive_fn``, and prints
    the reward of every episode. Returns nothing.
    """
    def policy(state):
        # Always pick the action that `greedy` selects for q in this state.
        return greedy(q, state)

    episodes_done = 0
    while episodes_done < num_episodes:
        outcome = sim_episode(game, episode_length, policy,
                              interactive_fn=print)
        # sim_episode is expected to yield a pair; only the reward is used.
        reward, _ignored = outcome
        print('Reward', reward)
        episodes_done += 1