# In[ ]:

# Run EPISODES training episodes, feeding every attempted move (valid or
# not) into the agent's experience store.
for episode in range(EPISODES):
    # Each episode starts from a freshly reset environment.
    observation = env.reset()
    QL.curr_episode = episode

    # NOTE(review): no exit from this loop is visible in this excerpt;
    # presumably the end-of-episode handling follows further down -- confirm.
    while True:
        if RENDER_ENV:
            env.render()

        valid_move = False
        action = None

        # Keep stepping until the environment reports the move as valid.
        while not valid_move:

            # Ask the agent for an action on the first attempt only; later
            # attempts reuse the incremented action set at the bottom of
            # this loop. `is None` is the correct singleton comparison.
            if action is None:
                action = QL.choose_action(observation)

            observation_, reward, done, info = env.step(action)
            valid_move = info['valid']

            # Rebind reward to the value computed by the agent's own
            # reward function.
            reward = QL.calculate_reward(valid_move, done, reward,
                                         observation_)

            # Every attempt is recorded, together with its validity flag.
            QL.save_experience(observation=observation,
                               action=action,
                               reward=reward,
                               observation_=observation_,
                               is_game_over=done,
                               is_move_valid=valid_move)

            # Move on to the next action index, wrapping at QL.n_y, so a
            # rejected move is retried with a different action.
            action = (action + 1) % QL.n_y
# Example #2
# 0
from q_state import next_state, random_state, actions
from q_learning import QLearning
from q_table import QTable

if __name__ == "__main__":
    # Training configuration: number of episodes and how often to save.
    total_episodes = 100
    checkpoint_every = 10

    # The learner reads and updates the shared Q-table.
    table = QTable(actions)
    learning = QLearning(table)

    for episode_number in range(1, total_episodes + 1):
        init_state = random_state()
        steps_taken = 0
        reward = 0

        # Replay from the same starting state until a rollout finishes
        # with a reward of exactly 1.
        while reward != 1:
            state = init_state
            finished = False
            while not finished:
                steps_taken += 1
                action = learning.choose_action(state)
                following_state, reward, finished = next_state(
                    state, action, table)
                learning.learn(state, action, reward, following_state,
                               finished)
                if not finished:
                    state = following_state

        # Progress report: start state, total steps used, Q-table size.
        print(init_state, steps_taken, len(table.q_table))

        # Save the table after every `checkpoint_every` episodes.
        if episode_number % checkpoint_every == 0:
            table.save()