示例#1
0
    # Build the grid-world environment from the file path given in args.input
    # and query the sizes needed to allocate the action-value table.
    env = GridWorld(path=args.input)
    num_states = env.get_num_states()
    num_actions = len(env.get_action_set())
    # Grid dimensions are fetched here but not used in the visible part of
    # this function.
    num_rows, num_cols = env.get_grid_dimensions()

    # Sarsa(0): one-step on-policy TD control.
    # gamma scales the bootstrapped next-state value; step_size scales the
    # TD error applied to the table on every step.
    gamma = 0.95
    step_size = 0.1
    # num_steps_episode[seed][episode] -> number of steps taken in that episode.
    num_steps_episode = []
    for seed in range(args.num_seeds):
        # Seeds Python's `random` module only.
        # NOTE(review): if utils.epsilon_greedy draws from numpy.random, this
        # call does not make runs reproducible -- confirm against utils.
        random.seed(seed)
        num_steps_episode.append([])
        # Each seed is an independent run: the table starts again from zeros.
        q_values = np.zeros((num_states, num_actions))
        for i in range(args.num_episodes):
            # NOTE(review): no reset is issued before the first episode in the
            # visible code; this presumably relies on GridWorld starting in
            # its initial state right after construction -- confirm.
            s = env.get_current_state()
            # `a` is an index into env.get_action_set(), chosen from the
            # current state's row of action values.
            a = utils.epsilon_greedy(q_values[s])
            num_steps = 0
            # Run until the environment reports a terminal state or the
            # per-episode step cap (args.max_length_ep) is reached.
            while num_steps < args.max_length_ep and not env.is_terminal():
                # The value returned by env.act is used as the reward below.
                r = env.act(env.get_action_set()[a])
                next_s = env.get_current_state()
                # On-policy: the next action is selected before the update and
                # is the one actually executed on the next iteration.
                next_a = utils.epsilon_greedy(q_values[next_s])

                # TD error: r + gamma * Q(s', a') - Q(s, a).
                # NOTE(review): rows of q_values are only written for states
                # an action was taken from, so a terminal next_s presumably
                # still holds its initial zeros here -- verify is_terminal()
                # semantics.
                td_error = r + gamma * q_values[next_s][next_a] - q_values[s][a]
                q_values[s][a] = q_values[s][a] + step_size * td_error

                # Shift the (state, action) pair forward for the next step.
                s = next_s
                a = next_a
                num_steps += 1
            # Reset the environment for the next episode and record how many
            # steps this one took (equals the cap if it was cut off).
            env.reset()
            num_steps_episode[seed].append(num_steps)