# Example #1
# 0
# Author: Marlos C. Machado

import utils
import random
import plotting
import numpy as np
from gridworld import GridWorld

if __name__ == "__main__":
    # Read input arguments
    args = utils.ArgsParser.read_input_args()

    # Create environment
    env = GridWorld(path=args.input)
    num_states = env.get_num_states()
    num_actions = len(env.get_action_set())
    num_rows, num_cols = env.get_grid_dimensions()

    # Sarsa(0):
    gamma = 0.95
    step_size = 0.1
    num_steps_episode = []
    for seed in range(args.num_seeds):
        random.seed(seed)
        num_steps_episode.append([])
        q_values = np.zeros((num_states, num_actions))
        for i in range(args.num_episodes):
            s = env.get_current_state()
            a = utils.epsilon_greedy(q_values[s])
            num_steps = 0
            while num_steps < args.max_length_ep and not env.is_terminal():