# Author: Marlos C. Machado import utils import random import plotting import numpy as np from gridworld import GridWorld if __name__ == "__main__": # Read input arguments args = utils.ArgsParser.read_input_args() # Create environment env = GridWorld(path=args.input) num_states = env.get_num_states() num_actions = len(env.get_action_set()) num_rows, num_cols = env.get_grid_dimensions() # Sarsa(0): gamma = 0.95 step_size = 0.1 num_steps_episode = [] for seed in range(args.num_seeds): random.seed(seed) num_steps_episode.append([]) q_values = np.zeros((num_states, num_actions)) for i in range(args.num_episodes): s = env.get_current_state() a = utils.epsilon_greedy(q_values[s]) num_steps = 0 while num_steps < args.max_length_ep and not env.is_terminal():