# Example 1: one-step SARSA on a 10x10 gridworld
def main(argv):
    """Run one-step SARSA trials on a 10x10 gridworld and plot mean episode length.

    Averages steps-per-episode over FLAGS.num_trials independent runs of
    FLAGS.num_episodes episodes each, then shows the learning curve.
    """
    del argv  # unused; required by the app.run(main) entry-point convention
    # 10x10 grid, start at cell 0, goal at cell 99 (opposite corner).
    gw = Gridworld(10, 10, 0, 99)
    # Agent implementation is selected dynamically by name, e.g. agents.sarsa.
    agent_module = importlib.import_module("agents." + FLAGS.agent)

    avg_num_steps = np.zeros(FLAGS.num_episodes)
    policy = getattr(policies.tabular_policies, FLAGS.policy)

    for _ in range(FLAGS.num_trials):
        # Fresh agent per trial; tabular state space is width * height cells.
        agent = agent_module.Agent(FLAGS.agent, gw.width * gw.height,
                                   FLAGS.gamma, policy, FLAGS.alpha)
        steps_per_episode = []

        for _ in range(FLAGS.num_episodes):
            state = gw.start
            action = agent.select_action(state)
            step = 0
            terminate = False
            # One-step SARSA: update with (s, a, r, s', a') every transition,
            # capped at FLAGS.max_steps to bound non-terminating episodes.
            while step < FLAGS.max_steps and not terminate:
                next_state = gw.apply_action(state, action)
                terminate, reward = gw.is_goal(next_state)
                next_action = agent.select_action(next_state)
                agent.update(state, action, reward, next_state, next_action)
                state = next_state
                action = next_action
                step += 1
            steps_per_episode.append(step)

        avg_num_steps += np.array(steps_per_episode)

    # Mean episode length per episode index, averaged across trials.
    avg_num_steps = avg_num_steps / FLAGS.num_trials
    plt.plot(avg_num_steps)
    plt.show()
# Example 2: n-step SARSA on a 10x10 gridworld with a wall
def main(argv):
    """Run n-step SARSA trials on a walled 10x10 gridworld; save and plot results.

    Averages steps-per-episode over FLAGS.num_trials runs, writes the curve to
    FLAGS.log_path/FLAGS.log_file via np.save, and saves the plot there too.
    """
    del argv  # unused; required by the app.run(main) entry-point convention
    # 10x10 grid, start at cell 0, goal at cell 80.
    gw = Gridworld(10, 10, 0, 80)
    # Wall across most of row 7 (grid value 1 marks a blocked cell).
    for i in range(7):
        gw.grid[7][i] = 1
    agent_module = importlib.import_module("agents." + FLAGS.agent)

    avg_num_steps = np.zeros(FLAGS.num_episodes)
    policy = getattr(policies.tabular_policies, FLAGS.policy)

    for _ in range(FLAGS.num_trials):
        # Fresh n-step agent per trial; FLAGS.n is the backup length.
        agent = agent_module.Agent(FLAGS.agent, gw.width * gw.height, FLAGS.n,
                                   FLAGS.gamma, policy, FLAGS.alpha)
        steps_per_episode = []

        for _ in range(FLAGS.num_episodes):
            # T is the episode length, unknown (infinite) until termination.
            # np.inf, not np.Inf: the capitalized alias was removed in NumPy 2.0.
            T = np.inf
            state = gw.start
            agent.reset_agent()
            agent.stored_states.append(state)
            action = agent.select_action(state)
            agent.stored_actions.append(action)
            step = 0
            tau = 0
            # n-step SARSA (Sutton & Barto, Sec. 7.2): keep iterating until the
            # last update time tau reaches T - 1, i.e. every stored transition
            # has been backed up.
            while tau != T - 1:
                if step < T:
                    next_state = gw.apply_action(state, action)
                    terminate, reward = gw.is_goal(next_state)
                    agent.stored_states.append(next_state)
                    agent.stored_rewards.append(reward)
                    if terminate or step == FLAGS.max_steps - 1:
                        T = step + 1
                    else:
                        next_action = agent.select_action(next_state)
                        agent.stored_actions.append(next_action)
                        state = next_state
                        action = next_action
                tau = step - agent.n + 1  # time whose value estimate is updated
                if tau >= 0:
                    agent.update(tau, T)
                step += 1

            # Record the true episode length T, not `step`: after termination
            # the loop runs agent.n - 1 extra iterations to flush the remaining
            # updates, so `step` overshoots the episode length by n - 1.
            # (Example 1 records the true length; this keeps the curves
            # comparable.)
            steps_per_episode.append(int(T))

        avg_num_steps += np.array(steps_per_episode)

    avg_num_steps = avg_num_steps / FLAGS.num_trials
    np.save(FLAGS.log_path + "/" + FLAGS.log_file, avg_num_steps)
    plt.plot(avg_num_steps)
    plt.savefig(FLAGS.log_path + "/" + FLAGS.log_file)