Example #1
def run_agent_RMS_value(num_runs, num_episodes, discount, step_size, step=1):
    """ Run SARSA agent for num_episodes to get the state values """
    mdp = RandomWalk(19, -1)
    s = mdp.init()

    # ground truth for value
    gt_v = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # initial value
    init_v = np.asarray([0.5] * mdp.num_states())[1:-1]

    # Arrays for RMS error over all states
    rms_err = np.asarray([0.0] * (num_episodes + 1))
    sum_rms_err = np.asarray([0.0] * (num_episodes + 1))
    rms_err[0] = np.sqrt(np.mean(np.square(init_v - gt_v)))

    # create n-step SARSA agent
    agent = Sarsa(mdp, s, step)

    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            rms_err[i + 1] = np.sqrt(np.mean(np.square(np.asarray(agent.Q_to_value()[1:-1]) - gt_v)))
        sum_rms_err += rms_err
        # Reset Q after a run
        agent.reset_Q()

    # averaged over num_runs
    return sum_rms_err / num_runs
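A hypothetical driver for run_agent_RMS_value, comparing the per-episode RMS curves for several values of n; the run counts, step size and the use of matplotlib are illustrative assumptions, and RandomWalk/Sarsa are assumed importable from the surrounding project.

import matplotlib.pyplot as plt

# Compare learning curves of n-step SARSA for several n
for n in (1, 2, 4, 8):
    curve = run_agent_RMS_value(num_runs=50, num_episodes=100,
                                discount=1.0, step_size=0.4, step=n)
    plt.plot(curve, label="{}-step SARSA".format(n))
plt.xlabel("Episode")
plt.ylabel("RMS error (averaged over runs)")
plt.legend()
plt.show()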
Example #2
def print_value():
    """ Print the RMS error on state values """
    mdp = RandomWalk(19, 1)
    # ground truth for value
    gt_v = np.asarray(mdp.value_equiprobable(1.0)[1:-1])
    # initial value
    init_v = np.asarray([0.5] * mdp.num_states())[1:-1]
    rms_err = np.sqrt(np.mean(np.square(init_v - gt_v)))
    print("RMS error is ", rms_err)
Example #3
def decay_agent(n=1, alpha=0.5, episodes=100, ep_start=30, decay=0.7):
    """ Run an agent for specified n-step Qsigma method with sigma decay"""
    mdp = RandomWalk(19, -1)
    s = mdp.init()
    num_runs = 250
    num_episodes = episodes
    discount = 1.0
    step_size = alpha
    steps = n

    # Ground-truth Q values and accumulated RMS error
    Q_opt = mdp.Q_equiprobable(1.0)
    rms_err = 0.0

    # create n-step Qsigma agent
    agent = QSigma(mdp, 1.0, s, steps)
    agent.set_policy_equiprobable()

    for run in range(num_runs):
        sqerr = 0.0
        agent._Psigma = 1.0
        for i in range(num_episodes):
            if i > ep_start:
                agent._Psigma *= decay
            agent.episode(discount, step_size)
            agent.init()
        count = 0
        for s in range(mdp.num_states()):
            for a in range(mdp.num_actions(s)):
                count += 1
                sqerr += (1 / count) * (
                    (agent.Q[s][a] - Q_opt[s][a])**2 - sqerr)
        rms_err += sqerr**0.5
        # Reset Q after a run
        agent.reset_Q()

    rms_err /= num_runs

    return rms_err
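A hypothetical sweep over the sigma decay rate using decay_agent; the grid of n and decay values is illustrative, and the surrounding project's RandomWalk/QSigma classes are assumed importable.

# Compare the averaged RMS error for a few (n, decay) combinations
for n in (1, 3):
    for decay in (0.9, 0.7, 0.5):
        err = decay_agent(n=n, alpha=0.5, episodes=100, ep_start=30, decay=decay)
        print("n={}, decay={}: avg RMS error {:.4f}".format(n, decay, err))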
Example #4
def run_agent_RMS_param(num_runs, num_episodes, discount, step_size, step=1, agent_type="Sarsa"):
    """ Run the n-step Sarsa agent and return the avg RMS over all states, episodes and runs """
    mdp = RandomWalk(19, -1)
    s = mdp.init()

    # ground truth for value
    gt_v = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # initial value
    init_v = np.asarray([0.5] * mdp.num_states())[1:-1]

    # Arrays for RMS error over all states
    rms_err = np.asarray([0.0] * num_episodes)
    sum_rms_err = 0.0
    # rms_err[0] = np.sqrt(np.mean(np.square(init_v - gt_v)))

    # create n-step agent
    print("Starting agent {}-step {}".format(step, agent_type))
    if agent_type.lower() == "sarsa":
        agent = Sarsa(mdp, s, step)
    elif agent_type.lower() == "expsarsa":
        agent = ExpSARSA(mdp, s, step)
    elif agent_type.lower() == "treebackup":
        agent = TreeBackup(mdp, s, step)
    elif agent_type.lower() == "qsigma":
        agent = QSigma(mdp, 0.5, s, step)
    else:
        raise ValueError("Unknown agent type: {}".format(agent_type))

    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            rms_err[i] = np.sqrt(np.mean(np.square(np.asarray(agent.Q_to_value()[1:-1]) - gt_v)))
        sum_rms_err += np.sum(rms_err)
        # Reset Q after a run
        agent.reset_Q()

    # averaged over num_runs and num_episodes
    return sum_rms_err / (num_runs * num_episodes)
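A hypothetical parameter study built on run_agent_RMS_param, comparing the four agent types over a range of step sizes; the grid values and run counts are illustrative, and the project's agent classes are assumed importable.

import numpy as np

agents = ("Sarsa", "ExpSARSA", "TreeBackup", "QSigma")
step_sizes = np.arange(0.1, 1.01, 0.1)
for name in agents:
    errs = [run_agent_RMS_param(10, 10, 1.0, alpha, step=4, agent_type=name)
            for alpha in step_sizes]
    print(name, ["{:.3f}".format(e) for e in errs])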
Example #5
def run_agent_RMS_Q(num_runs, num_episodes, discount, step_size, step=1):
    """ Run SARSA agent for num_episodes to get the Q values """
    mdp = RandomWalk(19)
    s = mdp.init()

    # ground truth for Q
    gt_Q = np.asarray(mdp.Q_equiprobable(discount)[1:-1])
    gt_Q_left = gt_Q[:, 0]
    gt_Q_right = gt_Q[:, 1]

    v = np.asarray([0.5] * mdp.num_states())
    v[0], v[-1] = 0.0, 0.0
    init_Q_left = np.asarray(mdp.value_to_Q(v, discount)[1:-1])[:, 0]
    init_Q_right = np.asarray(mdp.value_to_Q(v, discount)[1:-1])[:, 1]

    # Arrays for RMS error over all states
    rms_err_left = np.asarray([0.0] * (num_episodes + 1))  # Q[left]
    rms_err_right = np.asarray([0.0] * (num_episodes + 1))  # Q[right]
    sum_rms_err_left = np.asarray([0.0] * (num_episodes + 1))
    sum_rms_err_right = np.asarray([0.0] * (num_episodes + 1))
    rms_err_left[0] = np.sqrt(np.mean(np.square(init_Q_left - gt_Q_left)))
    rms_err_right[0] = np.sqrt(np.mean(np.square(init_Q_right - gt_Q_right)))

    # create n-step SARSA agent
    agent = Sarsa(mdp, s, step)
    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            rms_err_left[i + 1] = np.sqrt(np.mean(np.square(np.asarray(agent.Q[1:-1])[:, 0] - gt_Q_left)))
            rms_err_right[i + 1] = np.sqrt(np.mean(np.square(np.asarray(agent.Q[1:-1])[:, 1] - gt_Q_right)))
        sum_rms_err_left += rms_err_left
        sum_rms_err_right += rms_err_right
        # Reset Q after a run
        agent.reset_Q()
    # averaged over num_runs
    return sum_rms_err_left / num_runs, sum_rms_err_right / num_runs
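A hypothetical usage of run_agent_RMS_Q; the run and episode counts are illustrative, and RandomWalk/Sarsa are assumed importable from the surrounding project.

err_left, err_right = run_agent_RMS_Q(num_runs=50, num_episodes=100,
                                      discount=1.0, step_size=0.4, step=4)
print("final RMS error, Q(s, left):  {:.4f}".format(err_left[-1]))
print("final RMS error, Q(s, right): {:.4f}".format(err_right[-1]))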