Example No. 1
def build_point_option_agent(mdp,
                             pairs,
                             agent=QLearningAgent,
                             policy='vi',
                             name='-abstr'):
    # pairs should be a list of pairs.
    # Each pair is composed of two lists:
    # one of initiation states, one of termination states.
    goal_based_options = aa_helpers.make_point_options(mdp,
                                                       pairs,
                                                       policy=policy)
    goal_based_aa = ActionAbstraction(prim_actions=mdp.get_actions(),
                                      options=goal_based_options,
                                      use_prims=True)

    # num_feats = mdp.get_num_state_feats()
    option_agent = AbstractionWrapper(
        agent,
        agent_params={"actions": mdp.get_actions()},
        action_abstr=goal_based_aa,
        name_ext=name)
    # option_agent = AbstractionWrapper(LinearQAgent, agent_params={"actions":mdp.get_actions(), "num_features":num_feats}, action_abstr=goal_based_aa, name_ext=name)
    # option_agent = AbstractionWrapper(QLearningAgent, agent_params={"actions":mdp.get_actions()}, action_abstr=goal_based_aa, name_ext=name)

    return option_agent
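A minimal usage sketch of the point-option builder (the coordinates and imports below are illustrative assumptions, not part of the original example):

# Hypothetical usage: one point option running from (1, 1) to (5, 5).
# Assumes simple_rl's GridWorldMDP/GridWorldState; adjust imports to your layout.
from simple_rl.tasks import GridWorldMDP
from simple_rl.tasks.grid_world.GridWorldStateClass import GridWorldState

mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
pairs = [([GridWorldState(1, 1)], [GridWorldState(5, 5)])]
point_option_agent = build_point_option_agent(mdp, pairs, policy='vi')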
Example No. 2
def build_subgoal_option_agent(mdp,
                               subgoals,
                               init_region,
                               agent=QLearningAgent,
                               vectors=None,
                               name='-abstr',
                               n_trajs=50,
                               n_steps=100,
                               classifier='list',
                               policy='vi'):
    # print('subgoals=', subgoals)
    goal_based_options = aa_helpers.make_subgoal_options(mdp,
                                                         subgoals,
                                                         init_region,
                                                         vectors=vectors,
                                                         n_trajs=n_trajs,
                                                         n_steps=n_steps,
                                                         classifier=classifier,
                                                         policy=policy)
    goal_based_aa = ActionAbstraction(prim_actions=mdp.get_actions(),
                                      options=goal_based_options,
                                      use_prims=True)

    # num_feats = mdp.get_num_state_feats()
    option_agent = AbstractionWrapper(
        agent,
        agent_params={"actions": mdp.get_actions()},
        action_abstr=goal_based_aa,
        name_ext=name)

    return option_agent
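A similar sketch for the subgoal variant (the exact types expected for subgoals and init_region are assumptions here; check aa_helpers.make_subgoal_options for the real contract):

# Illustrative call: one subgoal at (5, 5), options initiated from the start state.
subgoals = [GridWorldState(5, 5)]
init_region = [mdp.get_init_state()]
subgoal_option_agent = build_subgoal_option_agent(mdp, subgoals, init_region,
                                                  n_trajs=50, n_steps=100,
                                                  policy='vi')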
Example No. 3
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    rand_agent = RandomAgent(actions=mdp.get_actions())
    abstr_identity_agent = AbstractionWrapper(QLearningAgent, agent_params={"epsilon":0.9}, actions=mdp.get_actions())

    # Run experiment and make plot.
    run_agents_on_mdp([ql_agent, rand_agent, abstr_identity_agent], mdp, instances=5, episodes=100, steps=150, open_plot=open_plot)
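These example scripts are usually launched from a __main__ guard; a minimal sketch (the original file may parse command-line flags instead of hard-coding open_plot):

if __name__ == "__main__":
    main(open_plot=True)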
Example No. 4
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
    ql_agent = QLearningAgent(actions=mdp_distr.get_actions())
    rand_agent = RandomAgent(actions=mdp_distr.get_actions())

    # Make goal-based option agent.
    goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
    goal_based_aa = ActionAbstraction(prim_actions=mdp_distr.get_actions(),
                                      options=goal_based_options)
    option_agent = AbstractionWrapper(QLearningAgent,
                                      actions=mdp_distr.get_actions(),
                                      action_abstr=goal_based_aa)

    # Run experiment and make plot.
    run_agents_lifelong([ql_agent, rand_agent, option_agent],
                        mdp_distr,
                        samples=10,
                        episodes=100,
                        steps=150,
                        open_plot=open_plot)
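A quick sanity check before wrapping the options in an ActionAbstraction (a sketch; it assumes make_goal_based_options returns a list-like collection of option objects):

goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
print("Generated", len(goal_based_options), "goal-based options for the four_room distribution.")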
Example No. 5
def branching_factor_experiment(min_options=0,
                                max_options=20,
                                increment=2,
                                instances=5,
                                epsilon=0.05):
    '''
    Args:
        min_options (int)
        max_options (int)
        increment (int)

    Summary:
        Runs an experiment contrasting learning performance for different # options.
    '''
    # Define MDP.
    grid_size = 7
    mdp = FourRoomMDP(width=grid_size,
                      height=grid_size,
                      goal_locs=[(grid_size, grid_size)])

    # Make State Abstraction.
    states, _ = ah.compute_reachable_state_space(mdp, sample_rate=50)
    state_abstr = core.compute_phi_given_m(mdp,
                                           four_rooms_predicate_9x9,
                                           level=1,
                                           states=states)

    x_axis = range(min_options, max_options + 1, increment)
    y_axis = defaultdict(list)  #[] #[0] * len(x_axis)
    conf_intervals = defaultdict(list)
    num_options_performance = defaultdict(lambda: defaultdict(list))

    # Choose dependent variable (either #steps per episode or #episodes).
    d_var_range = [(20, 5), (40, 250), (400, 2500)]

    for steps, episodes in d_var_range:
        print "steps, episodes", steps, episodes

        # Evaluate.
        for instance in range(instances):
            print("\tInstance", instance + 1, "of", str(instances) + ".")

            # Make initial Options.
            for num_options in x_axis:

                options, _ = make_near_optimal_phi_relative_options(
                    mdp,
                    state_abstr,
                    'eps-greedy',
                    num_rand_opts=num_options - 1,
                    eps=epsilon)
                action_abstr = ActionAbstraction(
                    options=options, prim_actions=mdp.get_actions())

                # Make agent.
                AgentClass = RMaxAgent  # DoubleQAgent, QLearningAgent, SarsaAgent
                sa_aa_agent = AbstractionWrapper(
                    AgentClass,
                    agent_params={"actions": mdp.get_actions()},
                    state_abstr=state_abstr,
                    action_abstr=action_abstr,
                    name_ext="-$\\phi,O$")

                _, _, value_per_episode = run_single_agent_on_mdp(
                    sa_aa_agent, mdp, episodes=episodes, steps=steps)
                mdp.reset()

                num_options_performance[(steps, episodes)][num_options].append(
                    value_per_episode[-1])

    ############
    # Other types

    # Just state abstraction.
    steps, episodes = d_var_range[-1][0], d_var_range[-1][1]
    sa_agent = AbstractionWrapper(AgentClass,
                                  agent_params={"actions": mdp.get_actions()},
                                  state_abstr=state_abstr,
                                  action_abstr=None,
                                  name_ext="-$\\phi$")
    _, _, value_per_episode = run_single_agent_on_mdp(sa_agent,
                                                      mdp,
                                                      episodes=episodes,
                                                      steps=steps)
    num_options_performance[(d_var_range[-1][0],
                             d_var_range[-1][1])]["phi"].append(
                                 value_per_episode[-1])
    y_axis["phi"] = [value_per_episode[-1]]

    # Run random options.
    options = make_fixed_random_options(mdp, state_abstr)
    action_abstr = ActionAbstraction(options=options,
                                     prim_actions=mdp.get_actions())
    AgentClass = QLearningAgent
    rand_opt_agent = AbstractionWrapper(
        AgentClass,
        agent_params={"actions": mdp.get_actions()},
        state_abstr=state_abstr,
        action_abstr=action_abstr,
        name_ext="-$\\phi,O_{\text{random}}$")
    _, _, value_per_episode = run_single_agent_on_mdp(rand_opt_agent,
                                                      mdp,
                                                      episodes=episodes,
                                                      steps=steps)
    num_options_performance[(d_var_range[-1][0],
                             d_var_range[-1][1])]["random"].append(
                                 value_per_episode[-1])
    y_axis["random"] = [value_per_episode[-1]]

    # Make optimal agent.
    value_iter = ValueIteration(mdp)
    value_iter.run_vi()
    optimal_agent = FixedPolicyAgent(value_iter.policy)
    _, _, value_per_episode = run_single_agent_on_mdp(optimal_agent,
                                                      mdp,
                                                      episodes=episodes,
                                                      steps=steps)
    y_axis["optimal"] = [value_per_episode[-1]]
    total_steps = d_var_range[0][0] * d_var_range[0][1]

    # Confidence intervals.
    for dependent_var in d_var_range:
        for num_options in x_axis:
            # Compute mean and standard error.
            avg_for_n = float(
                sum(num_options_performance[dependent_var]
                    [num_options])) / instances
            std_deviation = np.std(
                num_options_performance[dependent_var][num_options])
            std_error = 1.96 * (std_deviation / math.sqrt(
                len(num_options_performance[dependent_var][num_options])))
            y_axis[dependent_var].append(avg_for_n)
            conf_intervals[dependent_var].append(std_error)

    plt.xlabel("$|O_\\phi|$")
    plt.xlim([1, len(x_axis)])
    plt.ylabel("$V^{\\hat{\\pi}_{O_\\phi}}(s_0)$")
    plt.tight_layout()  # Keeps the spacing nice.

    # Add just state abstraction.
    ep_val_del_q_phi = y_axis["phi"]
    label = "$O_{\\phi}$"  #" N=1e" + str(str(total_steps).count("0")) + "$"
    plt.plot(x_axis, [ep_val_del_q_phi] * len(x_axis),
             marker="+",
             linestyle="--",
             linewidth=1.0,
             color=PLOT_COLORS[-1],
             label=label)

    # Add random options.
    ep_val_del_q = y_axis["random"]
    label = "$O_{random}$"  #" N=1e" + str(str(total_steps).count("0")) + "$"
    plt.plot(x_axis, [ep_val_del_q] * len(x_axis),
             marker="x",
             linestyle="--",
             linewidth=1.0,
             color=PLOT_COLORS[0])  #, label=label)

    # Add optimal.
    ep_val_optimal = y_axis["optimal"]
    plt.plot(x_axis, [ep_val_optimal] * len(x_axis),
             linestyle="-",
             linewidth=1.0,
             color=PLOT_COLORS[1])  #, label="$\\pi^*$")

    for i, dependent_var in enumerate(d_var_range):
        total_steps = dependent_var[0] * dependent_var[1]
        label = "$O_{\\phi,Q_\\varepsilon^*}, N=1e" + str(
            str(total_steps).count("0")) + "$"
        plt.plot(x_axis,
                 y_axis[dependent_var],
                 marker="x",
                 color=PLOT_COLORS[i + 2],
                 linewidth=1.5,
                 label=label)

        # Confidence intervals.
        top = np.add(y_axis[dependent_var], conf_intervals[dependent_var])
        bot = np.subtract(y_axis[dependent_var], conf_intervals[dependent_var])
        plt.fill_between(x_axis,
                         top,
                         bot,
                         alpha=0.25,
                         color=PLOT_COLORS[i + 2])

    plt.legend()
    plt.savefig("branching_factor_results.pdf", format="pdf")
    plt.cla()
    plt.close()
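A sketch of how the sweep might be invoked with smaller settings for a quick run (the parameter values are illustrative):

if __name__ == "__main__":
    # Small sweep: 2, 4, ..., 10 options, 3 instances each.
    branching_factor_experiment(min_options=2, max_options=10,
                                increment=2, instances=3, epsilon=0.05)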
Example No. 6
def run_learning_experiment():
    """
    Summary:
        Builds different sets of options and contrasts how RL algorithms
        perform when learning with them.
    """
    # Define MDP.
    width, height = 11, 11
    mdp = FourRoomMDP(width=width,
                      height=height,
                      goal_locs=[(width, height)],
                      slip_prob=0.05)
    actions = mdp.get_actions()

    # Make State Abstraction.
    states, _ = ah.compute_reachable_state_space(mdp, sample_rate=50)
    if isinstance(mdp, FourRoomMDP):
        predicate = four_rooms_predicate_11x11
    else:
        predicate = reachable_in_n_steps_predicate

    state_abstr = core.compute_phi_given_m(mdp,
                                           predicate,
                                           level=1,
                                           states=states)

    # Make initial Options.
    num_rand_opts_to_add = 2
    options, _ = make_near_optimal_phi_relative_options(
        mdp,
        state_abstr,
        'eps-greedy',
        num_rand_opts=num_rand_opts_to_add,
        eps=0.05)
    action_abstr = ActionAbstraction(options=options, prim_actions=actions)
    action_abstr_w_prims = ActionAbstraction(options=options,
                                             prim_actions=actions,
                                             incl_primitives=True)

    # Find eigen options.
    # num_eigen_options = max(1, num_rand_opts_to_add - 1)
    # eigen_options_init_all = find_eigenoptions(mdp, num_options=num_eigen_options, init_everywhere=True)
    # eigen_options_w_prims = find_eigenoptions(mdp, num_options=num_eigen_options, init_everywhere=False)
    # eigen_aa_init_all = ActionAbstraction(options=eigen_options_init_all, prim_actions=actions, incl_primitives=False)
    # eigen_aa_w_prims = ActionAbstraction(options=eigen_options_w_prims, prim_actions=actions, incl_primitives=True)

    # Make agent.
    AgentClass = QLearningAgent  #QLearningAgent #DoubleQAgent #DelayedQAgent
    ql_agent = AgentClass(mdp.get_actions())
    sa_aa_agent = AbstractionWrapper(AgentClass,
                                     agent_params={"actions": actions},
                                     state_abstr=state_abstr,
                                     action_abstr=action_abstr_w_prims,
                                     name_ext="-$\\phi,O$")
    aa_agent = AbstractionWrapper(AgentClass,
                                  agent_params={"actions": actions},
                                  state_abstr=None,
                                  action_abstr=action_abstr_w_prims,
                                  name_ext="-$O$")
    # aa_agent = AbstractionWrapper(AgentClass, agent_params={"actions": actions}, state_abstr=None, action_abstr=action_abstr_w_prims, name_ext="-$\\phi$")
    # Eigen agents.
    # eigen_agent_init_all = AbstractionWrapper(AgentClass, agent_params={"actions": actions}, state_abstr=None, action_abstr=eigen_aa_init_all, name_ext="-eigen_all")
    # eigen_agent_w_prims = AbstractionWrapper(AgentClass, agent_params={"actions": actions}, state_abstr=None, action_abstr=eigen_aa_w_prims, name_ext="-eigen_w_prims")
    agents = [ql_agent, aa_agent,
              sa_aa_agent]  #, eigen_agent_init_all, eigen_agent_w_prims]

    # Run.
    if isinstance(mdp, FourRoomMDP):
        run_agents_on_mdp(agents, mdp, instances=10, episodes=500, steps=50)
    else:
        run_agents_on_mdp(agents, mdp, instances=10, episodes=100, steps=10)
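A minimal entry point for the learning experiment (a sketch; the original module may already define its own):

if __name__ == "__main__":
    run_learning_experiment()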