Пример #1
0
    def __init__(self,
                 SubAgentClass,
                 actions,
                 agent_params=None,
                 state_abstr=None,
                 action_abstr=None,
                 name_ext="abstr"):
        '''
        Builds a sub-agent and wraps it with a state and an action abstraction.

        Args:
            SubAgentClass (simple_rl.AgentClass): Instantiated here as
                SubAgentClass(actions=actions, **agent_params).
            actions (list of str)
            agent_params (dict): A dictionary with key=param_name, val=param_value,
                to be given to the constructor for the instance of @SubAgentClass.
                None (the default) means no extra parameters.
            state_abstr (StateAbstraction): If None, StateAbstraction({}) is used.
            action_abstr (ActionAbstraction): If None, an ActionAbstraction built
                from the sub-agent's own actions is used.
            name_ext (str): Appended, after a "-", to the sub-agent's name.
        '''
        # A None sentinel replaces the former literal {} default so that no
        # dict object is shared between calls.
        if agent_params is None:
            agent_params = {}

        # Setup the abstracted agent.
        self.agent = SubAgentClass(actions=actions, **agent_params)

        if action_abstr is None:
            action_abstr = ActionAbstraction(prim_actions=self.agent.actions)
        self.action_abstr = action_abstr

        if state_abstr is None:
            state_abstr = StateAbstraction({})
        self.state_abstr = state_abstr

        # The wrapper exposes the action abstraction's actions, not the
        # sub-agent's, as its own action set.
        Agent.__init__(self,
                       name=self.agent.name + "-" + name_ext,
                       actions=self.action_abstr.get_actions())
Пример #2
0
def make_sa(mdp,
            indic_func=ind_funcs._q_eps_approx_indicator,
            state_class=State,
            epsilon=0.0,
            save=False,
            track_act_opt_pr=False):
    '''
    Args:
        mdp (MDP or MDPDistribution): An MDPDistribution is handed to
            make_multitask_sa, anything else to make_singletask_sa.
        indic_func: Indicator function forwarded to the builder.
        state_class (Class)
        epsilon (float)
        save (bool): If True, the result is passed to save_sa under the
            name str(mdp) + ".p".
        track_act_opt_pr (bool): Forwarded unchanged to the builder.

    Returns:
        The state abstraction produced by the selected builder.

    Summary:
        Creates a state abstraction and, when @save is set, saves it.
    '''
    print("  Making state abstraction... ")

    # Both builders accept the same keyword arguments, so pick one and call
    # it once. (The placeholder StateAbstraction that used to be constructed
    # here was always overwritten and has been dropped.)
    if isinstance(mdp, MDPDistribution):
        build_sa = make_multitask_sa
    else:
        build_sa = make_singletask_sa

    q_equiv_sa = build_sa(mdp,
                          state_class=state_class,
                          indic_func=indic_func,
                          epsilon=epsilon,
                          track_act_opt_pr=track_act_opt_pr)

    if save:
        save_sa(q_equiv_sa, str(mdp) + ".p")

    return q_equiv_sa
def compute_phi_given_m(m, predicate, level, states):
    '''
    Args:
        m (simple_rl.MDP): Passed through as the third argument of @predicate.
        predicate: Called as predicate(state_a, state_b, m); a truthy result
            puts state_a into state_b's cluster.
        level: Embedded in each abstract state's name, 'lvl<level>_<index>'.
        states (list): Ground states to group.

    Returns:
        phi (simple_rl.abstraction.StateAbstraction)
    '''
    # Group states according to given predicate
    phi = {}
    abstr_state_idx = 0
    for i, state in enumerate(states):
        in_existing_cluster = False

        # Every earlier state is tested (no early exit), so when several
        # match, the cluster of the last matching one is the one kept.
        for j in range(i):
            earlier = states[j]
            if predicate(state, earlier, m):
                phi[state] = phi[earlier]
                in_existing_cluster = True

        if not in_existing_cluster:
            phi[state] = State(data='lvl' + str(level) + '_' +
                               str(abstr_state_idx))
            abstr_state_idx += 1

    # print() calls replace the Python 2 print statements, which do not
    # parse on Python 3; the emitted text is unchanged.
    print("\t\t|S|", len(states))
    print("\t\t|S_phi|", abstr_state_idx)

    return StateAbstraction(phi)
Пример #4
0
class AbstractionWrapper(Agent):
    '''
    Agent that delegates to a wrapped sub-agent through a state abstraction
    and an action abstraction.
    '''

    def __init__(self,
                 SubAgentClass,
                 actions,
                 agent_params=None,
                 state_abstr=None,
                 action_abstr=None,
                 name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass): Instantiated here as
                SubAgentClass(actions=actions, **agent_params).
            actions (list of str)
            agent_params (dict): A dictionary with key=param_name, val=param_value,
                to be given to the constructor for the instance of @SubAgentClass.
                None (the default) means no extra parameters.
            state_abstr (StateAbstraction): If None, StateAbstraction({}) is used.
            action_abstr (ActionAbstraction): If None, an ActionAbstraction built
                from the sub-agent's own actions is used.
            name_ext (str): Appended, after a "-", to the sub-agent's name.
        '''
        # A None sentinel replaces the former literal {} default so that no
        # dict object is shared between calls.
        if agent_params is None:
            agent_params = {}

        # Setup the abstracted agent.
        self.agent = SubAgentClass(actions=actions, **agent_params)

        if action_abstr is None:
            action_abstr = ActionAbstraction(prim_actions=self.agent.actions)
        self.action_abstr = action_abstr

        if state_abstr is None:
            state_abstr = StateAbstraction({})
        self.state_abstr = state_abstr

        # The wrapper exposes the action abstraction's actions, not the
        # sub-agent's, as its own action set.
        Agent.__init__(self,
                       name=self.agent.name + "-" + name_ext,
                       actions=self.action_abstr.get_actions())

    def act(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Return:
            (str): Whatever the action abstraction's act() returns for the
                wrapped agent, the abstracted state, the ground state and
                the reward.
        '''
        abstr_state = self.state_abstr.phi(ground_state)
        return self.action_abstr.act(self.agent, abstr_state, ground_state,
                                     reward)

    def reset(self):
        '''
        Resets the wrapped agent and the action abstraction; the state
        abstraction is not touched here.
        '''
        self.agent.reset()
        self.action_abstr.reset()

    def end_of_episode(self):
        '''
        Forwards the end-of-episode signal to the wrapped agent and to the
        action abstraction.
        '''
        self.agent.end_of_episode()
        self.action_abstr.end_of_episode()
Пример #5
0
def convert_prob_sa_to_sa(prob_sa):
    '''
    Args:
        prob_sa (simple_rl.state_abs.ProbStateAbstraction): Its abstr_dist
            attribute is read as a dict of dicts, ground state -> {key: value}.

    Returns:
        (simple_rl.state_abs.StateAbstraction)

    Summary:
        Maps every ground state in prob_sa.abstr_dist to the key holding the
        largest value in that state's inner dict.
    '''
    new_phi = {}

    for s_g, dist in prob_sa.abstr_dist.items():
        # max() over the dict with key=dist.get returns the first key (in
        # iteration order) that carries the largest value -- the same choice
        # the old keys()[values().index(max(values()))] lookup made, but it
        # also works where keys()/values() are non-indexable views.
        new_phi[s_g] = max(dist, key=dist.get)

    return StateAbstraction(new_phi)
Пример #6
0
def merge_state_abstr(list_of_state_abstr, states):
    '''
    Args:
        list_of_state_abstr (list): State abstractions; each one's phi() is
            applied to the ground states.
        states (list): Ground states to merge over.

    Returns:
        (simple_rl.StateAbstraction)

    Summary:
        Two states may share a cluster only if every abstraction in
        @list_of_state_abstr maps them to the same abstract state; the
        merged abstraction is built from those pairs.
    '''

    # Pass 1: record, for every ordered pair, whether no abstraction
    # separates the two states.
    partners_of = defaultdict(list)
    for first, second in itertools.product(states, repeat=2):
        separated = any(
            abstr.phi(first) != abstr.phi(second)
            for abstr in list_of_state_abstr)
        if not separated:
            partners_of[first].append(second)
            partners_of[second].append(first)

    # Pass 2: turn the pairings into cluster assignments.
    phi = defaultdict(list)
    next_cluster_id = 0
    for state, partners in partners_of.items():
        for partner in partners:
            if state in phi:
                # State already placed: its partner joins it.
                phi[partner] = phi[state]
            elif partner in phi:
                # Only the partner is placed: the state joins it.
                phi[state] = phi[partner]
            else:
                # Neither is placed yet: open a new cluster for both.
                phi[state] = State(next_cluster_id)
                phi[partner] = State(next_cluster_id)

            # The id advances on every visited pair, so cluster ids are
            # distinct but not consecutive.
            next_cluster_id += 1

    return StateAbstraction(phi, states)
    def __init__(self,
                 ground_mdp,
                 state_abstr=None,
                 action_abstr=None,
                 vi_sample_rate=5,
                 max_iterations=1000,
                 amdp_sample_rate=5,
                 delta=0.001):
        '''
        Args:
            ground_mdp (simple_rl.MDP)
            state_abstr (simple_rl.StateAbstraction): If None, one is built
                over the ground state space.
            action_abstr (simple_rl.ActionAbstraction): If None, one is built
                from the ground MDP's actions.
            vi_sample_rate (int): Num samples per transition for running VI.
            max_iterations (int): Usual VI # Iteration bound.
            amdp_sample_rate (int): Num samples per abstract transition to use for computing R_abstract, T_abstract.
            delta (float): Passed to ValueIteration as its delta argument.
        '''
        self.ground_mdp = ground_mdp

        # Grab ground state space.
        # NOTE(review): this helper VI uses fixed settings rather than the
        # constructor arguments -- confirm that is intentional.
        vi = ValueIteration(self.ground_mdp,
                            delta=0.001,
                            max_iterations=1000,
                            sample_rate=5)
        state_space = vi.get_states()

        # Make the abstract MDP.
        if state_abstr is None:
            state_abstr = StateAbstraction(ground_state_space=state_space)
        self.state_abstr = state_abstr

        if action_abstr is None:
            action_abstr = ActionAbstraction(
                prim_actions=ground_mdp.get_actions())
        self.action_abstr = action_abstr

        abstr_mdp = abstr_mdp_funcs.make_abstr_mdp(
            ground_mdp,
            self.state_abstr,
            self.action_abstr,
            step_cost=0.0,
            sample_rate=amdp_sample_rate)

        # Create VI with the abstract MDP. The settings are passed by
        # keyword (the same keywords used for the helper VI above) so they
        # cannot land in the wrong positional parameter of
        # ValueIteration.__init__.
        ValueIteration.__init__(self,
                                abstr_mdp,
                                sample_rate=vi_sample_rate,
                                delta=delta,
                                max_iterations=max_iterations)
Пример #8
0
def make_singletask_sa(mdp,
                       indic_func,
                       state_class,
                       epsilon=0.0,
                       aa_single_act=False,
                       prob_of_mdp=1.0,
                       track_act_opt_pr=False):
    '''
    Args:
        mdp (MDP): An MDPDistribution is also accepted; one MDP is sampled
            from it.
        indic_func (S x S --> {0,1}): Called as
            indic_func(state_x, state_y, vi, actions, epsilon=epsilon).
        state_class (Class)
        epsilon (float)
        aa_single_act (bool): If True, the max-Q actions of every ground
            state are recorded on the abstraction.
        prob_of_mdp (float): Forwarded to set_actions_state_opt_dict.
        track_act_opt_pr (bool): Forwarded to the StateAbstraction constructor.

    Returns:
        (StateAbstraction)
    '''

    print("\tRunning VI...")
    sys.stdout.flush()
    # Run VI
    if isinstance(mdp, MDPDistribution):
        mdp = mdp.sample()

    vi = ValueIteration(mdp)
    vi.run_vi()
    print(" done.")

    print("\tMaking state abstraction...")
    sys.stdout.flush()
    sa = StateAbstraction(phi={},
                          state_class=state_class,
                          track_act_opt_pr=track_act_opt_pr)
    clusters = defaultdict(list)
    ground_states = vi.get_states()
    num_states = len(ground_states)

    actions = mdp.get_actions()
    # Find state pairs that satisfy the condition.
    for i, state_x in enumerate(ground_states):
        sys.stdout.flush()
        # Starts this state's list afresh, discarding entries appended
        # while earlier states were being processed.
        clusters[state_x] = [state_x]

        for state_y in ground_states[i:]:
            if not (state_x == state_y) and indic_func(
                    state_x, state_y, vi, actions, epsilon=epsilon):
                clusters[state_x].append(state_y)
                clusters[state_y].append(state_x)

    print("making clusters...")
    sys.stdout.flush()

    # Build SA. Iterate over a snapshot of the keys: entries are removed
    # from `clusters` inside the loop, and mutating a dict while iterating
    # its live key view raises RuntimeError.
    for state in list(clusters):
        if state not in clusters:
            # Already absorbed into a cluster built earlier.
            continue

        new_cluster = clusters[state]
        sa.make_cluster(new_cluster)

        # Destroy old so we don't double up.
        for s in new_cluster:
            clusters.pop(s, None)

    if aa_single_act:
        # Put all optimal actions in a set associated with the ground state.
        for ground_s in sa.get_ground_states():
            a_star_set = set(vi.get_max_q_actions(ground_s))
            sa.set_actions_state_opt_dict(ground_s, a_star_set, prob_of_mdp)

    print(" done.")
    print("\tGround States:", num_states)
    print("\tAbstract:", sa.get_num_abstr_states())
    print()

    return sa
Пример #9
0
def make_singletask_sa(mdp, indic_func, state_class, epsilon=0.0, aa_single_act=False, prob_of_mdp=1.0, track_act_opt_pr=False):
    '''
    Args:
        mdp (MDP): An MDPDistribution is also accepted; one MDP is sampled
            from it.
        indic_func (S x S --> {0,1}): Called as
            indic_func(state_x, state_y, vi, actions, epsilon=epsilon).
        state_class (Class)
        epsilon (float)
        aa_single_act (bool): If True, the max-Q actions of every ground
            state are recorded on the abstraction.
        prob_of_mdp (float): Forwarded to set_actions_state_opt_dict.
        track_act_opt_pr (bool): Forwarded to the StateAbstraction constructor.

    Returns:
        (StateAbstraction)
    '''

    print("\tRunning VI...")
    sys.stdout.flush()
    # Run VI
    if isinstance(mdp, MDPDistribution):
        mdp = mdp.sample()

    vi = ValueIteration(mdp)
    vi.run_vi()
    print(" done.")

    print("\tMaking state abstraction...")
    sys.stdout.flush()
    sa = StateAbstraction(phi={}, state_class=state_class, track_act_opt_pr=track_act_opt_pr)
    clusters = defaultdict(list)
    all_states = vi.get_states()
    num_states = len(all_states)

    actions = mdp.get_actions()
    # Find state pairs that satisfy the condition.
    for i, state_x in enumerate(all_states):
        sys.stdout.flush()
        # Starts this state's list afresh, discarding entries appended
        # while earlier states were being processed.
        clusters[state_x] = [state_x]

        for state_y in all_states[i:]:
            if not (state_x == state_y) and indic_func(state_x, state_y, vi, actions, epsilon=epsilon):
                clusters[state_x].append(state_y)
                clusters[state_y].append(state_x)

    print("making clusters...")
    sys.stdout.flush()

    # Build SA. The keys are copied first because the loop body removes
    # entries from `clusters`; iterating the live key view while doing so
    # raises RuntimeError.
    for state in list(clusters):
        if state not in clusters:
            # This state was already placed in an earlier cluster.
            continue

        members = clusters[state]
        sa.make_cluster(members)

        # Destroy old so we don't double up.
        for s in members:
            clusters.pop(s, None)

    if aa_single_act:
        # Put all optimal actions in a set associated with the ground state.
        for ground_s in sa.get_ground_states():
            a_star_set = set(vi.get_max_q_actions(ground_s))
            sa.set_actions_state_opt_dict(ground_s, a_star_set, prob_of_mdp)

    print(" done.")
    print("\tGround States:", num_states)
    print("\tAbstract:", sa.get_num_abstr_states())
    print()

    return sa