def __init__(self, SubAgentClass, actions, agent_params=None, state_abstr=None, action_abstr=None, name_ext="abstr"):
    '''
    Wrap an instance of @SubAgentClass so that it acts through a state
    abstraction and an action abstraction.

    Args:
        SubAgentClass (simple_rl.AgentClass): Class of the agent to wrap.
        actions (list of str): Passed to @SubAgentClass as `actions`.
        agent_params (dict): A dictionary with key=param_name, val=param_value,
            to be given to the constructor for the instance of @SubAgentClass.
            None (the default) means no extra parameters.
        state_abstr (StateAbstraction): If None, StateAbstraction({}) is used.
        action_abstr (ActionAbstraction): If None, an ActionAbstraction built
            with prim_actions set to the wrapped agent's actions is used.
        name_ext (str): Appended (after a "-") to the wrapped agent's name.
    '''
    # A None sentinel avoids sharing one default dict object across calls.
    if agent_params is None:
        agent_params = {}

    # Setup the abstracted agent.
    self.agent = SubAgentClass(actions=actions, **agent_params)

    if action_abstr is None:
        action_abstr = ActionAbstraction(prim_actions=self.agent.actions)
    self.action_abstr = action_abstr

    if state_abstr is None:
        state_abstr = StateAbstraction({})
    self.state_abstr = state_abstr

    # The wrapper exposes the abstraction's actions, not the primitive ones.
    Agent.__init__(self, name=self.agent.name + "-" + name_ext, actions=self.action_abstr.get_actions())
def make_sa(mdp, indic_func=ind_funcs._q_eps_approx_indicator, state_class=State, epsilon=0.0, save=False, track_act_opt_pr=False):
    '''
    Args:
        mdp (MDP or MDPDistribution)
        indic_func: Forwarded unchanged to the single-/multi-task builder.
        state_class (Class)
        epsilon (float)
        save (bool): If True, the result is handed to save_sa under the
            name str(mdp) + ".p".
        track_act_opt_pr (bool): Forwarded unchanged to the builder.

    Returns:
        (StateAbstraction)

    Summary:
        Creates and (optionally) saves a state abstraction.
    '''
    print(" Making state abstraction... ")

    # An MDPDistribution goes to the multitask builder, anything else to the
    # single-task one; both receive the same keyword arguments.
    if isinstance(mdp, MDPDistribution):
        build_sa = make_multitask_sa
    else:
        build_sa = make_singletask_sa

    q_equiv_sa = build_sa(mdp,
                          state_class=state_class,
                          indic_func=indic_func,
                          epsilon=epsilon,
                          track_act_opt_pr=track_act_opt_pr)

    if save:
        save_sa(q_equiv_sa, str(mdp) + ".p")

    return q_equiv_sa
def compute_phi_given_m(m, predicate, level, states):
    '''
    Args:
        m (simple_rl.MDP): Passed through to @predicate as its third argument.
        predicate: Called as predicate(state_a, state_b, m); a truthy result
            means the two states share an abstract state.
        level: Used (via str()) in the name of each new abstract state.
        states (list): Ground states to group.

    Returns:
        phi (simple_rl.abstraction.StateAbstraction)
    '''
    # Group states according to given predicate
    phi = {}
    abstr_state_idx = 0
    for i, state in enumerate(states):
        in_existing_cluster = False

        # Compare against every earlier state. There is no early exit, so
        # when several earlier states match, the last match decides the cluster.
        for j in range(i):
            if predicate(state, states[j], m):
                phi[state] = phi[states[j]]
                in_existing_cluster = True

        if not in_existing_cluster:
            phi[state] = State(data='lvl' + str(level) + '_' + str(abstr_state_idx))
            abstr_state_idx += 1

    # Single-string form prints the same text on Python 2 and Python 3.
    print("\t\t|S| " + str(len(states)))
    print("\t\t|S_phi| " + str(abstr_state_idx))

    return StateAbstraction(phi)
class AbstractionWrapper(Agent):
    ''' Agent that drives a wrapped agent through a state and an action abstraction. '''

    def __init__(self, SubAgentClass, actions, agent_params=None, state_abstr=None, action_abstr=None, name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass): Class of the agent to wrap.
            actions (list of str): Passed to @SubAgentClass as `actions`.
            agent_params (dict): A dictionary with key=param_name, val=param_value,
                to be given to the constructor for the instance of @SubAgentClass.
                None (the default) means no extra parameters.
            state_abstr (StateAbstraction): If None, StateAbstraction({}) is used.
            action_abstr (ActionAbstraction): If None, an ActionAbstraction built
                with prim_actions set to the wrapped agent's actions is used.
            name_ext (str): Appended (after a "-") to the wrapped agent's name.
        '''
        # A None sentinel avoids sharing one default dict object across calls.
        if agent_params is None:
            agent_params = {}

        # Setup the abstracted agent.
        self.agent = SubAgentClass(actions=actions, **agent_params)

        if action_abstr is None:
            action_abstr = ActionAbstraction(prim_actions=self.agent.actions)
        self.action_abstr = action_abstr

        if state_abstr is None:
            state_abstr = StateAbstraction({})
        self.state_abstr = state_abstr

        # The wrapper exposes the abstraction's actions, not the primitive ones.
        Agent.__init__(self, name=self.agent.name + "-" + name_ext, actions=self.action_abstr.get_actions())

    def act(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Return:
            (str)

        Summary:
            Maps @ground_state through the state abstraction, then lets the
            action abstraction choose the ground action for the wrapped agent.
        '''
        abstr_state = self.state_abstr.phi(ground_state)
        return self.action_abstr.act(self.agent, abstr_state, ground_state, reward)

    def reset(self):
        ''' Resets the wrapped agent and the action abstraction. '''
        self.agent.reset()
        self.action_abstr.reset()

    def end_of_episode(self):
        ''' Forwards the end-of-episode signal to the wrapped agent and the action abstraction. '''
        self.agent.end_of_episode()
        self.action_abstr.end_of_episode()
def convert_prob_sa_to_sa(prob_sa):
    '''
    Args:
        prob_sa (simple_rl.state_abs.ProbStateAbstraction)

    Returns:
        (simple_rl.state_abs.StateAbstraction)

    Summary:
        Maps each ground state in prob_sa.abstr_dist to the entry of its
        distribution with the largest value; on ties the first such entry
        in the distribution's iteration order is kept.
    '''
    new_phi = {}
    for s_g, dist in prob_sa.abstr_dist.items():
        # max() over the keys works with Python 3 dict views, which support
        # neither indexing nor .index().
        new_phi[s_g] = max(dist, key=dist.get)

    return StateAbstraction(new_phi)
def merge_state_abstr(list_of_state_abstr, states):
    '''
    Args:
        list_of_state_abstr (list)
        states (list)

    Returns:
        (simple_rl.StateAbstraction)

    Summary:
        Combines the abstractions in @list_of_state_abstr: two ground states
        end up in the same cluster only when every abstraction in the list
        maps them to the same abstract state.
    '''
    safe_state_pairings = defaultdict(list)

    # Walk all ordered pairs (including a state paired with itself) and keep
    # those on which no abstraction disagrees.
    for s_1, s_2 in itertools.product(states, repeat=2):
        some_abstr_disagrees = any(
            abstr.phi(s_1) != abstr.phi(s_2) for abstr in list_of_state_abstr)
        if some_abstr_disagrees:
            continue
        safe_state_pairings[s_1].append(s_2)
        safe_state_pairings[s_2].append(s_1)

    # Turn the pairings into cluster assignments.
    phi = defaultdict(list)
    cluster_counter = 0
    for state, partners in safe_state_pairings.items():
        for partner in partners:
            if state in phi:
                # Pull the partner into the state's existing cluster.
                phi[partner] = phi[state]
            elif partner in phi:
                # Join the partner's existing cluster.
                phi[state] = phi[partner]
            else:
                # Neither is assigned yet: open a new cluster for both.
                phi[state] = State(cluster_counter)
                phi[partner] = State(cluster_counter)

        # Advance the cluster id once per visited state.
        cluster_counter += 1

    return StateAbstraction(phi, states)
def __init__(self, ground_mdp, state_abstr=None, action_abstr=None, vi_sample_rate=5, max_iterations=1000, amdp_sample_rate=5, delta=0.001):
    '''
    Args:
        ground_mdp (simple_rl.MDP)
        state_abstr (simple_rl.StateAbstraction): If None, a StateAbstraction
            over the ground state space is used.
        action_abstr (simple_rl.ActionAbstraction): If None, an
            ActionAbstraction over the ground MDP's actions is used.
        vi_sample_rate (int): Num samples per transition for running VI.
        max_iterations (int): Usual VI # Iteration bound.
        amdp_sample_rate (int): Num samples per abstract transition to use for computing R_abstract, T_abstract.
        delta (float): Passed to the base ValueIteration as `delta`.
    '''
    self.ground_mdp = ground_mdp

    # Grab ground state space. This helper VI is only used for get_states()
    # and runs with fixed settings, independent of the arguments above.
    ground_vi = ValueIteration(self.ground_mdp, delta=0.001, max_iterations=1000, sample_rate=5)
    state_space = ground_vi.get_states()

    # Make the abstract MDP.
    if state_abstr is None:
        state_abstr = StateAbstraction(ground_state_space=state_space)
    self.state_abstr = state_abstr

    if action_abstr is None:
        action_abstr = ActionAbstraction(prim_actions=ground_mdp.get_actions())
    self.action_abstr = action_abstr

    abstr_mdp = abstr_mdp_funcs.make_abstr_mdp(ground_mdp,
                                               self.state_abstr,
                                               self.action_abstr,
                                               step_cost=0.0,
                                               sample_rate=amdp_sample_rate)

    # Create VI with the abstract MDP. Keyword arguments keep each setting
    # bound to the intended parameter regardless of the base constructor's
    # positional order.
    ValueIteration.__init__(self,
                            abstr_mdp,
                            sample_rate=vi_sample_rate,
                            delta=delta,
                            max_iterations=max_iterations)
def make_singletask_sa(mdp, indic_func, state_class, epsilon=0.0, aa_single_act=False, prob_of_mdp=1.0, track_act_opt_pr=False):
    '''
    Args:
        mdp (MDP): If an MDPDistribution is given, one MDP is sampled from it.
        indic_func (S x S --> {0,1}): Called as
            indic_func(state_x, state_y, vi, actions, epsilon=epsilon).
        state_class (Class)
        epsilon (float)
        aa_single_act (bool): If True, the max-Q action set of every ground
            state is recorded on the abstraction.
        prob_of_mdp (float): Passed along with each recorded action set.
        track_act_opt_pr (bool): Passed to the StateAbstraction constructor.

    Returns:
        (StateAbstraction)
    '''
    print("\tRunning VI...")
    sys.stdout.flush()

    # Run VI
    if isinstance(mdp, MDPDistribution):
        mdp = mdp.sample()

    vi = ValueIteration(mdp)
    vi.run_vi()
    print(" done.")

    print("\tMaking state abstraction...")
    sys.stdout.flush()
    sa = StateAbstraction(phi={}, state_class=state_class, track_act_opt_pr=track_act_opt_pr)
    clusters = defaultdict(list)
    ground_states = vi.get_states()
    num_states = len(ground_states)
    actions = mdp.get_actions()

    # Find state pairs that satisfy the condition.
    for i, state_x in enumerate(ground_states):
        sys.stdout.flush()
        clusters[state_x] = [state_x]

        for state_y in ground_states[i:]:
            if not (state_x == state_y) and indic_func(state_x, state_y, vi, actions, epsilon=epsilon):
                clusters[state_x].append(state_y)
                clusters[state_y].append(state_x)

    print("making clusters...")
    sys.stdout.flush()

    # Build SA. Iterate over a snapshot of the keys: entries are removed from
    # `clusters` inside the loop, and changing a dict's size while iterating
    # it directly raises RuntimeError on Python 3.
    for state in list(clusters):
        if state not in clusters:
            # Already absorbed into an earlier cluster.
            continue

        new_cluster = clusters[state]
        sa.make_cluster(new_cluster)

        # Destroy old so we don't double up.
        for s in new_cluster:
            clusters.pop(s, None)

    if aa_single_act:
        # Put all optimal actions in a set associated with the ground state.
        for ground_s in sa.get_ground_states():
            a_star_set = set(vi.get_max_q_actions(ground_s))
            sa.set_actions_state_opt_dict(ground_s, a_star_set, prob_of_mdp)

    print(" done.")
    print("\tGround States:", num_states)
    print("\tAbstract:", sa.get_num_abstr_states())
    print()

    return sa
def make_singletask_sa(mdp, indic_func, state_class, epsilon=0.0, aa_single_act=False, prob_of_mdp=1.0, track_act_opt_pr=False):
    '''
    Args:
        mdp (MDP): If an MDPDistribution is given, one MDP is sampled from it.
        indic_func (S x S --> {0,1}): Called as
            indic_func(state_x, state_y, vi, actions, epsilon=epsilon).
        state_class (Class)
        epsilon (float)
        aa_single_act (bool): If True, the max-Q action set of every ground
            state is recorded on the abstraction.
        prob_of_mdp (float): Passed along with each recorded action set.
        track_act_opt_pr (bool): Passed to the StateAbstraction constructor.

    Returns:
        (StateAbstraction)
    '''
    print("\tRunning VI...")
    sys.stdout.flush()

    # Run VI
    if isinstance(mdp, MDPDistribution):
        mdp = mdp.sample()

    vi = ValueIteration(mdp)
    vi.run_vi()
    print(" done.")

    print("\tMaking state abstraction...")
    sys.stdout.flush()
    sa = StateAbstraction(phi={}, state_class=state_class, track_act_opt_pr=track_act_opt_pr)
    clusters = defaultdict(list)
    all_states = vi.get_states()
    num_states = len(all_states)
    actions = mdp.get_actions()

    # Find state pairs that satisfy the condition.
    for i, state_x in enumerate(all_states):
        sys.stdout.flush()
        clusters[state_x] = [state_x]

        for state_y in all_states[i:]:
            if not (state_x == state_y) and indic_func(state_x, state_y, vi, actions, epsilon=epsilon):
                clusters[state_x].append(state_y)
                clusters[state_y].append(state_x)

    print("making clusters...")
    sys.stdout.flush()

    # Build SA. The keys are copied first because entries are popped from
    # `clusters` below; resizing a dict while iterating over it directly
    # raises RuntimeError on Python 3.
    for state in list(clusters):
        if state not in clusters:
            # This state was already placed in an earlier cluster.
            continue

        new_cluster = clusters[state]
        sa.make_cluster(new_cluster)

        # Destroy old so we don't double up.
        for s in new_cluster:
            clusters.pop(s, None)

    if aa_single_act:
        # Put all optimal actions in a set associated with the ground state.
        for ground_s in sa.get_ground_states():
            a_star_set = set(vi.get_max_q_actions(ground_s))
            sa.set_actions_state_opt_dict(ground_s, a_star_set, prob_of_mdp)

    print(" done.")
    print("\tGround States:", num_states)
    print("\tAbstract:", sa.get_num_abstr_states())
    print()

    return sa