def __init__(self, list_of_aa, prim_actions, level=0):
    '''
    Args:
        list_of_aa (list)
        prim_actions (list of str)
        level (int)
    '''
    self.list_of_aa = list_of_aa
    self.level = level
    self.prim_actions = prim_actions

    ActionAbstraction.__init__(self, options=self.get_actions(level), prim_actions=prim_actions)
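# A minimal sketch (illustrative, not library code) of assembling a stack from
# per-level ActionAbstractions. `aa_level_1` and `aa_level_2` are hypothetical
# placeholders for abstractions built elsewhere (e.g., via get_directed_aa
# below); the comment on level 0 is an assumption about get_actions(level).
def _example_build_aa_stack(aa_level_1, aa_level_2, prim_actions):
    # Level 0 presumably selects among primitives; higher levels select options.
    return ActionAbstractionStack(list_of_aa=[aa_level_1, aa_level_2],
                                  prim_actions=prim_actions,
                                  level=0)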
def make_directed_options_aa_from_sa_stack(mdp_distr, sa_stack):
    '''
    Args:
        mdp_distr (MDPDistribution)
        sa_stack (StateAbstractionStack)

    Returns:
        (ActionAbstractionStack) if option construction succeeds at every level, otherwise False.
    '''
    aa_stack = ActionAbstractionStack(list_of_aa=[], prim_actions=mdp_distr.get_actions())

    for level in range(1, sa_stack.get_num_levels() + 1):
        # Make directed options for the current level.
        sa_stack.set_level(level)
        next_options = aa_helpers.get_directed_options_for_sa(mdp_distr, sa_stack, incl_self_loops=False)

        if not next_options:
            # Too many options; signal the caller to decrease the abstraction ratio and retry.
            return False

        next_aa = ActionAbstraction(options=next_options, prim_actions=mdp_distr.get_actions())
        aa_stack.add_aa(next_aa)

    return aa_stack
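# A hedged usage sketch: the builder returns False when option construction
# fails, so callers should check before using the result. `mdp_distr` and
# `sa_stack` are assumed to come from the surrounding experiment code.
def _example_make_aa_stack(mdp_distr, sa_stack):
    aa_stack = make_directed_options_aa_from_sa_stack(mdp_distr, sa_stack)
    if aa_stack is False:
        # Too many options at some level; e.g., rebuild sa_stack with a
        # smaller abstraction ratio and try again.
        return None
    return aa_stack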
def get_policy_blocks_aa(mdp_distr, num_options=10, task_samples=20, incl_prim_actions=False):
    pb_options = make_policy_blocks_options(mdp_distr, num_options=num_options, task_samples=task_samples)

    if type(mdp_distr) is dict:
        first_mdp = list(mdp_distr.keys())[0]
    else:
        first_mdp = mdp_distr

    if incl_prim_actions:
        # Include the primitives.
        aa = ActionAbstraction(options=first_mdp.get_actions(), prim_actions=first_mdp.get_actions())
        for o in pb_options:
            aa.add_option(o)
        return aa
    else:
        # Return just the options.
        return ActionAbstraction(options=pb_options, prim_actions=first_mdp.get_actions())
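# Illustrative call (assumes make_policy_blocks_options and a populated MDP
# distribution are in scope). When mdp_distr is a dict, its keys are MDPs and
# the first key is used only to read off the primitive action set; passing
# incl_prim_actions=True keeps the primitives available alongside the options.
def _example_policy_blocks_aa(mdp_distr):
    return get_policy_blocks_aa(mdp_distr, num_options=10, task_samples=20, incl_prim_actions=True)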
def get_directed_aa(mdp_distr, state_abs, incl_prim_actions=False, max_options=100):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abs (StateAbstraction)
        incl_prim_actions (bool)
        max_options (int)

    Returns:
        (ActionAbstraction) if a suitable option set is found, otherwise False.
    '''
    directed_options = action_abs.aa_helpers.get_directed_options_for_sa(
        mdp_distr, state_abs, incl_self_loops=True, max_options=max_options)
    term_prob = 1 - mdp_distr.get_gamma()

    if not directed_options:
        # No good option set found.
        return False

    if incl_prim_actions:
        # Include the primitives.
        aa = ActionAbstraction(options=mdp_distr.get_actions(),
                               prim_actions=mdp_distr.get_actions(),
                               prims_on_failure=False,
                               term_prob=term_prob)
        for o in directed_options:
            aa.add_option(o)
        return aa
    else:
        # Return just the options.
        return ActionAbstraction(options=directed_options,
                                 prim_actions=mdp_distr.get_actions(),
                                 prims_on_failure=True,
                                 term_prob=term_prob)
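# A usage sketch with hypothetical variable names. Note the options' random
# termination probability is tied to the distribution's discount factor:
# term_prob = 1 - gamma, as computed above.
def _example_directed_aa(mdp_distr, state_abstr):
    directed_aa = get_directed_aa(mdp_distr, state_abstr, incl_prim_actions=True, max_options=100)
    if directed_aa is False:
        return None  # No good option set found.
    return directed_aa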
def get_aa(mdp_distr, default=False):
    '''
    Args:
        mdp_distr (MDPDistribution)
        default (bool): If True, returns a blank ActionAbstraction over the primitive actions.

    Returns:
        (ActionAbstraction)
    '''
    if default:
        return ActionAbstraction(options=mdp_distr.get_actions(), prim_actions=mdp_distr.get_actions())

    return action_abs.aa_helpers.make_greedy_options(mdp_distr)
def get_aa_high_prob_opt_single_act(mdp_distr, state_abstr, delta=0.2):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abstr (StateAbstraction)
        delta (float)

    Summary:
        Computes an action abstraction with one single-primitive-action
        repeating option per primitive action; each option can only be
        initiated in the ground states of a cluster where that action is
        optimal *with high probability* (> 1 - delta).
    '''
    # K: state, V: dict (K: act, V: probability)
    action_optimality_dict = state_abstr.get_act_opt_dict()

    # Compute options.
    options = []
    for s_a in state_abstr.get_abs_states():
        ground_states = state_abstr.get_ground_states_in_abs_state(s_a)

        # One option per action.
        for action in mdp_distr.get_actions():
            list_of_state_with_a_optimal_high_pr = []

            # Compute which states have a high probability of the action being optimal.
            for s_g in ground_states:
                print("Pr(a = a^* | s_g)", s_g, action, action_optimality_dict[s_g][action])
                if action_optimality_dict[s_g][action] > (1 - delta):
                    list_of_state_with_a_optimal_high_pr.append(s_g)

            if len(list_of_state_with_a_optimal_high_pr) == 0:
                continue

            init_predicate = ContainsPredicate(list_of_items=list_of_state_with_a_optimal_high_pr)
            term_predicate = NotPredicate(init_predicate)
            policy_obj = Policy(action)

            o = Option(init_predicate=init_predicate,
                       term_predicate=term_predicate,
                       policy=policy_obj.get_action)
            options.append(o)

    return ActionAbstraction(options=options, prim_actions=mdp_distr.get_actions(), prims_on_failure=True)
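# Worked illustration of the threshold above: with delta=0.2, a ground state
# joins an option's initiation set only when Pr(a = a^* | s_g) > 0.8. The
# probabilities below are made up, purely to show the filter.
def _example_high_prob_filter():
    delta = 0.2
    action_opt_probs = {"s1": 0.95, "s2": 0.7, "s3": 0.85}
    keep = [s for s, pr in action_opt_probs.items() if pr > (1 - delta)]
    return keep  # ["s1", "s3"]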
def get_aa_opt_only_single_act(mdp_distr, state_abstr):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abstr (StateAbstraction)

    Summary:
        Computes an action abstraction with one single-primitive-action
        repeating option per primitive action that was optimal somewhere in a
        cluster; each option can only be initiated in the ground states where
        that action was optimal.
    '''
    action_optimality_dict = state_abstr.get_act_opt_dict()

    # Compute options.
    options = []
    for s_a in state_abstr.get_abs_states():
        ground_states = state_abstr.get_ground_states_in_abs_state(s_a)

        # One option per action.
        for action in mdp_distr.get_actions():
            list_of_state_with_a_optimal = []
            for s_g in ground_states:
                if action in action_optimality_dict[s_g]:
                    list_of_state_with_a_optimal.append(s_g)

            if len(list_of_state_with_a_optimal) == 0:
                continue

            init_predicate = ContainsPredicate(list_of_items=list_of_state_with_a_optimal)
            term_predicate = NotPredicate(init_predicate)
            policy_obj = Policy(action)

            o = Option(init_predicate=init_predicate,
                       term_predicate=term_predicate,
                       policy=policy_obj.get_action,
                       term_prob=1 - mdp_distr.get_gamma())
            options.append(o)

    return ActionAbstraction(options=options, prim_actions=mdp_distr.get_actions())
class AbstractionWrapper(Agent):

    def __init__(self, SubAgentClass, actions, state_abstr=None, action_abstr=None, name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass)
            actions (list of str)
            state_abstr (StateAbstraction)
            action_abstr (ActionAbstraction)
            name_ext (str)
        '''
        # Set up the abstracted agent.
        self.agent = SubAgentClass(actions=actions)
        self.action_abstr = ActionAbstraction(options=self.agent.actions, prim_actions=self.agent.actions) if action_abstr is None else action_abstr
        self.state_abstr = StateAbstraction({}) if state_abstr is None else state_abstr

        Agent.__init__(self, name=self.agent.name + "-" + name_ext, actions=self.action_abstr.get_actions())

    def act(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Returns:
            (str)
        '''
        abstr_state = self.state_abstr.phi(ground_state)
        ground_action = self.action_abstr.act(self.agent, abstr_state, ground_state, reward)

        return ground_action

    def reset(self):
        self.agent.reset()
        self.action_abstr.reset()

    def end_of_episode(self):
        self.agent.end_of_episode()
        self.action_abstr.end_of_episode()
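# A minimal usage sketch, assuming a standard simple_rl agent class; older
# simple_rl versions name the Q-learner `QLearnerAgent` rather than
# `QLearningAgent`, so adjust the import to your version. With no abstractions
# given, phi is presumably the identity and the options are exactly the
# primitive actions, so the wrapper behaves like the base agent.
def _example_wrap_agent(mdp):
    from simple_rl.agents import QLearningAgent

    abstr_agent = AbstractionWrapper(QLearningAgent, actions=mdp.get_actions())
    first_action = abstr_agent.act(mdp.get_init_state(), reward=0)
    return abstr_agent, first_action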
def get_aa_single_act(mdp_distr, state_abstr):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abstr (StateAbstraction)

    Summary:
        Computes an action abstraction with one single-primitive-action
        repeating option per primitive action that was optimal somewhere in a
        cluster; each option can be initiated anywhere in the cluster and
        terminates upon leaving it.
    '''
    action_optimality_dict = state_abstr.get_act_opt_dict()

    # Compute options.
    options = []
    for s_a in state_abstr.get_abs_states():
        init_predicate = EqPredicate(y=s_a, func=state_abstr.phi)
        term_predicate = NeqPredicate(y=s_a, func=state_abstr.phi)
        ground_states = state_abstr.get_ground_states_in_abs_state(s_a)

        unique_a_star_in_cluster = set([])
        for s_g in ground_states:
            for a_star in action_optimality_dict[s_g]:
                unique_a_star_in_cluster.add(a_star)

        for action in unique_a_star_in_cluster:
            policy_obj = Policy(action)
            o = Option(init_predicate=init_predicate,
                       term_predicate=term_predicate,
                       policy=policy_obj.get_action)
            options.append(o)

    return ActionAbstraction(options=options, prim_actions=mdp_distr.get_actions())
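# Sketch contrasting the three single-action constructors defined in this
# file (the names are all from the code above); they differ in their
# initiation sets. get_aa_single_act initiates anywhere in the cluster, while
# the *_opt_only and *_high_prob variants initiate only in ground states
# where the action is optimal, or optimal with high probability.
def _example_single_act_variants(mdp_distr, state_abstr):
    aa_cluster = get_aa_single_act(mdp_distr, state_abstr)
    aa_opt_only = get_aa_opt_only_single_act(mdp_distr, state_abstr)
    aa_high_pr = get_aa_high_prob_opt_single_act(mdp_distr, state_abstr, delta=0.2)
    return aa_cluster, aa_opt_only, aa_high_pr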
def add_layer_to_aa_stack(mdp_distr, sa_stack, aa_stack):
    '''
    Args:
        mdp_distr (MDPDistribution)
        sa_stack (StateAbstractionStack)
        aa_stack (ActionAbstractionStack)

    Returns:
        (tuple):
            1. (ActionAbstractionStack)
            2. (bool): True if option construction failed and the caller should retry.
    '''
    if aa_stack.get_num_levels() > 0:
        abstr_mdp_distr = make_abstr_mdp.make_abstr_mdp_distr_multi_level(mdp_distr, sa_stack, aa_stack)
    else:
        abstr_mdp_distr = mdp_distr

    # Make options for the level + 1 height.
    sa_stack.set_level_to_max()
    next_options = aa_helpers.get_directed_options_for_sa(
        abstr_mdp_distr, sa_stack, incl_self_loops=False,
        max_options=1024 // (aa_stack.get_num_levels() + 1))

    if not next_options:
        # Too many options; signal the caller to decrease the abstraction ratio and retry.
        return aa_stack, True

    next_aa = ActionAbstraction(options=next_options, prim_actions=mdp_distr.get_actions())
    aa_stack.add_aa(next_aa)

    return aa_stack, False
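# A hedged sketch of the intended call pattern: grow the stack one layer at a
# time and stop (or shrink the state abstraction and retry) when the builder
# reports failure. `max_levels` is an illustrative parameter, not library API.
def _example_grow_stack(mdp_distr, sa_stack, aa_stack, max_levels=3):
    for _ in range(max_levels):
        aa_stack, failed = add_layer_to_aa_stack(mdp_distr, sa_stack, aa_stack)
        if failed:
            break
    return aa_stack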
class AbstractionWrapper(Agent):

    def __init__(self, SubAgentClass, actions, mdp_name, max_option_steps=0, state_abstr=None, action_abstr=None, name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass)
            actions (list of str)
            mdp_name (str)
            max_option_steps (int)
            state_abstr (StateAbstraction)
            action_abstr (ActionAbstraction)
            name_ext (str)
        '''
        # Set up the abstracted agent.
        self._create_default_abstractions(actions, state_abstr, action_abstr)
        self.agent = SubAgentClass(actions=self.action_abstr.get_actions())
        self.exp_directory = os.path.join(os.getcwd(), "results", mdp_name, "options")
        self.reward_since_tracking = 0
        self.max_option_steps = max_option_steps
        self.num_option_steps = 0
        Agent.__init__(self, name=self.agent.name + "-" + name_ext, actions=self.action_abstr.get_actions())
        self._setup_files()

    def _setup_files(self):
        '''
        Summary:
            Creates and removes relevant directories/files.
        '''
        if not os.path.exists(os.path.join(self.exp_directory)):
            os.makedirs(self.exp_directory)

        if os.path.exists(os.path.join(self.exp_directory, str(self.name)) + ".csv"):
            # Remove the old data file.
            os.remove(os.path.join(self.exp_directory, str(self.name)) + ".csv")

    def write_datum_to_file(self, datum):
        '''
        Summary:
            Writes datum to file.
        '''
        out_file = open(os.path.join(self.exp_directory, str(self.name)) + ".csv", "a+")
        out_file.write(str(datum) + ",")
        out_file.close()

    def _record_experience(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Summary:
            Tracks experiences so plots can be displayed in terms of options.
        '''
        self.write_datum_to_file(self.reward_since_tracking)
        self.reward_since_tracking = 0

    def _create_default_abstractions(self, actions, state_abstr, action_abstr):
        '''
        Summary:
            Creates the default abstractions.
        '''
        if action_abstr is None:
            self.action_abstr = ActionAbstraction(options=actions, prim_actions=actions)
        else:
            self.action_abstr = action_abstr

        self.state_abstr = StateAbstraction(phi={}) if state_abstr is None else state_abstr

    def act(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Returns:
            (str)
        '''
        self.reward_since_tracking += reward

        if self.max_option_steps > 0:
            # We're counting action steps in terms of options.
            if self.num_option_steps == self.max_option_steps:
                # We're at the limit.
                self._record_experience(ground_state, reward)
                self.num_option_steps += 1
                return "terminate"
            elif self.num_option_steps > self.max_option_steps:
                # Skip.
                return "terminate"
            elif not self.action_abstr.is_next_step_continuing_option(ground_state):
                # Taking a new option: count it and continue.
                self.num_option_steps += 1
                self._record_experience(ground_state, reward)
        else:
            self._record_experience(ground_state, reward)

        abstr_state = self.state_abstr.phi(ground_state)
        ground_action = self.action_abstr.act(self.agent, abstr_state, ground_state, reward)

        return ground_action

    def reset(self):
        # Write data.
        out_file = open(os.path.join(self.exp_directory, str(self.name)) + ".csv", "a+")
        out_file.write("\n")
        out_file.close()

        self.agent.reset()
        self.action_abstr.reset()
        self.reward_since_tracking = 0
        self.num_option_steps = 0

    def new_task(self):
        self._reset_reward()

    def get_num_known_sa(self):
        return self.agent.get_num_known_sa()

    def _reset_reward(self):
        if isinstance(self.agent, RMaxAgent):
            self.agent._reset_reward()

    def end_of_episode(self):
        self.agent.end_of_episode()
        self.action_abstr.end_of_episode()
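# Usage sketch for the file-writing wrapper above. mdp_name determines the
# results subdirectory under ./results/, and max_option_steps caps the number
# of option executions before act() returns the sentinel string "terminate".
# `SubAgentClass` is any simple_rl agent class; str(mdp) as the name relies on
# simple_rl MDPs defining a readable __str__ (an assumption worth checking).
def _example_wrap_with_budget(mdp, SubAgentClass):
    abstr_agent = AbstractionWrapper(SubAgentClass,
                                     actions=mdp.get_actions(),
                                     mdp_name=str(mdp),
                                     max_option_steps=100)
    return abstr_agent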