Example #1
    def __init__(self,
                 SubAgentClass,
                 actions,
                 state_abstr=None,
                 action_abstr=None,
                 name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass)
            actions (list of str)
            state_abstr (StateAbstraction)
            action_abstr (ActionAbstraction)
            name_ext (str)
        '''

        # Setup the abstracted agent.
        self.agent = SubAgentClass(actions=actions)
        self.action_abstr = ActionAbstraction(
            options=self.agent.actions, prim_actions=self.agent.actions
        ) if action_abstr is None else action_abstr
        self.state_abstr = StateAbstraction(
            {}) if state_abstr is None else state_abstr

        Agent.__init__(self,
                       name=self.agent.name + "-" + name_ext,
                       actions=self.action_abstr.get_actions())
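The constructor above is only a fragment; a minimal usage sketch follows. The QLearningAgent import path and class name are assumptions (some versions of simple_rl name the class differently), and the action list is invented for illustration.

# Hypothetical usage sketch; QLearningAgent, its import path, and the action
# list below are assumptions, not part of the example above.
from simple_rl.agents import QLearningAgent

actions = ["up", "down", "left", "right"]  # assumed primitive actions
abstr_agent = AbstractionWrapper(SubAgentClass=QLearningAgent,
                                 actions=actions,
                                 state_abstr=None,   # falls back to StateAbstraction({})
                                 action_abstr=None,  # falls back to primitive-only options
                                 name_ext="q-abstr")
# abstr_agent.act(ground_state, reward) then returns a ground action string.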
Example #2
    def __init__(self, list_of_aa, prim_actions, level=0):
        '''
        Args:
            list_of_aa (list)
            prim_actions (list)
            level (int)
        '''
        self.list_of_aa = list_of_aa
        self.level = level
        self.prim_actions = prim_actions
        ActionAbstraction.__init__(self,
                                   options=self.get_actions(level),
                                   prim_actions=prim_actions)
    def _create_default_abstractions(self, actions, state_abstr, action_abstr):
        '''
        Summary:
            Creates the default state and action abstractions when none are provided.
        '''
        if action_abstr is None:
            self.action_abstr = ActionAbstraction(options=actions,
                                                  prim_actions=actions)
        else:
            self.action_abstr = action_abstr

        self.state_abstr = StateAbstraction(
            phi={}) if state_abstr is None else state_abstr
def make_directed_options_aa_from_sa_stack(mdp_distr, sa_stack):
    '''
    Args:
        mdp_distr (MDPDistribution)
        sa_stack (StateAbstractionStack)

    Returns:
        (ActionAbstraction)
    '''

    aa_stack = ActionAbstractionStack(list_of_aa=[], prim_actions=mdp_distr.get_actions())

    for level in range(1, sa_stack.get_num_levels() + 1):
        
        # Make directed options for the current level.
        sa_stack.set_level(level)
        next_options = aa_helpers.get_directed_options_for_sa(mdp_distr, sa_stack, incl_self_loops=False)

        if not next_options:
            # No valid option set was found for this level; signal failure.
            return False

        next_aa = ActionAbstraction(options=next_options, prim_actions=mdp_distr.get_actions())
        aa_stack.add_aa(next_aa)

    return aa_stack
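A brief caller sketch for the function above; mdp_distr (MDPDistribution) and sa_stack (StateAbstractionStack) are assumed to have been built beforehand, and the error handling is an assumption.

# Hypothetical caller; mdp_distr and sa_stack are assumed to exist already.
aa_stack = make_directed_options_aa_from_sa_stack(mdp_distr, sa_stack)

if aa_stack is False:
    # Option construction failed at some level; a caller would typically
    # coarsen the state abstraction stack and try again.
    raise ValueError("Could not build directed options for every level.")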
Example #5
def get_policy_blocks_aa(mdp_distr, num_options=10, task_samples=20, incl_prim_actions=False):
    '''
    Args:
        mdp_distr (MDPDistribution or dict)
        num_options (int)
        task_samples (int)
        incl_prim_actions (bool)

    Returns:
        (ActionAbstraction)
    '''
    pb_options = make_policy_blocks_options(mdp_distr, num_options=num_options, task_samples=task_samples)

    if type(mdp_distr) is dict:
        first_mdp = list(mdp_distr.keys())[0]
    else:
        first_mdp = mdp_distr

    if incl_prim_actions:
        # Include the primitives.
        aa = ActionAbstraction(options=first_mdp.get_actions(), prim_actions=first_mdp.get_actions())
        for o in pb_options:
            aa.add_option(o)
        return aa
    else:
        # Return just the options.
        return ActionAbstraction(options=pb_options, prim_actions=first_mdp.get_actions())
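A short usage sketch for get_policy_blocks_aa; the argument values are arbitrary, and mdp_distr is assumed to be an MDPDistribution (the dict branch above is only a fallback).

# Hypothetical call; the argument values are arbitrary.
pb_aa = get_policy_blocks_aa(mdp_distr,
                             num_options=8,
                             task_samples=10,
                             incl_prim_actions=True)
print(len(pb_aa.get_actions()))  # primitives plus the policy-blocks options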
def get_directed_aa(mdp_distr,
                    state_abs,
                    incl_prim_actions=False,
                    max_options=100):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abs (StateAbstraction)
        incl_prim_actions (bool)
        max_options (int)

    Returns:
        (ActionAbstraction)
    '''
    directed_options = action_abs.aa_helpers.get_directed_options_for_sa(
        mdp_distr, state_abs, incl_self_loops=True, max_options=max_options)
    term_prob = 1 - mdp_distr.get_gamma()

    if not directed_options:
        # No good option set found.
        return False

    if incl_prim_actions:
        # Include the primitives.
        aa = ActionAbstraction(options=mdp_distr.get_actions(),
                               prim_actions=mdp_distr.get_actions(),
                               prims_on_failure=False,
                               term_prob=term_prob)
        for o in directed_options:
            aa.add_option(o)
        return aa
    else:
        # Return just the options.
        return ActionAbstraction(options=directed_options,
                                 prim_actions=mdp_distr.get_actions(),
                                 prims_on_failure=True,
                                 term_prob=term_prob)
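Since get_directed_aa returns False when no option set under max_options is found, a caller needs a fallback. A hedged sketch is below; mdp_distr and state_abs are assumed to exist already, and the primitive-only fallback is an assumption about how one might recover.

# Hypothetical caller; mdp_distr and state_abs are assumed to exist already.
aa = get_directed_aa(mdp_distr, state_abs, incl_prim_actions=True, max_options=64)

if aa is False:
    # No option set under max_options was found; fall back to primitives only.
    aa = ActionAbstraction(options=mdp_distr.get_actions(),
                           prim_actions=mdp_distr.get_actions())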
def get_aa(mdp_distr, default=False):
    '''
    Args:
        mdp_distr (MDPDistribution)
        default (bool): If True, returns an ActionAbstraction over only the primitive actions

    Returns:
        (ActionAbstraction)
    '''

    if default:
        return ActionAbstraction(options=mdp_distr.get_actions(),
                                 prim_actions=mdp_distr.get_actions())

    return action_abs.aa_helpers.make_greedy_options(mdp_distr)
Example #8
def get_aa_high_prob_opt_single_act(mdp_distr, state_abstr, delta=0.2):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abstr (StateAbstraction)
        delta (float)

    Summary:
        Computes an action abstraction containing, for each abstract state, one
        option per primitive action that is optimal *with high probability*
        (probability above 1 - delta) in the cluster's ground states; each such
        option simply repeats that primitive action.
    '''
    # K: state, V: dict (K: act, V: probability)
    action_optimality_dict = state_abstr.get_act_opt_dict()

    # Compute options.
    options = []
    for s_a in state_abstr.get_abs_states():

        ground_states = state_abstr.get_ground_states_in_abs_state(s_a)

        # One option per action.
        for action in mdp_distr.get_actions():
            list_of_state_with_a_optimal_high_pr = []

            # Compute which states have high prob of being optimal.
            for s_g in ground_states:
                print "Pr(a = a^* \mid s_g)", s_g, action, action_optimality_dict[
                    s_g][action]
                if action_optimality_dict[s_g][action] > (1 - delta):
                    list_of_state_with_a_optimal_high_pr.append(s_g)

            if len(list_of_state_with_a_optimal_high_pr) == 0:
                continue

            init_predicate = ContainsPredicate(
                list_of_items=list_of_state_with_a_optimal_high_pr)
            term_predicate = NotPredicate(init_predicate)
            policy_obj = Policy(action)

            o = Option(init_predicate=init_predicate,
                       term_predicate=term_predicate,
                       policy=policy_obj.get_action)

            options.append(o)

    return ActionAbstraction(options=options,
                             prim_actions=mdp_distr.get_actions(),
                             prims_on_failure=True)
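The core of the example above is the high-probability filter Pr(a = a* | s_g) > 1 - delta. A minimal, self-contained sketch of just that filter is below; the dictionary layout mirrors get_act_opt_dict() as used above, but the concrete states and probabilities are made up for illustration.

# Toy illustration of the delta filter; the data is invented.
delta = 0.2
action_optimality = {
    "s1": {"up": 0.95, "down": 0.05},
    "s2": {"up": 0.70, "down": 0.30},
}

# Keep only states where "up" is optimal with probability above 1 - delta.
high_pr_states = [s for s, probs in action_optimality.items()
                  if probs["up"] > (1 - delta)]
# -> ["s1"], since 0.95 > 0.8 while 0.70 is not.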
Example #9
def get_aa_opt_only_single_act(mdp_distr, state_abstr):
    '''
    Args:
        mdp_distr (MDPDistribution)
        state_abstr (StateAbstraction)

    Summary:
        Computes an action abstraction containing, for each abstract state, one
        option per primitive action that is optimal in at least one of the
        cluster's ground states; each such option simply repeats that primitive
        action.
    '''
    action_optimality_dict = state_abstr.get_act_opt_dict()

    # Compute options.
    options = []
    for s_a in state_abstr.get_abs_states():

        ground_states = state_abstr.get_ground_states_in_abs_state(s_a)

        # One option per action.
        for action in mdp_distr.get_actions():
            list_of_state_with_a_optimal = []

            for s_g in ground_states:
                if action in action_optimality_dict[s_g]:
                    list_of_state_with_a_optimal.append(s_g)

            if len(list_of_state_with_a_optimal) == 0:
                continue

            init_predicate = ContainsPredicate(
                list_of_items=list_of_state_with_a_optimal)
            term_predicate = NotPredicate(init_predicate)
            policy_obj = Policy(action)

            o = Option(init_predicate=init_predicate,
                       term_predicate=term_predicate,
                       policy=policy_obj.get_action,
                       term_prob=1 - mdp_distr.get_gamma())

            options.append(o)

    return ActionAbstraction(options=options,
                             prim_actions=mdp_distr.get_actions())
Example #10
class AbstractionWrapper(Agent):
    def __init__(self,
                 SubAgentClass,
                 actions,
                 state_abstr=None,
                 action_abstr=None,
                 name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass)
            actions (list of str)
            state_abstr (StateAbstraction)
            action_abstr (ActionAbstraction)
            name_ext (str)
        '''

        # Setup the abstracted agent.
        self.agent = SubAgentClass(actions=actions)
        self.action_abstr = ActionAbstraction(
            options=self.agent.actions, prim_actions=self.agent.actions
        ) if action_abstr is None else action_abstr
        self.state_abstr = StateAbstraction(
            {}) if state_abstr is None else state_abstr

        Agent.__init__(self,
                       name=self.agent.name + "-" + name_ext,
                       actions=self.action_abstr.get_actions())

    def act(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Return:
            (str)
        '''
        abstr_state = self.state_abstr.phi(ground_state)
        ground_action = self.action_abstr.act(self.agent, abstr_state,
                                              ground_state, reward)

        return ground_action

    def reset(self):
        self.agent.reset()
        self.action_abstr.reset()

    def end_of_episode(self):
        self.agent.end_of_episode()
        self.action_abstr.end_of_episode()
Example #11
def get_aa_single_act(mdp_distr, state_abstr):
    '''
    Args:
        mdp_distr
        state_abstr (StateAbstraction)

    Summary:
        Computes an action abstraction where there exists an option that repeats a
        single primitive action, for each primitive action that was optimal in the
        cluster.
    '''

    action_optimality_dict = state_abstr.get_act_opt_dict()

    options = []
    # Compute options.
    for s_a in state_abstr.get_abs_states():
        init_predicate = EqPredicate(y=s_a, func=state_abstr.phi)
        term_predicate = NeqPredicate(y=s_a, func=state_abstr.phi)

        ground_states = state_abstr.get_ground_states_in_abs_state(s_a)

        unique_a_star_in_cluster = set()
        for s_g in ground_states:
            for a_star in action_optimality_dict[s_g]:
                unique_a_star_in_cluster.add(a_star)

        for action in unique_a_star_in_cluster:
            policy_obj = Policy(action)

            o = Option(init_predicate=init_predicate,
                       term_predicate=term_predicate,
                       policy=policy_obj.get_action)
            options.append(o)

    return ActionAbstraction(options=options,
                             prim_actions=mdp_distr.get_actions())
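Each option built above repeats one primitive action while phi keeps mapping the ground state to the same abstract state s_a. A minimal stand-alone sketch of that initiation/termination split is below; the toy phi and lambda predicates are stand-ins for illustration, not simple_rl's EqPredicate/NeqPredicate classes.

# Stand-in sketch of the EqPredicate / NeqPredicate pair used above.
def phi(ground_state):
    # Hypothetical abstraction: group states by their first character.
    return ground_state[0]

s_a = "a"
init_predicate = lambda s: phi(s) == s_a    # option can start here
term_predicate = lambda s: phi(s) != s_a    # option stops once we leave the cluster

assert init_predicate("a3") and not term_predicate("a3")
assert term_predicate("b1")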
Example #12
def add_layer_to_aa_stack(mdp_distr, sa_stack, aa_stack):
    '''
    Args:
        mdp_distr (MDPDistribution)
        sa_stack (StateAbstractionStack)
        aa_stack (ActionAbstractionStack)

    Returns:
        (tuple):
            1. (ActionAbstractionStack)
            2. (bool): True if option construction failed at this level.
    '''
    if aa_stack.get_num_levels() > 0:
        abstr_mdp_distr = make_abstr_mdp.make_abstr_mdp_distr_multi_level(
            mdp_distr, sa_stack, aa_stack)
    else:
        abstr_mdp_distr = mdp_distr

    # Make options for the level + 1 height.
    sa_stack.set_level_to_max()
    next_options = aa_helpers.get_directed_options_for_sa(
        abstr_mdp_distr,
        sa_stack,
        incl_self_loops=False,
        max_options=1024 // (aa_stack.get_num_levels() + 1))

    if not next_options:
        # Too many options, decrease abstraction ratio and continue.
        return aa_stack, True

    next_aa = ActionAbstraction(options=next_options,
                                prim_actions=mdp_distr.get_actions())

    aa_stack.add_aa(next_aa)
    return aa_stack, False
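add_layer_to_aa_stack is intended to be called repeatedly, one abstraction level at a time, until it signals that no further option set could be built. A hedged sketch of that driver loop is below; max_levels and the surrounding objects (mdp_distr, sa_stack, aa_stack) are assumptions.

# Hypothetical driver loop; max_levels and the surrounding objects are assumptions.
max_levels = 4
for _ in range(max_levels):
    aa_stack, failed = add_layer_to_aa_stack(mdp_distr, sa_stack, aa_stack)
    if failed:
        # Too many options at this level; stop stacking.
        break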
class AbstractionWrapper(Agent):
    def __init__(self,
                 SubAgentClass,
                 actions,
                 mdp_name,
                 max_option_steps=0,
                 state_abstr=None,
                 action_abstr=None,
                 name_ext="abstr"):
        '''
        Args:
            SubAgentClass (simple_rl.AgentClass)
            actions (list of str)
            mdp_name (str)
            max_option_steps (int)
            state_abstr (StateAbstraction)
            action_abstr (ActionAbstraction)
            name_ext (str)
        '''

        # Setup the abstracted agent.
        self._create_default_abstractions(actions, state_abstr, action_abstr)
        self.agent = SubAgentClass(actions=self.action_abstr.get_actions())
        self.exp_directory = os.path.join(os.getcwd(), "results", mdp_name,
                                          "options")
        self.reward_since_tracking = 0
        self.max_option_steps = max_option_steps
        self.num_option_steps = 0
        Agent.__init__(self,
                       name=self.agent.name + "-" + name_ext,
                       actions=self.action_abstr.get_actions())
        self._setup_files()

    def _setup_files(self):
        '''
        Summary:
            Creates and removes relevant directories/files.
        '''
        if not os.path.exists(os.path.join(self.exp_directory)):
            os.makedirs(self.exp_directory)

        if os.path.exists(
                os.path.join(self.exp_directory, str(self.name)) + ".csv"):
            # Remove old
            os.remove(
                os.path.join(self.exp_directory, str(self.name)) + ".csv")

    def write_datum_to_file(self, datum):
        '''
        Summary:
            Writes datum to file.
        '''
        with open(os.path.join(self.exp_directory, str(self.name)) + ".csv",
                  "a+") as out_file:
            out_file.write(str(datum) + ",")

    def _record_experience(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Summary:
            Tracks experiences to display plots in terms of options.
        '''
        # if not self.action_abstr.is_next_step_continuing_option(ground_state):
        self.write_datum_to_file(self.reward_since_tracking)
        self.reward_since_tracking = 0

    def _create_default_abstractions(self, actions, state_abstr, action_abstr):
        '''
        Summary:
            Creates the default state and action abstractions when none are provided.
        '''
        if action_abstr is None:
            self.action_abstr = ActionAbstraction(options=actions,
                                                  prim_actions=actions)
        else:
            self.action_abstr = action_abstr

        self.state_abstr = StateAbstraction(
            phi={}) if state_abstr is None else state_abstr

    def act(self, ground_state, reward):
        '''
        Args:
            ground_state (State)
            reward (float)

        Return:
            (str)
        '''
        self.reward_since_tracking += reward

        if self.max_option_steps > 0:
            # We're counting action steps in terms of options.
            if self.num_option_steps == self.max_option_steps:
                # We're at the limit.
                self._record_experience(ground_state, reward)
                self.num_option_steps += 1
                return "terminate"
            elif self.num_option_steps > self.max_option_steps:
                # Skip.
                return "terminate"
            elif not self.action_abstr.is_next_step_continuing_option(
                    ground_state):
                # Taking a new option, count it and continue.
                self.num_option_steps += 1
                self._record_experience(ground_state, reward)
        else:
            self._record_experience(ground_state, reward)

        abstr_state = self.state_abstr.phi(ground_state)

        # print ground_state, abstr_state, hash(ground_state)

        ground_action = self.action_abstr.act(self.agent, abstr_state,
                                              ground_state, reward)

        # print "ground_action", ground_action, type(ground_action), len(ground_action)

        return ground_action

    def reset(self):
        # Write data.
        with open(os.path.join(self.exp_directory, str(self.name)) + ".csv",
                  "a+") as out_file:
            out_file.write("\n")
        self.agent.reset()
        self.action_abstr.reset()
        self.reward_since_tracking = 0
        self.num_option_steps = 0

    def new_task(self):
        self._reset_reward()

    def get_num_known_sa(self):
        return self.agent.get_num_known_sa()

    def _reset_reward(self):
        if isinstance(self.agent, RMaxAgent):
            self.agent._reset_reward()

    def end_of_episode(self):
        self.agent.end_of_episode()
        self.action_abstr.end_of_episode()
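Finally, a sketch of how this wrapper might be driven inside an episode loop, showing where act(), end_of_episode(), and reset() are called. The mdp object, its get_init_state() / execute_agent_action() methods, and the constants below are assumptions for illustration.

# Hypothetical driver loop; mdp, its methods, and the constants are assumptions.
num_episodes, num_steps = 100, 50
agent = ...  # an AbstractionWrapper built as in __init__ above

for episode in range(num_episodes):
    state = mdp.get_init_state()
    reward = 0
    for step in range(num_steps):
        action = agent.act(state, reward)
        if action == "terminate":
            # act() emits "terminate" once max_option_steps is exhausted.
            break
        reward, state = mdp.execute_agent_action(action)
    agent.end_of_episode()

agent.reset()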