Пример #1
0
    def _convert_to_options(self, action_list):
        '''
        Wraps every ground action in a primitive Option.

        Each option gets its own initiation and termination Predicate, both
        built from make_lambda(True), and a policy built from
        make_lambda(ground_action).

        Args:
            action_list (list): Ground actions. String and non-string
                actions are treated the same way.

        Returns:
            (list of Option): One option per entry of action_list, in the
                same order, each named "prim." + str(action).
        '''
        options = []
        for ground_action in action_list:
            # str() leaves plain strings untouched, so a single code path
            # names string and non-string actions alike.
            options.append(
                Option(init_predicate=Predicate(make_lambda(True)),
                       term_predicate=Predicate(make_lambda(True)),
                       policy=make_lambda(ground_action),
                       name="prim." + str(ground_action)))
        return options
Пример #2
0
def make_near_optimal_phi_relative_options(mdp,
                                           state_abstr,
                                           method='optimal',
                                           num_rand_opts=0,
                                           **kwargs):
    """
    Builds phi-relative options from a value-iteration policy.

    Value iteration is run once on mdp. Then, for every abstract state
    s_phi of state_abstr, options are created whose initiation predicate is
    EqPredicate(y=s_phi, func=state_abstr.phi) and whose termination
    predicate is NeqPredicate(y=s_phi, func=state_abstr.phi).

    Args:
        mdp: The MDP to plan in. OptionsMDP instances are solved with
            OptionsMDPValueIteration, anything else with ValueIteration.
        state_abstr: Must provide get_abs_states() and phi.
        method (str): With 'eps-greedy', each abstract state contributes
            num_rand_opts uniformly-random options followed by one
            eps-greedy option. Any other value (including the default
            'optimal') contributes exactly one option that follows the
            value-iteration policy.
        num_rand_opts (int): Number of random options per abstract state;
            only used when method == 'eps-greedy'.
        **kwargs: Must contain 'eps' when method == 'eps-greedy'.

    Returns:
        (tuple): (options, optimal_options). optimal_options is never
            populated by this function and is always an empty list.

    Raises:
        KeyError: If method == 'eps-greedy' and 'eps' is not supplied.
    """
    # Get the optimal Q function
    from planning.OptionsMDPValueIterationClass import OptionsMDPValueIteration
    from data_structs.OptionsMDPClass import OptionsMDP

    if isinstance(mdp, OptionsMDP):
        value_iter = OptionsMDPValueIteration(mdp, sample_rate=20)
    else:
        value_iter = ValueIteration(mdp, sample_rate=10)

    value_iter.run_vi()

    options = []
    optimal_options = []
    for s_phi in state_abstr.get_abs_states():
        init_predicate = EqPredicate(y=s_phi, func=state_abstr.phi)
        term_predicate = NeqPredicate(y=s_phi, func=state_abstr.phi)

        if method == 'eps-greedy':
            eps = kwargs['eps']

            eps_greedy_policy = get_eps_greedy_policy(eps, value_iter.policy,
                                                      mdp.get_actions())

            o_eps = Option(init_predicate=init_predicate,
                           term_predicate=term_predicate,
                           policy=eps_greedy_policy)

            # Random options come first, then the eps-greedy one.
            for _ in range(num_rand_opts):
                o_rand = Option(
                    init_predicate=init_predicate,
                    term_predicate=term_predicate,
                    policy=lambda x: random.choice(mdp.get_actions()))
                options.append(o_rand)

            options.append(o_eps)
        else:
            # A single branch for 'optimal' and any unrecognised method, so
            # the value-iteration option is appended exactly once per
            # abstract state (it used to be appended twice for 'optimal').
            o_star = Option(init_predicate=init_predicate,
                            term_predicate=term_predicate,
                            policy=lambda s: value_iter.policy(s))
            options.append(o_star)

    return options, optimal_options
Пример #3
0
def make_fixed_rand_options(mdp, state_abstr):
    '''
    Builds one random-policy option for every pair of distinct abstract
    states.

    Pairs are treated as unordered: once an option from s_a to s_a_prime
    exists, no option is created for the reverse direction. Each option
    initiates in the ground states of the first abstract state, terminates
    in the ground states of the second, and picks a uniformly random action
    from mdp.get_actions(s) at every step.

    Args:
        mdp (simple_rl.MDP)
        state_abstr (simple_rl.StateAbstraction)

    Returns:
        (list): The Option instances, in the order the pairs were first
            encountered.
    '''
    abs_states = state_abstr.get_abs_states()

    # All options share one placeholder policy.
    placeholder_policy = lambda s: random.choice(mdp.get_actions(s))

    options = []
    seen_pairs = set()

    # For each s_{phi,1} s_{phi,2} pair.
    for s_a in abs_states:
        for s_a_prime in abs_states:
            if s_a == s_a_prime:
                continue
            if (s_a, s_a_prime) in seen_pairs or (s_a_prime, s_a) in seen_pairs:
                continue

            # Make an option to transition between the two states.
            init_predicate = InListPredicate(
                ls=state_abstr.get_ground_states_in_abs_state(s_a))
            term_predicate = InListPredicate(
                ls=state_abstr.get_ground_states_in_abs_state(s_a_prime))

            options.append(Option(init_predicate=init_predicate,
                                  term_predicate=term_predicate,
                                  policy=placeholder_policy))
            seen_pairs.add((s_a, s_a_prime))

    return options
def make_goal_based_options(mdp_distr):
    '''
    Creates one goal-directed option per MDP of the distribution.

    The terminal states of all MDPs are pooled into a single set, and every
    option terminates on membership in that pooled set.

    Args:
        mdp_distr (MDPDistribution)

    Returns:
        (set): Contains Option instances, one added per MDP.
    '''
    # Pool the terminal states found in each MDP's state space.
    terminal_states = set()
    for candidate_mdp in mdp_distr.get_all_mdps():
        planner = ValueIteration(candidate_mdp)
        terminal_states.update(
            state for state in planner.get_states() if state.is_terminal())

    # One option per MDP: initiation predicate is always true, termination
    # is membership in the pooled terminal states.
    goal_options = set()
    for candidate_mdp in mdp_distr.get_all_mdps():
        goal_options.add(
            Option(init_predicate=Predicate(func=lambda x: True),
                   term_predicate=InListPredicate(ls=terminal_states),
                   policy=_make_mini_mdp_option_policy(candidate_mdp),
                   term_prob=0.0))

    return goal_options
Пример #5
0
def make_phi_relative_options(mdp, state_abstr, options_per_s_phi=5):
    '''
    Creates options_per_s_phi options for every abstract state.

    Each option is given an EqPredicate on the abstract state as its
    initiation predicate, a NeqPredicate on the same abstract state as its
    termination predicate, and its own policy from
    get_fixed_random_policy(mdp).

    Args:
        mdp (simple_rl.MDP)
        state_abstr (simple_rl.StateAbstraction)
        options_per_s_phi (int)

    Returns:
        (list): Option instances grouped by abstract state.
    '''
    return [
        Option(init_predicate=EqPredicate(y=abstract_state,
                                          func=state_abstr.phi),
               term_predicate=NeqPredicate(y=abstract_state,
                                           func=state_abstr.phi),
               policy=get_fixed_random_policy(mdp))
        for abstract_state in state_abstr.get_abs_states()
        for _ in range(options_per_s_phi)
    ]
Пример #6
0
def primitive_action_to_option(action):
    '''
    Wraps a single primitive action as an Option.

    Args:
        action: The primitive action the option's policy returns for every
            state.

    Returns:
        (Option): Named 'o_' + str(action). One always-true Predicate
            instance serves as both initiation and termination predicate.
    '''
    always_true = Predicate(lambda s: True)
    constant_policy = lambda s: action

    return Option(init_predicate=always_true,
                  term_predicate=always_true,
                  policy=constant_policy,
                  name='o_' + str(action))
def make_point_options(mdp, pairs, policy='vi'):
    '''
    Creates one option per (initiation set, termination set) pair.

    Args:
        mdp
        pairs: a list of pairs. Each pair holds the init set at index 0 and
            the term set at index 1. An entry that is not a list is wrapped
            into a single-element list.
        policy (str): 'vi' builds the option policy with
            _make_mini_mdp_option_policy, 'dqn' with _make_dqn_option_policy
            (given the first termination state). Any other value trips an
            assertion.

    Returns:
        (set): Contains Option instances.
    '''

    def _as_list(states):
        # Only exact list instances are passed through unchanged.
        return states if type(states) is list else [states]

    point_options = set()
    for pair in pairs:
        init_states = _as_list(pair[0])
        term_states = _as_list(pair[1])

        init_predicate = InListPredicate(ls=init_states)
        term_predicate = InListPredicate(ls=term_states)

        # Pick the policy first; the Option itself is built the same way
        # for both kinds.
        if policy == 'vi':
            option_policy = _make_mini_mdp_option_policy(mdp, n_iters=100)
        elif policy == 'dqn':
            option_policy = _make_dqn_option_policy(mdp, term_states[0])
        else:
            assert (False)

        point_options.add(Option(init_predicate=init_predicate,
                                 term_predicate=term_predicate,
                                 policy=option_policy,
                                 term_prob=0.0))

    return point_options
Пример #8
0
def find_eigenoptions(mdp, num_options=4, init_everywhere=False):
    '''
    Builds options from eigenvectors of the normalized graph Laplacian of
    the matrix returned by get_transition_matrix(mdp).

    Args:
        mdp: Passed to get_transition_matrix and make_option_policy.
        num_options (int): Number of eigenvector columns (0 .. num_options-1)
            to turn into options.
        init_everywhere (bool): If True every option can initiate in any
            state; otherwise only in the states whose entry in the option's
            eigenvector is within the tolerance of that vector's maximum.

    Returns:
        (list): One Option per processed eigenvector column.
    '''
    tolerance = 0.001  # threshold for float point error

    # TODO: assume that the state-space is strongly connected.

    # Compute the normalized Laplacian D^{-1/2} (D - A) D^{-1/2}.
    adjacency, state_to_id, id_to_state = get_transition_matrix(mdp)
    for node in range(adjacency.shape[0]):
        if adjacency[node][node] == 1:
            adjacency[node][node] = 0  # Prune self-loops for the analysis
    degrees = np.sum(adjacency, axis=0)
    degree_matrix = np.diag(degrees)
    inv_sqrt_degree = np.diag(1.0 / np.sqrt(degrees))
    laplacian = degree_matrix - adjacency
    norm_laplacian = np.matmul(np.matmul(inv_sqrt_degree, laplacian),
                               inv_sqrt_degree)
    _, eigenvecs = np.linalg.eigh(norm_laplacian)

    eigenoptions = []
    for column_idx in range(num_options):
        # NOTE(review): the loop starts at column 0 although the original
        # comment said the first eigenvalue is not useful - confirm.
        column = eigenvecs[:, column_idx]
        # NOTE(review): the goal set is always taken from column 1, whatever
        # column_idx is, so every option shares one termination set -
        # confirm this is intended.
        second_column = eigenvecs[:, 1]

        init_ids = np.argwhere(
            column >= np.amax(column) - tolerance).flatten()
        goal_ids = np.argwhere(
            second_column <= np.amin(second_column) + tolerance).flatten()

        # Make init/goal sets.
        init_set = [id_to_state[state_id] for state_id in init_ids]
        goal_set = [id_to_state[state_id] for state_id in goal_ids]

        # Define predicates.
        if init_everywhere:
            init_predicate = Predicate(lambda x:True)
        else:
            init_predicate = InListPredicate(ls=init_set)
        term_predicate = InListPredicate(ls=goal_set)

        eigenoptions.append(
            Option(init_predicate=init_predicate,
                   term_predicate=term_predicate,
                   policy=make_option_policy(mdp, id_to_state.values(),
                                             goal_set)))

    return eigenoptions
Пример #9
0
def make_single_action_phi_relative_options(mdp, state_abstr):
    """
    Builds, for every abstract state and every action of the MDP, one
    phi-relative option whose policy always returns that action.

    Each option gets an EqPredicate on the abstract state for initiation
    and a NeqPredicate on the same abstract state for termination.

    Returns:
        (list): Option instances, grouped by abstract state and ordered by
            mdp.get_actions() within each group.
    """
    # The action is bound through a default argument so that each lambda
    # keeps its own action instead of the loop's last value. See
    # https://stackoverflow.com/questions/19837486/python-lambda-in-a-loop
    return [
        Option(init_predicate=EqPredicate(y=abstract_state,
                                          func=state_abstr.phi),
               term_predicate=NeqPredicate(y=abstract_state,
                                           func=state_abstr.phi),
               policy=lambda s, bound_action=action: bound_action)
        for abstract_state in state_abstr.get_abs_states()
        for action in mdp.get_actions()
    ]
def compute_omega_given_m_phi(mdp, state_abstr):
    '''
    Builds an action abstraction made of options that move between
    abstract states.

    One random-policy option is created per pair of distinct abstract
    states (the reverse of an already handled pair is skipped); the result
    is then passed through ah._prune_redundant_options.

    Args:
        mdp (simple_rl.MDP)
        state_abstr (simple_rl.abstraction.StateAbstraction)

    Returns:
        omega (simple_rl.abstraction.ActionAbstraction): Built from the
            pruned options with on_failure="primitives".
    '''
    # Grab relevant states.
    abs_states = state_abstr.get_abs_states()
    g_start_state = mdp.get_init_state()  # not used below; call retained

    # Shared by every option created here.
    random_policy = lambda s: random.choice(mdp.get_actions(s))

    candidate_options = []
    state_pairs = {}

    for source in abs_states:
        for target in abs_states:
            if source == target:
                continue
            if (source, target) in state_pairs.keys():
                continue
            if (target, source) in state_pairs.keys():
                continue

            # Initiate in the ground states of `source`, terminate in the
            # ground states of `target`.
            starts_in = InListPredicate(
                ls=state_abstr.get_ground_states_in_abs_state(source))
            ends_in = InListPredicate(
                ls=state_abstr.get_ground_states_in_abs_state(target))

            candidate_options.append(Option(init_predicate=starts_in,
                                            term_predicate=ends_in,
                                            policy=random_policy))
            state_pairs[(source, target)] = 1

    # Prune.
    pruned_option_set = ah._prune_redundant_options(candidate_options,
                                                    state_pairs.keys(),
                                                    state_abstr, mdp)

    return ActionAbstraction(options=pruned_option_set,
                             on_failure="primitives")
Пример #11
0
def make_fixed_random_options(mdp, state_abstr, num_options_per_s_a=2):
    """
    Creates num_options_per_s_a options for every abstract state, each with
    its own policy from get_fixed_random_policy(mdp).

    Args:
        mdp
        state_abstr
        num_options_per_s_a (int): Options to create per abstract state.

    Returns:
        (list): Option instances grouped by abstract state.
    """
    fixed_random_options = []
    for abstract_state in state_abstr.get_abs_states():
        # The options of one abstract state share a single predicate pair.
        inside = EqPredicate(y=abstract_state, func=state_abstr.phi)
        outside = NeqPredicate(y=abstract_state, func=state_abstr.phi)

        fixed_random_options.extend(
            Option(init_predicate=inside,
                   term_predicate=outside,
                   policy=get_fixed_random_policy(mdp))
            for _ in range(num_options_per_s_a))

    return fixed_random_options
def make_subgoal_options(mdp,
                         goal_list,
                         init_space=None,
                         vectors=None,
                         n_trajs=100,
                         n_steps=100,
                         classifier='list',
                         policy='vi'):
    '''
    Creates one option per entry of goal_list.

    Args:
        mdp
        goal_list: set of lists. Each entry holds the goal states of one
            option.
        init_space: list of states. Used for the shared initiation
            predicate and, minus the goals, for each termination predicate.
        vectors: Indexed by the option's position in goal_list; only read
            when policy == 'dqn'.
        n_trajs (int): Forwarded to _make_dqn_option_policy.
        n_steps (int): Forwarded to _make_dqn_option_policy.
        classifier (str): 'list' uses InListPredicate, 'svc' uses
            ClassifierPredicate. Anything else prints an error and trips an
            assertion.
        policy (str): 'vi' or 'dqn'. Anything else prints an error and
            trips an assertion.

    Returns:
        (set): Contains Option instances.
    '''

    # All options share one initiation predicate.
    if classifier == 'list':
        init_predicate = InListPredicate(ls=init_space)
    elif classifier == 'svc':
        init_predicate = ClassifierPredicate(init_space)
    else:
        print('Error: unknown predicate for init condition:', classifier)
        assert (False)

    subgoal_options = set()
    for index, goals in enumerate(goal_list):

        ############################
        # Termination set is set to (the subgoal state) + (unknown region).
        # It is expressed as a copy of init_space with the goals removed,
        # handed to a predicate built with true_if_in=False.
        ############################
        non_goal_states = copy(init_space)
        for goal in goals:
            if goal in non_goal_states:
                non_goal_states.remove(goal)

        if classifier == 'list':
            term_predicate = InListPredicate(ls=non_goal_states,
                                             true_if_in=False)
        elif classifier == 'svc':
            term_predicate = ClassifierPredicate(non_goal_states,
                                                 true_if_in=False)
        else:
            print('Error: unknown predicate for init condition:', classifier)
            assert (False)

        # Choose the policy; the Option is assembled once below.
        if policy == 'vi':
            # Intrinsic reward of 1 for each goal, keyed by the goal's hash.
            intrinsic_reward = {hash(goal): 1 for goal in goals}
            shaped_mdp = IntrinsicMDP(intrinsic_reward=intrinsic_reward,
                                      mdp=mdp)
            option_policy = _make_mini_mdp_option_policy(shaped_mdp,
                                                         n_iters=100)
        elif policy == 'dqn':
            option_policy = _make_dqn_option_policy(mdp,
                                                    vectors[index],
                                                    n_trajs=n_trajs,
                                                    n_steps=n_steps)
        else:
            print('Error: unknown policy for options:', policy)
            assert (False)

        subgoal_options.add(Option(init_predicate=init_predicate,
                                   term_predicate=term_predicate,
                                   policy=option_policy,
                                   term_prob=0.0))

    return subgoal_options
Пример #13
0
    return Option(init_predicate=true_predicate,
                  term_predicate=true_predicate,
                  policy=policy,
                  name='o_' + str(action))


# Script entry point: sets up a two-state example in which option o0 leads
# from s0 to s1 and option o1 leads back from s1 to s0.
if __name__ == '__main__':

    # The two states of the example.
    s0 = State(data=0)
    s1 = State(data=1)

    # Predicates that hold exactly when the given state equals s0 / s1.
    s0_predicate = Predicate(lambda s: s == s0)
    s1_predicate = Predicate(lambda s: s == s1)
    # Primitive policy shared by both options.
    policy = lambda s: 'a0' if s == s0 else 'a1'  # shouldn't actually matter

    # o0 initiates in s0 and terminates in s1; o1 is the reverse.
    o0 = Option(s0_predicate, s1_predicate, policy, name='0 --> 1')
    o1 = Option(s1_predicate, s0_predicate, policy, name='1 --> 0')

    # Reward is 1 when the option matching the current state is taken
    # (o0 in s0, o1 in s1) and 0 otherwise.
    def reward_func(state, action):
        if state == s0 and action == o0:
            return 1
        if state == s1 and action == o1:
            return 1
        return 0

    # Taking the matching option moves to the other state; any other
    # (state, action) combination leaves the state unchanged.
    def transition_func(state, action):
        if state == s0 and action == o0:
            return s1
        if state == s1 and action == o1:
            return s0
        return state
Пример #14
0
def find_betweenness_options(mdp, t=0.1, init_everywhere=False):
    '''
    Builds options whose goals are states of high betweenness centrality
    in the graph of the matrix returned by get_transition_matrix(mdp).

    Steps:
        1. A node v becomes a candidate subgoal if, for some source s != v,
           its subset betweenness (sources=[s], all targets) divided by
           N - 2 exceeds t.
        2. For each candidate g, the initiation set is every node whose
           subset betweenness (all sources, targets=[g]) divided by N - 2
           exceeds t; the sum of those raw scores is g's support score.
        3. Within each connected component of the subgraph induced by the
           candidates, only the candidate with the highest support score
           is kept.

    Args:
        mdp: Passed to get_transition_matrix and make_option_policy.
        t (float): Threshold on the normalized betweenness score.
        init_everywhere (bool): If True the options can initiate in any
            state instead of only in their initiation set.

    Returns:
        (list): One Option per kept subgoal. Empty when the graph has two
            nodes or fewer, since the N - 2 normalization is then undefined
            and no node can lie strictly between two others.
    '''
    T, state_to_id, id_to_state = get_transition_matrix(mdp)

    # nx.from_numpy_matrix was removed in networkx 3.0; prefer its
    # replacement and fall back only on versions that lack it.
    graph_from_matrix = getattr(nx, 'from_numpy_array', None)
    if graph_from_matrix is None:
        graph_from_matrix = nx.from_numpy_matrix
    G = graph_from_matrix(T)
    N = G.number_of_nodes()

    if N <= 2:
        # Avoids dividing by zero below; no subgoal can exist anyway.
        return []
    normalizer = N - 2

    #########################
    ## 1. Enumerate all candidate subgoals
    #########################
    subgoal_set = []       # keeps discovery order
    known_subgoals = set() # O(1) membership test for the list above
    for s in G.nodes():
        csv = nx.betweenness_centrality_subset(G, sources=[s], targets=G.nodes())
        for v, centrality in csv.items():
            # Compare node ids by value, not identity.
            if s != v and centrality / normalizer > t and v not in known_subgoals:
                known_subgoals.add(v)
                subgoal_set.append(v)

    #########################
    ## 2. Generate an initiation set for each subgoal
    #########################
    initiation_sets = {}
    support_scores = {}
    for g in subgoal_set:
        csg = nx.betweenness_centrality_subset(G, sources=G.nodes(), targets=[g])
        supporters = [s for s in G.nodes() if csg[s] / normalizer > t]
        initiation_sets[g] = supporters
        support_scores[g] = sum(csg[s] for s in supporters)

    #########################
    ## 3. Filter subgoals according to their supports
    #########################
    subgoal_graph = G.subgraph(subgoal_set)

    # TODO: connected components are used instead of SCCs
    filtered_subgoals = [
        max(component, key=lambda n: support_scores[n])
        for component in nx.connected_components(subgoal_graph)
    ]

    options = []
    for g in filtered_subgoals:
        init_set = [id_to_state[s] for s in initiation_sets[g]]
        goal_set = [id_to_state[g]]

        # Define predicates.
        if init_everywhere:
            # Initiate everywhere.
            init_predicate = Predicate(lambda x:True)
        else:
            # Initiate only inside the subgoal's initiation set.
            init_predicate = InListPredicate(ls=init_set)
        term_predicate = InListPredicate(ls=goal_set)

        between_o = Option(init_predicate=init_predicate,
                           term_predicate=term_predicate,
                           policy=make_option_policy(mdp, id_to_state.values(), goal_set))

        options.append(between_o)

    return options