from collections import deque
from typing import Callable, List, Tuple

import pulp
import random
import yaml
from graphviz import Digraph

# MDP, str_to_float, argmin, print_optimal_solution and random_probability
# are assumed to be provided by the project's own modules.

# module-level value vector, updated via 'global v' by reach and
# build_strategy below
v: List[float] = []


def export_mdp(mdp: MDP, mdp_name: str, strategy: List[int] = []) -> None:
    """
    Export an MDP to a Graphviz dot file named <mdp_name>.gv and open it in
    the default viewer.

    :param mdp: the MDP to export.
    :param mdp_name: base name of the generated .gv file.
    :param strategy: (optional) a strategy; in each state, the action it
                     chooses is highlighted in red.
    """
    states = range(mdp.number_of_states)
    g = Digraph(mdp_name, filename=mdp_name + '.gv')
    g.attr('node', shape='circle')
    for s in states:
        g.node('s%d' % s, label=mdp.state_name(s))
    # actions are drawn as intermediate point-shaped nodes
    g.attr('node', shape='point')
    for s in states:
        for (alpha, succ_list) in mdp.alpha_successors(s):
            if strategy and strategy[s] == alpha:
                color = 'red'
            else:
                color = 'black'
            g.node('s%d->a%d' % (s, alpha),
                   xlabel=' ' + mdp.act_name(alpha) + ' | '
                          + str(mdp.w(alpha)) + ' ',
                   fontsize='8', fontcolor=color, color=color)
            g.edge('s%d' % s, 's%d->a%d' % (s, alpha))
            for (succ, pr) in succ_list:
                g.edge('s%d->a%d' % (s, alpha), 's%d' % succ,
                       label=str(round(pr, 4)), fontsize='8')
    g.view()
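
# A small hand-built example reused by the demo sketches in this file. The
# MDP constructor signature (state names, action names, weights) is taken
# from import_from_yaml below; the numbers themselves are illustrative:
# action 'a' has weight 1, action 'b' has weight 2, and s2 is absorbing.
def _tiny_mdp() -> MDP:
    mdp = MDP(['s0', 's1', 's2'], ['a', 'b'], [1, 2])
    mdp.enable_action(0, 0, [(1, 0.5), (2, 0.5)])  # s0, 'a': to s1 or s2
    mdp.enable_action(0, 1, [(0, 1.0)])            # s0, 'b': self-loop
    mdp.enable_action(1, 0, [(2, 1.0)])            # s1, 'a': to s2
    mdp.enable_action(2, 0, [(2, 1.0)])            # s2 is absorbing
    return mdp


def _demo_export() -> None:
    # Sketch: render the example to 'tiny.gv' and open the default viewer.
    # Requires a working Graphviz installation.
    export_mdp(_tiny_mdp(), 'tiny')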
def import_from_yaml(stream) -> MDP:
    """
    Import a yaml file (as stream) into an MDP.

    :param stream: yaml file stream.
    :return: the MDP imported from the yaml file.
    """
    mdp_dict = yaml.load(stream, Loader=yaml.SafeLoader)['mdp']
    mdp_states = mdp_dict['states']
    mdp_actions = mdp_dict['actions']
    states = [state['name'] for state in mdp_states]
    state_from_name = {name: i for (i, name) in enumerate(states)}
    actions = [action['name'] for action in mdp_actions]
    w = [int(action['weight']) for action in mdp_actions]
    action_from_name = {name: i for (i, name) in enumerate(actions)}
    mdp = MDP(states, actions, w)
    for s in range(len(states)):
        enabled_actions = mdp_states[s]['enabled actions']
        for enabled_action in enabled_actions:
            transitions = [(state_from_name[transition['target']],
                            str_to_float(str(transition['probability'])))
                           for transition in enabled_action['transitions']]
            alpha = enabled_action['name']
            # enable this action in the MDP
            mdp.enable_action(s, action_from_name[alpha], transitions)
    return mdp
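
# The YAML layout expected by import_from_yaml, reconstructed from the code
# above (a sketch; the names and values are illustrative, and probabilities
# go through str_to_float, which is assumed to accept strings such as '0.5'):
#
#   mdp:
#     states:
#       - name: s0
#         enabled actions:
#           - name: a
#             transitions:
#               - target: s1
#                 probability: 0.5
#               - target: s2
#                 probability: 0.5
#     actions:
#       - name: a
#         weight: 1


def _demo_import() -> None:
    # Sketch: 'example.yaml' is a hypothetical file in the layout above.
    with open('example.yaml') as stream:
        mdp = import_from_yaml(stream)
    print(mdp.number_of_states)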
def build_strategy(mdp: MDP, T: List[int],
                   solver: pulp.LpSolver = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, for a state s of the MDP, the
    action that minimizes the expected length of paths from s to a set of
    target states T.

    :param mdp: an MDP for which the strategy will be built.
    :param T: a list of target states.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: the strategy built.
    """
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    global v
    v = x
    states = range(mdp.number_of_states)
    # in each state, pick the action achieving the minimum of
    # w(alpha) + sum_{succ} pr(succ) * x[succ]
    act_min = [
        mdp.act(s)[argmin([
            mdp.w(alpha) + sum(map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                                   succ_list))
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ])]
        for s in states
    ]
    return lambda s: act_min[s]
def reach(mdp: MDP, T: List[int], msg=0,
          solver: pulp.LpSolver = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the maximum reachability probability to T for each state of the
    MDP, i.e., a vector x (as list) such that x[s] is the maximum
    reachability probability to T from the state s.

    :param mdp: an MDP for which the maximum reachability probabilities are
                computed.
    :param T: a list of target states.
    :param msg: (optional) set this parameter to 1 to activate the debug mode
                in the console.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: a list x such that x[s] is the maximum reachability probability to T.
    """
    states = list(range(mdp.number_of_states))
    # x[s] is the Pr^max to reach T
    x = [-1] * mdp.number_of_states
    connected = connected_to(mdp, T)
    # all states s that are not connected to T have Pr^max = 0
    for s in filter(lambda s: not connected[s], states):
        x[s] = 0
    # find all states s such that Pr^max to reach T is 1
    for s in pr_max_1(mdp, T, connected=connected):
        x[s] = 1
    # if some states remain whose Pr^max to reach T lies strictly between
    # 0 and 1, an LP is generated for these states
    untreated_states = list(filter(lambda s: x[s] == -1, states))
    if untreated_states:
        # formulate the LP problem
        linear_program = pulp.LpProblem("reachability", pulp.LpMinimize)
        # initialize variables
        for s in untreated_states:
            x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0, upBound=1)
        # objective function
        linear_program += sum(x)
        # constraints
        for s in untreated_states:
            for (alpha, successors_list) in mdp.alpha_successors(s):
                linear_program += x[s] >= sum(
                    pr * x[succ] for (succ, pr) in successors_list)
        if msg:
            print(linear_program)
        # solve the LP
        solver.msg = msg
        linear_program.solve(solver)
        for s in untreated_states:
            x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    global v
    v = x
    return x
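
def _demo_reach() -> None:
    # Sketch: on the tiny example every state reaches s2 with probability 1,
    # so this should print [1, 1, 1] (no LP is even solved in that case, so
    # no external solver is needed here).
    print(reach(_tiny_mdp(), [2]))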
def minimal_steps_number_to(mdp: MDP, T: List[int]) -> List[float]:
    """
    Compute the length of a shortest path, in number of edges, to T in the
    underlying graph of the MDP (i.e., the minimal number of steps required
    to reach T in the underlying graph). The function connected_to (a
    breadth-first search algorithm) is adapted to number the states instead
    of marking them.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :return: a list 'steps' such that, for each state s of the MDP,
             steps[s] = n where n is the minimal number of steps to reach T
             in the underlying graph of the MDP.
    """
    steps = [float('inf')] * mdp.number_of_states
    for t in T:
        steps[t] = 0
    frontier = deque([])
    for t in T:
        frontier.extend(mdp.pred(t))
    i = 1
    # process the predecessors level by level, i being the current distance
    while len(frontier) > 0:
        predecessors = frontier
        frontier = deque([])
        while len(predecessors) > 0:
            pred = predecessors.pop()
            if steps[pred] > i:
                steps[pred] = i
                frontier.extend(mdp.pred(pred))
        i += 1
    return steps
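
def _demo_minimal_steps() -> None:
    # Sketch: in the tiny example, s0 and s1 both have an edge to s2 in the
    # underlying graph, so this should print [1, 1, 0].
    print(minimal_steps_number_to(_tiny_mdp(), [2]))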
def min_expected_cost(mdp: MDP, T: List[int], msg=0,
                      solver: pulp.LpSolver = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the minimum expected length of paths to the set of targets T
    from each state in the MDP.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :param msg: (optional) set this parameter to 1 to activate the debug mode
                in the console.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: a list x such that x[s] is the minimum expected length of paths
             to the set of targets T from the state s of the MDP.
    """
    states = range(mdp.number_of_states)
    x = [float('inf')] * mdp.number_of_states
    expect_inf = [True] * mdp.number_of_states
    # the expected length is finite exactly for the states that reach T with
    # maximum probability 1
    for s in pr_max_1(mdp, T):
        x[s] = -1
        expect_inf[s] = False
    for t in T:
        x[t] = 0
    # the states that get an LP variable are those still marked -1
    variable_states = [s for s in states if x[s] == -1]
    # formulate the LP problem
    linear_program = pulp.LpProblem(
        "minimum expected length of path to target", pulp.LpMaximize)
    # initialize variables
    for s in variable_states:
        x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0)
    # objective function: maximize the sum of the finite expectations
    linear_program += sum(
        map(lambda s: x[s], filter(lambda s: not expect_inf[s], states)))
    # constraints: for each action alpha whose successors all have a finite
    # expectation, x[s] <= w(alpha) + sum_{succ} pr(succ) * x[succ]
    for s in variable_states:
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if not list(filter(lambda succ_pr: expect_inf[succ_pr[0]],
                               successor_list)):
                linear_program += x[s] <= mdp.w(alpha) + sum(
                    map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                        successor_list))
    if msg:
        print(linear_program)
    # solve the LP
    solver.msg = msg
    if linear_program.variables():
        linear_program.solve(solver)
    for s in variable_states:
        x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    return x
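
def _demo_min_expected_cost() -> None:
    # Sketch (requires an LP solver such as GLPK on the PATH): with
    # w('a') = 1 and w('b') = 2 in the tiny example, the optimal
    # expectations should come out as [1.5, 1.0, 0].
    print(min_expected_cost(_tiny_mdp(), [2]))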
def random_MDP(n: int, a: int,
               strictly_A: bool = False,
               complete_graph: bool = False,
               weights_interval: Tuple[int, int] = (1, 1),
               force_weakly_connected_to: bool = False) -> MDP:
    """
    Generate a random MDP.

    :param n: number of states of the generated MDP.
    :param a: number of actions of the generated MDP.
    :param strictly_A: (optional) set this parameter to True to force each
                       state of the generated MDP to have exactly a actions,
                       i.e., |A(s)| = a for every state s.
    :param complete_graph: (optional) set this parameter to True to force the
                           MDP to have a complete underlying graph.
    :param weights_interval: (optional) an interval (w1, w2) for the weights
                             of the actions, i.e., w(α) ∈ [w1, w2] for each
                             action α of the generated MDP.
    :param force_weakly_connected_to: (optional) set this parameter to True
                                      to force some random states to be
                                      absorbing. As a consequence, some states
                                      may not be connected to a target set T,
                                      and more states can have a reachability
                                      probability to T < 1.
    :return: a randomly generated MDP.
    """
    states = list(range(n))
    actions = list(range(a))
    w1, w2 = weights_interval
    if not (1 <= w1 <= w2):
        raise ValueError("weights_interval (w1, w2) must satisfy 1 <= w1 <= w2")
    w = [random.randint(w1, w2) for _ in range(a)]
    mdp = MDP([], [], w, n)
    for s in states:
        if not strictly_A:
            alpha_list = random.sample(actions, random.randint(1, a))
        else:
            alpha_list = actions
        if complete_graph:
            successors_set = set()
        for alpha in alpha_list:
            successors = random.sample(states, random.randint(1, n))
            if force_weakly_connected_to and random.random() >= 0.7:
                successors = [s]
            if complete_graph:
                successors_set |= set(successors)
                # the last action of the state absorbs all the successors
                # that were not yet drawn, to complete the underlying graph
                if alpha == alpha_list[-1]:
                    for succ in filter(
                            lambda succ: succ not in successors_set, states):
                        successors.append(succ)
            probabilities = random_probability(len(successors))
            mdp.enable_action(
                s, alpha,
                [(successors[succ], probabilities[succ])
                 for succ in range(len(probabilities))])
    return mdp
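
def _demo_random_MDP() -> None:
    # Sketch: a 10-state MDP with 3 actions whose weights are drawn
    # uniformly from [1, 5].
    mdp = random_MDP(10, 3, weights_interval=(1, 5))
    print(mdp.number_of_states)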
def build_strategy(mdp: MDP, T: List[int],
                   solver: pulp.LpSolver = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, for each state s of the MDP,
    an action that maximises the reachability probability to T from s.

    :param mdp: an MDP for which the strategy will be built.
    :param T: a list of target states.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: the strategy built.
    """
    x = reach(mdp, T, solver=solver, msg=msg)
    states = range(mdp.number_of_states)
    act_max = [[] for _ in states]
    # update act_max: act_max[s] lists the actions of s achieving Pr^max
    for s in states:
        pr_max = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            pr = sum(
                map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                    successor_list))
            if pr == pr_max:
                act_max[s].append(alpha)
            elif pr > pr_max:
                pr_max = pr
                act_max[s] = [alpha]
    # compute M^max, the sub-MDP restricted to the optimal actions
    mdp_max = MDP([], [], mdp._w, mdp.number_of_states, validation=False)
    for s in states:
        i = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if alpha == act_max[s][i]:
                i += 1
                mdp_max.enable_action(s, alpha, successor_list)
                if i == len(act_max[s]):
                    break
    # compute the final strategy: among the optimal actions, pick one that
    # decreases the minimal number of steps to T in M^max
    minimal_steps = minimal_steps_number_to(mdp_max, T)
    strategy: List[int] = []
    for s in states:
        if x[s] == 0 or minimal_steps[s] == 0:
            strategy.append(act_max[s][0])
        else:
            for (alpha, successor_list) in mdp_max.alpha_successors(s):
                for (succ, pr) in successor_list:
                    if minimal_steps[succ] == minimal_steps[s] - 1:
                        strategy.append(alpha)
                        break
                if len(strategy) == s + 1:
                    break
    return lambda s: strategy[s]
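
def _demo_build_strategy() -> None:
    # Sketch: on the tiny example the strategy should pick action 'a'
    # (index 0) in every state, since 'b' only loops on s0; this should
    # print [0, 0, 0]. No LP is solved here, so no external solver is needed.
    sigma = build_strategy(_tiny_mdp(), [2])
    print([sigma(s) for s in range(3)])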
def complete_MDP(n: int, a: int, w: List[int] = []) -> MDP:
    """
    Generate a worst-case MDP: every action is enabled in every state, and
    every action leads to every state of the MDP with non-zero probability.

    :param n: number of states.
    :param a: number of actions.
    :param w: weights of the actions.
    :return: the MDP generated.
    """
    if not w:
        w = [1] * a
    mdp = MDP([], [], w, number_of_states=n)
    # distribution 1/Z, 2/Z, ..., n/Z with Z = 1 + 2 + ... + n, rotated by
    # one position for each enabled action
    total = sum(range(1, n + 1))
    pr = [i / total for i in range(1, n + 1)]
    for s in range(n):
        for alpha in range(a):
            pr = pr[1:] + pr[0:1]
            to_enable = [(succ, pr[succ]) for succ in range(n)]
            mdp.enable_action(s, alpha, to_enable)
    return mdp
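
def _demo_complete_MDP() -> None:
    # Sketch: with n = 4 and a = 2, each action distribution is a rotation
    # of [0.1, 0.2, 0.3, 0.4].
    mdp = complete_MDP(4, 2)
    print(list(mdp.alpha_successors(0)))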
def export_to_yaml(mdp: MDP, file_name: str) -> None:
    """
    Serialise an MDP instance into a yaml file.

    :param mdp: an MDP.
    :param file_name: the name of the yaml file.
    """
    mdp_dict = {'mdp': {'states': [], 'actions': []}}
    for s in range(mdp.number_of_states):
        state_dict = {'name': mdp.state_name(s), 'enabled actions': []}
        for (alpha, succ_list) in mdp.alpha_successors(s):
            action_dict = {'name': mdp.act_name(alpha), 'transitions': []}
            for (succ, pr) in succ_list:
                action_dict['transitions'].append(
                    {'target': mdp.state_name(succ), 'probability': pr})
            state_dict['enabled actions'].append(action_dict)
        mdp_dict['mdp']['states'].append(state_dict)
    for alpha in range(mdp.number_of_actions):
        mdp_dict['mdp']['actions'].append(
            {'name': mdp.act_name(alpha), 'weight': mdp.w(alpha)})
    if file_name:
        with open(file_name + '.yaml', 'w') as yaml_file:
            yaml.dump(mdp_dict, yaml_file, default_flow_style=False)
    else:
        print(yaml.dump(mdp_dict, default_flow_style=False))
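
def _demo_yaml_round_trip() -> None:
    # Sketch: export the tiny example to 'tiny.yaml', then read it back.
    export_to_yaml(_tiny_mdp(), 'tiny')
    with open('tiny.yaml') as stream:
        mdp = import_from_yaml(stream)
    print(mdp.number_of_states)  # 3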
def connected_to(mdp: MDP, T: List[int]) -> List[bool]:
    """
    Compute the states connected to T. For this purpose, a backward
    breadth-first search on the underlying graph of the MDP is used.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :return: a list 'marked' such that, for each state s of the MDP,
             marked[s] = True if s is connected to T in the underlying graph
             of the MDP.
    """
    marked = [False] * mdp.number_of_states
    for t in T:
        marked[t] = True
    frontier = deque([])
    for t in T:
        frontier.extend(mdp.pred(t))
    while len(frontier) > 0:
        pred = frontier.pop()
        if not marked[pred]:
            marked[pred] = True
            for predecessor in mdp.pred(pred):
                frontier.appendleft(predecessor)
    return marked
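
def _demo_connected_to() -> None:
    # Sketch: every state of the tiny example is connected to s2, so this
    # should print [True, True, True].
    print(connected_to(_tiny_mdp(), [2]))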
def pr_max_1(mdp: MDP, T: List[int],
             connected: List[bool] = []) -> List[int]:
    """
    Compute the states s of the MDP such that the maximum probability to
    reach T from s is 1.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :param connected: (optional) list of the states of the MDP connected
                      to T. If this parameter is not provided, it is computed
                      in the function.
    :return: the list of states s of the MDP such that the maximum
             probability to reach T from s is 1.
    """
    if not connected:
        connected = connected_to(mdp, T)
    removed_state = [False] * mdp.number_of_states
    T_set = set(T)
    disabled_action = [[False] * len(mdp.act(s))
                       for s in range(mdp.number_of_states)]
    no_disabled_actions = [0] * mdp.number_of_states
    U = [s for s in range(mdp.number_of_states) if not connected[s]]
    while len(U) > 0:
        R = deque(U)
        while len(R) > 0:
            u = R.pop()
            # disable every action that can lead to u from a state still
            # connected to T; a state left with no enabled action is removed
            # as well
            for (t, alpha_i) in mdp._alpha_pred[u]:
                if (connected[t] and not disabled_action[t][alpha_i]
                        and t not in T_set):
                    disabled_action[t][alpha_i] = True
                    no_disabled_actions[t] += 1
                    if no_disabled_actions[t] == len(mdp.act(t)):
                        R.appendleft(t)
                        connected[t] = False
            removed_state[u] = True
        # build the sub-MDP without the removed states and disabled actions
        sub_mdp = MDP([], [], [], number_of_states=mdp.number_of_states,
                      validation=False)
        for s in range(mdp.number_of_states):
            if not removed_state[s]:
                for alpha_i in range(len(mdp.act(s))):
                    if not disabled_action[s][alpha_i]:
                        sub_mdp.enable_action(
                            s, mdp._enabled_actions[s][0][alpha_i],
                            list(filter(
                                lambda succ_pr:
                                not removed_state[succ_pr[0]],
                                mdp._enabled_actions[s][1][alpha_i])))
        mdp = sub_mdp
        connected = connected_to(mdp, T)
        U = [s for s in range(mdp.number_of_states)
             if not connected[s] and not removed_state[s]]
    pr_1 = [s for s in range(mdp.number_of_states) if not removed_state[s]]
    return pr_1
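
def _demo_pr_max_1() -> None:
    # Sketch: all three states of the tiny example reach s2 with maximum
    # probability 1, so this should print [0, 1, 2].
    print(pr_max_1(_tiny_mdp(), [2]))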