from collections import deque
from typing import Callable, List, Tuple

import pulp
import random
import yaml
from graphviz import Digraph

# MDP, str_to_float, argmin, print_optimal_solution and random_probability
# are assumed to be provided by the project's own modules.

# module-level value vector, updated via 'global v' by reach and
# build_strategy below
v: List[float] = []


def export_mdp(mdp: MDP, mdp_name: str, strategy: List[int] = []) -> None:
    """
    Export an MDP to a Graphviz dot file named <mdp_name>.gv and open it in
    the default viewer.

    :param mdp: the MDP to export.
    :param mdp_name: base name of the generated .gv file.
    :param strategy: (optional) a strategy; in each state, the action it
                     chooses is highlighted in red.
    """
    states = range(mdp.number_of_states)
    g = Digraph(mdp_name, filename=mdp_name + '.gv')
    g.attr('node', shape='circle')
    for s in states:
        g.node('s%d' % s, label=mdp.state_name(s))
    # actions are drawn as intermediate point-shaped nodes
    g.attr('node', shape='point')
    for s in states:
        for (alpha, succ_list) in mdp.alpha_successors(s):
            if strategy and strategy[s] == alpha:
                color = 'red'
            else:
                color = 'black'
            g.node('s%d->a%d' % (s, alpha),
                   xlabel=' ' + mdp.act_name(alpha) + ' | '
                          + str(mdp.w(alpha)) + ' ',
                   fontsize='8', fontcolor=color, color=color)
            g.edge('s%d' % s, 's%d->a%d' % (s, alpha))
            for (succ, pr) in succ_list:
                g.edge('s%d->a%d' % (s, alpha), 's%d' % succ,
                       label=str(round(pr, 4)), fontsize='8')
    g.view()
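
# A small hand-built example reused by the demo sketches in this file. The
# MDP constructor signature (state names, action names, weights) is taken
# from import_from_yaml below; the numbers themselves are illustrative:
# action 'a' has weight 1, action 'b' has weight 2, and s2 is absorbing.
def _tiny_mdp() -> MDP:
    mdp = MDP(['s0', 's1', 's2'], ['a', 'b'], [1, 2])
    mdp.enable_action(0, 0, [(1, 0.5), (2, 0.5)])  # s0, 'a': to s1 or s2
    mdp.enable_action(0, 1, [(0, 1.0)])            # s0, 'b': self-loop
    mdp.enable_action(1, 0, [(2, 1.0)])            # s1, 'a': to s2
    mdp.enable_action(2, 0, [(2, 1.0)])            # s2 is absorbing
    return mdp


def _demo_export() -> None:
    # Sketch: render the example to 'tiny.gv' and open the default viewer.
    # Requires a working Graphviz installation.
    export_mdp(_tiny_mdp(), 'tiny')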
def import_from_yaml(stream) -> MDP:
    """
    Import a yaml file (as stream) into an MDP.

    :param stream: yaml file stream.
    :return: the MDP imported from the yaml file.
    """
    mdp_dict = yaml.load(stream, Loader=yaml.SafeLoader)['mdp']
    mdp_states = mdp_dict['states']
    mdp_actions = mdp_dict['actions']
    states = [state['name'] for state in mdp_states]
    state_from_name = {name: i for (i, name) in enumerate(states)}
    actions = [action['name'] for action in mdp_actions]
    w = [int(action['weight']) for action in mdp_actions]
    action_from_name = {name: i for (i, name) in enumerate(actions)}
    mdp = MDP(states, actions, w)
    for s in range(len(states)):
        enabled_actions = mdp_states[s]['enabled actions']
        for enabled_action in enabled_actions:
            transitions = [(state_from_name[transition['target']],
                            str_to_float(str(transition['probability'])))
                           for transition in enabled_action['transitions']]
            alpha = enabled_action['name']
            # enable this action in the MDP
            mdp.enable_action(s, action_from_name[alpha], transitions)
    return mdp
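
# The YAML layout expected by import_from_yaml, reconstructed from the code
# above (a sketch; the names and values are illustrative, and probabilities
# go through str_to_float, which is assumed to accept strings such as '0.5'):
#
#   mdp:
#     states:
#       - name: s0
#         enabled actions:
#           - name: a
#             transitions:
#               - target: s1
#                 probability: 0.5
#               - target: s2
#                 probability: 0.5
#     actions:
#       - name: a
#         weight: 1


def _demo_import() -> None:
    # Sketch: 'example.yaml' is a hypothetical file in the layout above.
    with open('example.yaml') as stream:
        mdp = import_from_yaml(stream)
    print(mdp.number_of_states)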
def build_strategy(mdp: MDP, T: List[int],
                   solver: pulp.LpSolver = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, for a state s of the MDP, the
    action that minimizes the expected length of paths from s to a set of
    target states T.

    :param mdp: an MDP for which the strategy will be built.
    :param T: a list of target states.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: the strategy built.
    """
    x = min_expected_cost(mdp, T, solver=solver, msg=msg)
    global v
    v = x
    states = range(mdp.number_of_states)
    # in each state, pick the action achieving the minimum of
    # w(alpha) + sum_{succ} pr(succ) * x[succ]
    act_min = [
        mdp.act(s)[argmin([
            mdp.w(alpha) + sum(map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                                   succ_list))
            for (alpha, succ_list) in mdp.alpha_successors(s)
        ])]
        for s in states
    ]
    return lambda s: act_min[s]
def reach(mdp: MDP, T: List[int], msg=0,
          solver: pulp.LpSolver = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the maximum reachability probability to T for each state of the
    MDP, i.e., a vector x (as list) such that x[s] is the maximum
    reachability probability to T from the state s.

    :param mdp: an MDP for which the maximum reachability probabilities are
                computed.
    :param T: a list of target states.
    :param msg: (optional) set this parameter to 1 to activate the debug mode
                in the console.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: a list x such that x[s] is the maximum reachability probability to T.
    """
    states = list(range(mdp.number_of_states))
    # x[s] is the Pr^max to reach T
    x = [-1] * mdp.number_of_states
    connected = connected_to(mdp, T)
    # all states s that are not connected to T have Pr^max = 0
    for s in filter(lambda s: not connected[s], states):
        x[s] = 0
    # find all states s such that Pr^max to reach T is 1
    for s in pr_max_1(mdp, T, connected=connected):
        x[s] = 1
    # if some states remain whose Pr^max to reach T lies strictly between
    # 0 and 1, an LP is generated for these states
    untreated_states = list(filter(lambda s: x[s] == -1, states))
    if untreated_states:
        # formulate the LP problem
        linear_program = pulp.LpProblem("reachability", pulp.LpMinimize)
        # initialize variables
        for s in untreated_states:
            x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0, upBound=1)
        # objective function
        linear_program += sum(x)
        # constraints
        for s in untreated_states:
            for (alpha, successors_list) in mdp.alpha_successors(s):
                linear_program += x[s] >= sum(
                    pr * x[succ] for (succ, pr) in successors_list)
        if msg:
            print(linear_program)
        # solve the LP
        solver.msg = msg
        linear_program.solve(solver)
        for s in untreated_states:
            x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    global v
    v = x
    return x
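
def _demo_reach() -> None:
    # Sketch: on the tiny example every state reaches s2 with probability 1,
    # so this should print [1, 1, 1] (no LP is even solved in that case, so
    # no external solver is needed here).
    print(reach(_tiny_mdp(), [2]))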
def minimal_steps_number_to(mdp: MDP, T: List[int]) -> List[float]:
    """
    Compute the length of a shortest path, in number of edges, to T in the
    underlying graph of the MDP (i.e., the minimal number of steps required
    to reach T in the underlying graph). The function connected_to (a
    breadth-first search algorithm) is adapted to number the states instead
    of marking them.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :return: a list 'steps' such that, for each state s of the MDP,
             steps[s] = n where n is the minimal number of steps to reach T
             in the underlying graph of the MDP.
    """
    steps = [float('inf')] * mdp.number_of_states
    for t in T:
        steps[t] = 0
    frontier = deque([])
    for t in T:
        frontier.extend(mdp.pred(t))
    i = 1
    # process the predecessors level by level, i being the current distance
    while len(frontier) > 0:
        predecessors = frontier
        frontier = deque([])
        while len(predecessors) > 0:
            pred = predecessors.pop()
            if steps[pred] > i:
                steps[pred] = i
                frontier.extend(mdp.pred(pred))
        i += 1
    return steps
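
def _demo_minimal_steps() -> None:
    # Sketch: in the tiny example, s0 and s1 both have an edge to s2 in the
    # underlying graph, so this should print [1, 1, 0].
    print(minimal_steps_number_to(_tiny_mdp(), [2]))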
def min_expected_cost(mdp: MDP, T: List[int], msg=0,
                      solver: pulp.LpSolver = pulp.GLPK_CMD()) -> List[float]:
    """
    Compute the minimum expected length of paths to the set of targets T
    from each state in the MDP.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :param msg: (optional) set this parameter to 1 to activate the debug mode
                in the console.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: a list x such that x[s] is the minimum expected length of paths
             to the set of targets T from the state s of the MDP.
    """
    states = range(mdp.number_of_states)
    x = [float('inf')] * mdp.number_of_states
    expect_inf = [True] * mdp.number_of_states
    # the expected length is finite exactly for the states that reach T with
    # maximum probability 1
    for s in pr_max_1(mdp, T):
        x[s] = -1
        expect_inf[s] = False
    for t in T:
        x[t] = 0
    # the states that get an LP variable are those still marked -1
    variable_states = [s for s in states if x[s] == -1]
    # formulate the LP problem
    linear_program = pulp.LpProblem(
        "minimum expected length of path to target", pulp.LpMaximize)
    # initialize variables
    for s in variable_states:
        x[s] = pulp.LpVariable(mdp.state_name(s), lowBound=0)
    # objective function: maximize the sum of the finite expectations
    linear_program += sum(
        map(lambda s: x[s], filter(lambda s: not expect_inf[s], states)))
    # constraints: for each action alpha whose successors all have a finite
    # expectation, x[s] <= w(alpha) + sum_{succ} pr(succ) * x[succ]
    for s in variable_states:
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if not list(filter(lambda succ_pr: expect_inf[succ_pr[0]],
                               successor_list)):
                linear_program += x[s] <= mdp.w(alpha) + sum(
                    map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                        successor_list))
    if msg:
        print(linear_program)
    # solve the LP
    solver.msg = msg
    if linear_program.variables():
        linear_program.solve(solver)
    for s in variable_states:
        x[s] = x[s].varValue
    if msg:
        print_optimal_solution(x, states, mdp.state_name)
    return x
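
def _demo_min_expected_cost() -> None:
    # Sketch (requires an LP solver such as GLPK on the PATH): with
    # w('a') = 1 and w('b') = 2 in the tiny example, the optimal
    # expectations should come out as [1.5, 1.0, 0].
    print(min_expected_cost(_tiny_mdp(), [2]))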
def random_MDP(n: int, a: int,
               strictly_A: bool = False,
               complete_graph: bool = False,
               weights_interval: Tuple[int, int] = (1, 1),
               force_weakly_connected_to: bool = False) -> MDP:
    """
    Generate a random MDP.

    :param n: number of states of the generated MDP.
    :param a: number of actions of the generated MDP.
    :param strictly_A: (optional) set this parameter to True to force each
                       state of the generated MDP to have exactly a actions,
                       i.e., |A(s)| = a for every state s.
    :param complete_graph: (optional) set this parameter to True to force the
                           MDP to have a complete underlying graph.
    :param weights_interval: (optional) an interval (w1, w2) for the weights
                             of the actions, i.e., w(α) ∈ [w1, w2] for each
                             action α of the generated MDP.
    :param force_weakly_connected_to: (optional) set this parameter to True
                                      to force some random states to be
                                      absorbing. As a consequence, some states
                                      may not be connected to a target set T,
                                      and more states can have a reachability
                                      probability to T < 1.
    :return: a randomly generated MDP.
    """
    states = list(range(n))
    actions = list(range(a))
    w1, w2 = weights_interval
    if not (1 <= w1 <= w2):
        raise ValueError("weights_interval (w1, w2) must satisfy 1 <= w1 <= w2")
    w = [random.randint(w1, w2) for _ in range(a)]
    mdp = MDP([], [], w, n)
    for s in states:
        if not strictly_A:
            alpha_list = random.sample(actions, random.randint(1, a))
        else:
            alpha_list = actions
        if complete_graph:
            successors_set = set()
        for alpha in alpha_list:
            successors = random.sample(states, random.randint(1, n))
            if force_weakly_connected_to and random.random() >= 0.7:
                successors = [s]
            if complete_graph:
                successors_set |= set(successors)
                # the last action of the state absorbs all the successors
                # that were not yet drawn, to complete the underlying graph
                if alpha == alpha_list[-1]:
                    for succ in filter(
                            lambda succ: succ not in successors_set, states):
                        successors.append(succ)
            probabilities = random_probability(len(successors))
            mdp.enable_action(
                s, alpha,
                [(successors[succ], probabilities[succ])
                 for succ in range(len(probabilities))])
    return mdp
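
def _demo_random_MDP() -> None:
    # Sketch: a 10-state MDP with 3 actions whose weights are drawn
    # uniformly from [1, 5].
    mdp = random_MDP(10, 3, weights_interval=(1, 5))
    print(mdp.number_of_states)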
def build_strategy(mdp: MDP, T: List[int],
                   solver: pulp.LpSolver = pulp.GLPK_CMD(),
                   msg=0) -> Callable[[int], int]:
    """
    Build a memoryless strategy that returns, for each state s of the MDP,
    an action that maximises the reachability probability to T from s.

    :param mdp: an MDP for which the strategy will be built.
    :param T: a list of target states.
    :param solver: (optional) an LP solver allowed in PuLP (e.g., GLPK or CPLEX).
    :return: the strategy built.
    """
    x = reach(mdp, T, solver=solver, msg=msg)
    states = range(mdp.number_of_states)
    act_max = [[] for _ in states]
    # update act_max: act_max[s] lists the actions of s achieving Pr^max
    for s in states:
        pr_max = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            pr = sum(
                map(lambda succ_pr: succ_pr[1] * x[succ_pr[0]],
                    successor_list))
            if pr == pr_max:
                act_max[s].append(alpha)
            elif pr > pr_max:
                pr_max = pr
                act_max[s] = [alpha]
    # compute M^max, the sub-MDP restricted to the optimal actions
    mdp_max = MDP([], [], mdp._w, mdp.number_of_states, validation=False)
    for s in states:
        i = 0
        for (alpha, successor_list) in mdp.alpha_successors(s):
            if alpha == act_max[s][i]:
                i += 1
                mdp_max.enable_action(s, alpha, successor_list)
                if i == len(act_max[s]):
                    break
    # compute the final strategy: among the optimal actions, pick one that
    # decreases the minimal number of steps to T in M^max
    minimal_steps = minimal_steps_number_to(mdp_max, T)
    strategy: List[int] = []
    for s in states:
        if x[s] == 0 or minimal_steps[s] == 0:
            strategy.append(act_max[s][0])
        else:
            for (alpha, successor_list) in mdp_max.alpha_successors(s):
                for (succ, pr) in successor_list:
                    if minimal_steps[succ] == minimal_steps[s] - 1:
                        strategy.append(alpha)
                        break
                if len(strategy) == s + 1:
                    break
    return lambda s: strategy[s]
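
def _demo_build_strategy() -> None:
    # Sketch: on the tiny example the strategy should pick action 'a'
    # (index 0) in every state, since 'b' only loops on s0; this should
    # print [0, 0, 0]. No LP is solved here, so no external solver is needed.
    sigma = build_strategy(_tiny_mdp(), [2])
    print([sigma(s) for s in range(3)])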
def complete_MDP(n: int, a: int, w: List[int] = []) -> MDP:
    """
    Generate a worst-case MDP: every action is enabled in every state, and
    every action leads to every state of the MDP with non-zero probability.

    :param n: number of states.
    :param a: number of actions.
    :param w: weights of the actions.
    :return: the MDP generated.
    """
    if not w:
        w = [1] * a
    mdp = MDP([], [], w, number_of_states=n)
    # distribution 1/Z, 2/Z, ..., n/Z with Z = 1 + 2 + ... + n, rotated by
    # one position for each enabled action
    total = sum(range(1, n + 1))
    pr = [i / total for i in range(1, n + 1)]
    for s in range(n):
        for alpha in range(a):
            pr = pr[1:] + pr[0:1]
            to_enable = [(succ, pr[succ]) for succ in range(n)]
            mdp.enable_action(s, alpha, to_enable)
    return mdp
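
def _demo_complete_MDP() -> None:
    # Sketch: with n = 4 and a = 2, each action distribution is a rotation
    # of [0.1, 0.2, 0.3, 0.4].
    mdp = complete_MDP(4, 2)
    print(list(mdp.alpha_successors(0)))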
def export_to_yaml(mdp: MDP, file_name: str) -> None:
    """
    Serialise an MDP instance into a yaml file.

    :param mdp: an MDP.
    :param file_name: the name of the yaml file.
    """
    mdp_dict = {'mdp': {'states': [], 'actions': []}}
    for s in range(mdp.number_of_states):
        state_dict = {'name': mdp.state_name(s), 'enabled actions': []}
        for (alpha, succ_list) in mdp.alpha_successors(s):
            action_dict = {'name': mdp.act_name(alpha), 'transitions': []}
            for (succ, pr) in succ_list:
                action_dict['transitions'].append(
                    {'target': mdp.state_name(succ), 'probability': pr})
            state_dict['enabled actions'].append(action_dict)
        mdp_dict['mdp']['states'].append(state_dict)
    for alpha in range(mdp.number_of_actions):
        mdp_dict['mdp']['actions'].append(
            {'name': mdp.act_name(alpha), 'weight': mdp.w(alpha)})
    if file_name:
        with open(file_name + '.yaml', 'w') as yaml_file:
            yaml.dump(mdp_dict, yaml_file, default_flow_style=False)
    else:
        print(yaml.dump(mdp_dict, default_flow_style=False))
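
def _demo_yaml_round_trip() -> None:
    # Sketch: export the tiny example to 'tiny.yaml', then read it back.
    export_to_yaml(_tiny_mdp(), 'tiny')
    with open('tiny.yaml') as stream:
        mdp = import_from_yaml(stream)
    print(mdp.number_of_states)  # 3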
def connected_to(mdp: MDP, T: List[int]) -> List[bool]:
    """
    Compute the states connected to T. For this purpose, a backward
    breadth-first search on the underlying graph of the MDP is used.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :return: a list 'marked' such that, for each state s of the MDP,
             marked[s] = True if s is connected to T in the underlying graph
             of the MDP.
    """
    marked = [False] * mdp.number_of_states
    for t in T:
        marked[t] = True
    frontier = deque([])
    for t in T:
        frontier.extend(mdp.pred(t))
    while len(frontier) > 0:
        pred = frontier.pop()
        if not marked[pred]:
            marked[pred] = True
            for predecessor in mdp.pred(pred):
                frontier.appendleft(predecessor)
    return marked
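
def _demo_connected_to() -> None:
    # Sketch: every state of the tiny example is connected to s2, so this
    # should print [True, True, True].
    print(connected_to(_tiny_mdp(), [2]))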
def pr_max_1(mdp: MDP, T: List[int],
             connected: List[bool] = []) -> List[int]:
    """
    Compute the states s of the MDP such that the maximum probability to
    reach T from s is 1.

    :param mdp: an MDP.
    :param T: a list of target states of the MDP.
    :param connected: (optional) list of the states of the MDP connected
                      to T. If this parameter is not provided, it is computed
                      in the function.
    :return: the list of states s of the MDP such that the maximum
             probability to reach T from s is 1.
    """
    if not connected:
        connected = connected_to(mdp, T)
    removed_state = [False] * mdp.number_of_states
    T_set = set(T)
    disabled_action = [[False] * len(mdp.act(s))
                       for s in range(mdp.number_of_states)]
    no_disabled_actions = [0] * mdp.number_of_states
    U = [s for s in range(mdp.number_of_states) if not connected[s]]
    while len(U) > 0:
        R = deque(U)
        while len(R) > 0:
            u = R.pop()
            # disable every action that can lead to u from a state still
            # connected to T; a state left with no enabled action is removed
            # as well
            for (t, alpha_i) in mdp._alpha_pred[u]:
                if (connected[t] and not disabled_action[t][alpha_i]
                        and t not in T_set):
                    disabled_action[t][alpha_i] = True
                    no_disabled_actions[t] += 1
                    if no_disabled_actions[t] == len(mdp.act(t)):
                        R.appendleft(t)
                        connected[t] = False
            removed_state[u] = True
        # build the sub-MDP without the removed states and disabled actions
        sub_mdp = MDP([], [], [], number_of_states=mdp.number_of_states,
                      validation=False)
        for s in range(mdp.number_of_states):
            if not removed_state[s]:
                for alpha_i in range(len(mdp.act(s))):
                    if not disabled_action[s][alpha_i]:
                        sub_mdp.enable_action(
                            s, mdp._enabled_actions[s][0][alpha_i],
                            list(filter(
                                lambda succ_pr:
                                not removed_state[succ_pr[0]],
                                mdp._enabled_actions[s][1][alpha_i])))
        mdp = sub_mdp
        connected = connected_to(mdp, T)
        U = [s for s in range(mdp.number_of_states)
             if not connected[s] and not removed_state[s]]
    pr_1 = [s for s in range(mdp.number_of_states) if not removed_state[s]]
    return pr_1
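
def _demo_pr_max_1() -> None:
    # Sketch: all three states of the tiny example reach s2 with maximum
    # probability 1, so this should print [0, 1, 2].
    print(pr_max_1(_tiny_mdp(), [2]))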