Example 1
 def set_new_node(
     self, node, depth, er_bound, prob, parent_likelihood
 ):  # sets the fields of a newly instantiated node (the node object itself is created elsewhere)
     b = node.state.belief
     node.risk = bound_prob(avg_func(b, self.r))
     # Depth of a node is its distance to the root
     node.depth = depth
     node.set_prob(prob)
     node.set_likelihood(
         prob * parent_likelihood)  # likelihood = parent likelihood * prob
     # Probability of violating constraints in a belief state (never
     # changes)
     if is_terminal_belief(b, self.term,
                           self.terminal_prob):  # terminal belief
         self.set_terminal_node(node)
     elif node.depth == self.fixed_horizon:
         # self.set_terminal_node(node)
         node.value = avg_func(b, self.h)
         node.terminal = True  # node at the fixed horizon is treated as terminal
         node.best_action = None  # no action associated yet
         node.exec_risk_bound = bound_prob(er_bound)  # execution risk bound
         # avg heuristic estimate of execution risk at node
         node.set_exec_risk(node.risk)
         node.risk_upper = node.exec_risk
     else:
         # the value of a node is the average of the heuristic only when it's
         # first created. After that, the value is given as a function of
         # its children
         node.value = avg_func(b, self.h)
         node.terminal = False  # new node is non terminal
         node.best_action = None  # no action associated yet
         node.exec_risk_bound = bound_prob(er_bound)  # execution risk bound
         # avg heuristic estimate of execution risk at node
         node.set_exec_risk(node.risk)
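
The helpers bound_prob, avg_func, and is_terminal_belief are not shown in these examples. Below is a minimal sketch of what they might look like, assuming the belief is a dictionary mapping states to probabilities; the planner's actual representation may differ.

    def bound_prob(p):
        # clamp a probability estimate to the valid [0, 1] range
        # (guards against small numerical errors)
        return min(max(p, 0.0), 1.0)

    def avg_func(belief, func, *args):
        # expected value of func over a belief given as {state: probability}
        return sum(prob * func(state, *args) for state, prob in belief.items())

    def is_terminal_belief(belief, term_func, terminal_prob):
        # a belief is terminal if the probability mass on terminal states
        # reaches terminal_prob
        mass = sum(prob for state, prob in belief.items() if term_func(state))
        return mass >= terminal_prob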
Example 2
    def set_new_node(self, node, depth, er_bound, prob, parent_likelihood):
        # sets the fields of a new node
        if self.continuous_belief:
            b = node.state
            node.risk = bound_prob(self.r(b))
            node.depth = depth
            node.set_prob(prob)
            node.set_likelihood(prob * parent_likelihood)
            if self.term(node.state):
                self.set_terminal_node(node)
            else:
                # the value of a node is the average of the heuristic only when it's
                # first created. After that, the value is given as a function of
                # its children
                node.value = self.h(node)
                node.terminal = False  # new node is non terminal
                node.best_action = None  # no action associated yet
                node.exec_risk_bound = bound_prob(
                    er_bound)  # execution risk bound
                # avg heuristic estimate of execution risk at node
                node.set_exec_risk(node.risk)
        else:
            b = node.state.belief
            node.risk = bound_prob(avg_func(b, self.r))
            # Depth of a node is its distance to the root
            node.depth = depth
            node.set_prob(prob)
            node.set_likelihood(prob * parent_likelihood)

            # Probability of violating constraints in a belief state (never
            # changes)
            if is_terminal_belief(b, self.term, self.terminal_prob):
                self.set_terminal_node(node)
            else:
                # the value of a node is the average of the heuristic only when it's
                # first created. After that, the value is given as a function of
                # its children
                node.value = avg_func(b, self.h)
                node.terminal = False  # new node is non terminal
                node.best_action = None  # no action associated yet
                node.exec_risk_bound = bound_prob(
                    er_bound)  # execution risk bound
                # avg heuristic estimate of execution risk at node
                node.set_exec_risk(node.risk)
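
Both variants assume the node object exposes the setter methods used above. A hypothetical minimal node class is sketched below, for illustration only: the class name and default values are assumptions, and only the attribute and method names follow the code in the examples.

    class SearchNode:
        # hypothetical container for the fields that set_new_node fills in
        def __init__(self, state):
            self.state = state
            self.risk = 0.0              # probability of constraint violation at this node
            self.depth = 0               # distance from the root
            self.value = 0.0             # admissible estimate of the node's Q value
            self.terminal = False
            self.deadend = False
            self.best_action = None
            self.exec_risk = 0.0         # estimated execution risk of the subpolicy rooted here
            self.exec_risk_bound = 1.0   # execution risk bound allocated to this node
            self.risk_upper = 0.0
            self.prob = 0.0
            self.likelihood = 0.0

        def set_prob(self, prob):
            self.prob = prob

        def set_likelihood(self, likelihood):
            self.likelihood = likelihood

        def set_exec_risk(self, exec_risk):
            self.exec_risk = exec_risk

        def set_value(self, value):
            self.value = value

        def set_best_action(self, action):
            self.best_action = action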
Example 3
    def update_values_and_best_actions(self, expanded_nodes):
        # updates the Q values of nodes in the graph and the current best policy,
        # processing one expanded node at a time
        self.debug('\n ****************************')
        self.debug('Update values and best actions  ')
        self.debug('****************************')

        for exp_idx, exp_node in enumerate(expanded_nodes):
            Z = self.build_ancestor_list(exp_node)
            # updates the best action at the node
            for node in Z:
                self.debug('\nupdate values and best action: ' +
                           str(node.state.state_print()))
                self.debug('current Q: ', node.value, "\n")

                # all actions (operators) available at this node, taken from the graph
                all_action_operators = [] if node.terminal else self.graph.all_node_operators(
                    node)
                # risk at the node's belief state (does not depend on the action
                # taken)
                risk = node.risk
                # current *admissible* (optimistic) estimate of the node's Q
                # value
                current_Q = node.value
                # execution risk bound; the execution risk cap depends on the type
                # of chance constraint being imposed
                er_bound = min([node.exec_risk_bound, self.er_cap])
                if self.cc_type == 'everywhere':
                    er_bound = self.er_cap

                best_action_idx = -1
                best_Q = self.initial_Q  # -inf or inf based on optimization
                best_D = -1  # depth
                exec_risk_for_best = -1.0

                # Estimates value and risk of the current node for each
                # possible action
                for act_idx, act in enumerate(all_action_operators):
                    probs = act.properties['prob']
                    prob_safe = act.properties['prob_safe']
                    children = self.graph.hyperedge_successors(node, act)
                    # estimate Q of taking this action from current node. Composed of
                    # current reward and the average reward of its children
                    Q = act.op_value + \
                        np.sum([p * child.value for (p, child)
                                in zip(probs, children)])
                    # Average child depth
                    D = 1 + np.sum([
                        p * child.depth for (p, child) in zip(probs, children)
                    ])

                    # compute an estimate of the execution risk of taking this action
                    # from the current node, composed of the current risk and the
                    # average execution risk of its children
                    if self.cc_type == 'overall':
                        exec_risk = risk + (1.0 - risk) * np.sum([
                            p * child.exec_risk
                            for (p, child) in zip(prob_safe, children)
                        ])
                    # enforcing same risk bound at all steps in the policy
                    elif self.cc_type == 'everywhere':
                        exec_risk = risk

                    self.debug('action: ' + act.name + ' children: ' +
                               str(children[0].state.state_print()) +
                               ' risk ' + str(exec_risk))
                    self.debug('  act_op_value: ', act.op_value)

                    for child in children:
                        self.debug(' child_value: ', child.value)

                    self.debug('  children Q: ' + str(Q))

                    # if execution risk bound has been violated or if Q value for this action is worse
                    # than current best, we should definitely not select it.
                    if (exec_risk > er_bound) or self.is_worse(Q, best_Q):
                        select_action = False
                        if (exec_risk > er_bound):
                            self.debug(' Action pruned by risk bound')
                    # if risk bound respected and Q value is equal or better
                    else:
                        select_action = True
                    # if the action is still a candidate, check whether the
                    # children's risk bounds remain feasible
                    if select_action:
                        # Updates the execution risk bounds for the children
                        child_er_bounds, cc_infeasible = self.compute_exec_risk_bounds(
                            er_bound, risk, children, prob_safe)
                        for child in children:
                            self.debug('  select_action: child ' +
                                       child.state.state_print() + " depth: " +
                                       str(child.depth) + " risk bound: " +
                                       str(child.exec_risk_bound) +
                                       ' infeasible: ' + str(cc_infeasible))
                        if not cc_infeasible:  # if chance constraint has not been violated
                            for idx, child in enumerate(children):
                                child.exec_risk_bound = child_er_bounds[idx]

                            # Updates the best action at node
                            best_Q = Q
                            best_action_idx = act_idx
                            best_D = D
                            exec_risk_for_best = exec_risk
                # Test if some action has been selected
                if best_action_idx >= 0:
                    # if (not np.isclose(best_Q, current_Q)) and self.is_better(best_Q, current_Q):
                    #     print('current_Q', current_Q, 'best_Q', best_Q)

                    #     print(
                    #         'WARNING: node Q value improved, which might indicate inadmissibility.')

                    # propagate execution risk bound down to all descendants (added by Sungkwoen)
                    er_bound_updating_nodes = deque([node])

                    while len(er_bound_updating_nodes) > 0:
                        updating_node = er_bound_updating_nodes.popleft()

                        if updating_node.best_action:
                            best_action_updating = updating_node.best_action
                            er_bound_updating = updating_node.exec_risk_bound
                            risk_updating = updating_node.risk
                            probs_updating = best_action_updating.properties[
                                'prob']
                            prob_safe_updating = best_action_updating.properties[
                                'prob_safe']
                            children_updating = self.graph.hyperedge_successors(
                                updating_node, best_action_updating)

                            child_er_bounds, er_bound_infeasible = self.compute_exec_risk_bounds(
                                er_bound_updating, risk_updating,
                                children_updating, prob_safe_updating)

                            for idx, child in enumerate(children_updating):
                                child.exec_risk_bound = child_er_bounds[idx]

                            er_bound_updating_nodes.extend(children_updating)

                    # updates optimal value estimate and execution risk estimate,
                    # and marks the best action
                    node.set_value(best_Q)
                    node.set_exec_risk(exec_risk_for_best)
                    node.set_best_action(all_action_operators[best_action_idx])
                    self.debug('best action for ' +
                               str(node.state.state_print()) + ' set as ' +
                               str(all_action_operators[best_action_idx].name))
                else:  # no action was selected, so this node is terminal
                    self.debug('*\n*\n*\n*\n no best action for ' +
                               str(node.state.state_print()) + '\n*\n*\n*\n')

                    # mdeyo: Finally got plans with deadends to work!
                    # Deadends = state with no actions available, either
                    # because it's an actual deadend or because all actions were
                    # too risky.
                    # If the deadend was on the optimal path, the planner would
                    # just mark it terminal and planning would end before
                    # the goal was achieved

                    # mdeyo: Current fix is just to mark the deadend state as
                    # having execution risk = 1.0 so that the planner will
                    # remove the previous action from policy and properly pick
                    # the next best action at the parent state
                    # node.risk = 1.0
                    # node.set_exec_risk(node.risk)

                    # mdeyo: alternative, possibly better fix is to update the
                    # value instead of the risk, setting the value to +inf when
                    # minimizing

                    # only mark inf value deadend if not actually the goal
                    if not is_terminal_belief(node.state.belief, self.term,
                                              self.terminal_prob):
                        self.mark_deadend(node)

                    if not node.terminal and not node.deadend:
                        self.set_terminal_node(node)
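
compute_exec_risk_bounds is called in Examples 3 and 4 but is not listed here. The sketch below shows one plausible implementation, obtained by inverting the execution-risk recursion used above (exec_risk = risk + (1 - risk) * sum(prob_safe * child.exec_risk)) to derive a bound for each child from its siblings' current estimates; the planner's actual implementation may differ.

    def compute_exec_risk_bounds(er_bound, risk, children, prob_safe):
        # distributes the parent's execution risk bound among the children,
        # using the siblings' current exec_risk estimates for the remainder
        exec_risk_bounds = [0.0] * len(children)
        infeasible = False
        for k, _ in enumerate(children):
            sibling_term = sum(prob_safe[j] * c.exec_risk
                               for j, c in enumerate(children) if j != k)
            numerator = er_bound - risk - (1.0 - risk) * sibling_term
            denominator = (1.0 - risk) * prob_safe[k]
            if numerator < 0.0:
                # the chance constraint cannot be met through this action
                infeasible = True
                break
            if denominator <= 0.0:
                # child unreachable through a safe transition; no budget needed
                exec_risk_bounds[k] = 1.0
            else:
                exec_risk_bounds[k] = min(numerator / denominator, 1.0)
        return exec_risk_bounds, infeasible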
Example 4
    def expand_best_partial_solution(self):
        # expands a node in the graph currently contained in the best
        # partial solution, adding new nodes and edges to the graph

        # nodes_to_expand = self.opennodes
        # self.opennodes = None

        nodes_to_expand = [self.choose_node()]  # choose best node

        for node in nodes_to_expand:
            self.debug('\n ******* expanding node *******')
            self.debug(node.state.state_print())
            # print(node.state.state_print())
            self.debug('******************************\n')
            belief = node.state.belief  # belief state associated to the node
            parent_risk = node.risk  # execution risk for current node
            parent_bound = node.exec_risk_bound  # er bound for current node
            parent_depth = node.depth  # dist of parent to root
            parent_likelihood = node.likelihood  # likelihood that node is reached in policy

            if self.cc_type == 'everywhere':
                parent_bound = self.cc

            self.debug('compute_exec_risk_bounds: parent_bound ', parent_bound,
                       ' parent_risk ', parent_risk)

            # if the current node is guaranteed to violate constraints and a violation
            # is set to halt the process: make the node terminal
            if self.halt_on_violation and np.isclose(parent_risk, 1.0):
                all_node_actions = []
            else:
                # else get the available actions from model
                all_node_actions = self.get_all_actions(belief, self.A)

            action_added = False  # flag if a new action has been added

            if len(all_node_actions) > 0:
                added_count = 0

                for act in all_node_actions:
                    self.debug("\n", act)
                    child_obj_list, prob_list, prob_safe_list, new_child_idxs = self.obtain_child_objs_and_probs(
                        belief, self.T, self.O, self.r, act)

                    # initializes the new child nodes
                    for c_idx in new_child_idxs:
                        self.set_new_node(child_obj_list[c_idx],
                                          parent_depth + 1, 0.0,
                                          prob_list[c_idx], parent_likelihood)

                    # if parent bound Delta is ~ 1.0, the child nodes are guaranteed to have
                    # their risk bound equal to 1
                    if (not np.isclose(parent_bound, 1.0)):
                        # computes execution risk bounds for the child nodes
                        er_bounds, er_bound_infeasible = self.compute_exec_risk_bounds(
                            parent_bound, parent_risk, child_obj_list,
                            prob_safe_list)
                    else:
                        er_bounds = [1.0] * len(child_obj_list)
                        er_bound_infeasible = False

                    # Only creates a new operator if all execution risk bounds are non-negative
                    if not er_bound_infeasible:
                        # updates the values of the execution risk for all children
                        # that will be added to the graph
                        for idx, child in enumerate(child_obj_list):
                            child.exec_risk_bound = er_bounds[idx]

                        # average instantaneous value (cost or reward)
                        avg_op_value = avg_func(belief, self.V, act)

                        act_obj = RAOStarGraphOperator(name=str(act),
                                                       op_value=avg_op_value,
                                                       properties={
                                                           'prob':
                                                           prob_list,
                                                           'prob_safe':
                                                           prob_safe_list
                                                       })
                        # an "Action" object crerated
                        # add edge (Action) to graph
                        self.graph.add_hyperedge(parent_obj=node,
                                                 child_obj_list=child_obj_list,
                                                 prob_list=prob_list,
                                                 op_obj=act_obj)

                        action_added = True
                        added_count += 1
                    else:
                        self.debug(
                            '  action not added - execution risk bound infeasible')

            if not action_added:
                # self.debug('action not added')
                # self.set_terminal_node(node)

                if not is_terminal_belief(node.state.belief, self.term,
                                          self.terminal_prob):
                    self.mark_deadend(node)

                if not node.terminal and not node.deadend:
                    self.set_terminal_node(node)

        # returns the list of nodes that either had actions added or were marked terminal
        return nodes_to_expand
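
Examples 3 and 4 together form the core RAO* iteration: expand a node from the best partial policy, then back up values and execution risks through its ancestors. A hypothetical driver loop is sketched below, for illustration only; everything except the two methods listed above is an assumed helper name.

    # hypothetical usage, assuming `planner` is an instance exposing the
    # methods shown in Examples 3 and 4
    while planner.has_open_nodes():                        # assumed helper
        expanded = planner.expand_best_partial_solution()  # Example 4
        planner.update_values_and_best_actions(expanded)   # Example 3
    policy = planner.extract_policy()                      # assumed helper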