Example #1
    def vpi(self, state) -> 'float, >= -0.001':
        """
        Calculates the VPI of a state. All nodes on a goal's branch are
        relevant, so this is equivalent to vpi_action with the goal node
        as the selected action.
        """
        option_dist = []
        for option in range(1, self.no_options + 1):
            action = self.goals[option - 1][0]
            # Observe the goal's whole subtree plus the path to it (root excluded).
            obs = (*self.subtree[action], *self.path_to(action)[1:])
            obs = list(set(obs))
            op_dist = self.node_value_after_observe_option(option, state, obs)
            # Wrap an already-observed (scalar) goal node in a point distribution.
            if not hasattr(state[action], 'sample'):
                goal_dist = Categorical(vals=[state[action]], probs=[1])
            else:
                goal_dist = state[action]
            dists = [op_dist, goal_dist]
            option_dist.append(cross_1(dists, sum))

        net_dist = self.shrink(option_dist)
        nvao = float(cmax(net_dist, default=ZERO).expectation())
        result = nvao - self.expected_term_reward_disc(state)
        # Snap tiny negative values (numerical noise) to exactly zero.
        if abs(result) < 0.001:
            result = 0.0
        return result
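
To make the computation above concrete, here is a minimal, self-contained sketch of the quantity vpi() produces: the expected value of the best option after observing, minus the value of acting on current information. The Categorical class, cross_sum, and expected_max below are simplified stand-ins for the codebase's Categorical, cross_1, and cmax helpers, and all numbers are made up.

from itertools import product

class Categorical:
    """Simplified stand-in for the codebase's Categorical distribution."""
    def __init__(self, vals, probs):
        self.vals, self.probs = vals, probs

def cross_sum(dists):
    """Distribution of the sum of independent Categoricals (role of cross_1(dists, sum))."""
    vals, probs = [], []
    for combo in product(*(zip(d.vals, d.probs) for d in dists)):
        vs, ps = zip(*combo)
        vals.append(sum(vs))
        p = 1.0
        for q in ps:
            p *= q
        probs.append(p)
    return Categorical(vals, probs)

def expected_max(dists):
    """E[max over options], assuming independence (role of cmax(...).expectation())."""
    total = 0.0
    for combo in product(*(zip(d.vals, d.probs) for d in dists)):
        vs, ps = zip(*combo)
        p = 1.0
        for q in ps:
            p *= q
        total += max(vs) * p
    return total

# Option A: an unobserved branch worth 0 or 4, plus a known goal worth 1.
option_a = cross_sum([Categorical([0, 4], [0.5, 0.5]),
                      Categorical([1], [1.0])])
# Option B: fully observed, worth exactly 2.
option_b = Categorical([2], [1.0])
# Acting without observing, the best option by expectation is A: 0.5*1 + 0.5*5 = 3.
expected_term_reward = 3.0
print(expected_max([option_a, option_b]) - expected_term_reward)  # 0.5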
Example #2
    def vpi_action(self, action, state) -> 'float, >= -0.001':
        """
        Calculates the VPI of an action. The relevant nodes are the
        ancestors and descendants of the selected node.
        """
        option_dist = []
        # Observe the action's whole subtree plus the path to it (root excluded).
        obs = (*self.subtree[action], *self.path_to(action)[1:])
        obs = list(set(obs))
        for option in range(1, self.no_options + 1):
            op_dist = self.node_value_after_observe_option(option, state, obs)
            node_idx = self.goals[option - 1][0]
            # Wrap an already-observed (scalar) goal node in a point distribution.
            if not hasattr(state[node_idx], 'sample'):
                goal_dist = Categorical(vals=[state[node_idx]], probs=[1])
            else:
                goal_dist = state[node_idx]
            dists = [op_dist, goal_dist]
            option_dist.append(cross_1(dists, sum))

        net_dist = self.shrink(option_dist)
        nvao = float(cmax(net_dist, default=ZERO).expectation())
        result = nvao - self.expected_term_reward_disc(state)
        # Snap tiny negative values (numerical noise) to exactly zero.
        if abs(result) < 0.001:
            result = 0.0
        return result
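
The distinguishing step in vpi_action() is the observation set: the clicked node's whole subtree plus the path from the root down to it, with the root itself dropped. Here is a tiny sketch of that set on a hypothetical five-node tree; subtree and path_to are ad-hoc helpers, not the environment's precomputed tables.

tree = {0: [1, 4], 1: [2, 3], 2: [], 3: [], 4: []}   # node -> children; 0 is the root
parent = {1: 0, 2: 1, 3: 1, 4: 0}

def subtree(node):
    """The node itself plus all of its descendants."""
    out = [node]
    for child in tree[node]:
        out.extend(subtree(child))
    return out

def path_to(node):
    """Nodes from the root down to `node`, inclusive."""
    path = [node]
    while path[-1] != 0:
        path.append(parent[path[-1]])
    return path[::-1]

action = 2
obs = sorted(set((*subtree(action), *path_to(action)[1:])))
print(obs)  # [1, 2]: node 2's subtree plus its ancestor 1, root excluded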
Example #3
    def high_vpi(self, state, bins=4):
        """Returns the high-level VPI.

        Arguments:
            state: high-level state for the computation
            bins: number of bins used to discretize continuous distributions
        """
        dists = []
        for option in range(1, self.no_options + 1):  # Collect the goal-node distributions.
            goal_clicked = self.goals[option - 1][0]
            node = self.low_state[goal_clicked]
            if hasattr(node, 'sample'):
                if hasattr(node, 'mu'):
                    # Discretize a continuous (Normal) node into `bins` points.
                    dist = node.to_discrete(n=bins, max_sigma=4)
                    dist.vals = tuple(round(val, 3) for val in dist.vals)
                    dist.probs = tuple(round(p, 3) for p in dist.probs)
                else:
                    dist = node
            else:
                # Already-observed node: a point distribution on its value.
                dist = Categorical(vals=[node], probs=[1])
            dists.append(dist)
        net_dist = self.shrink(dists)
        expected_return = cmax(net_dist).expectation()
        return expected_return - self.expected_high_term_reward(state)
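
The only non-trivial step in high_vpi() is discretizing a continuous goal distribution. The sketch below is a guess at what node.to_discrete(n=bins, max_sigma=4) produces for a Normal node: split [mu - 4*sigma, mu + 4*sigma] into equal-width bins, put each bin's Gaussian mass on its midpoint, and renormalize. The real method may place its support points differently; this stand-in only illustrates the shape of the result.

import math

def normal_cdf(x, mu, sigma):
    return 0.5 * (1 + math.erf((x - mu) / (sigma * math.sqrt(2))))

def to_discrete(mu, sigma, n=4, max_sigma=4):
    """Hypothetical stand-in for Normal.to_discrete: bin midpoints with Gaussian mass."""
    lo, hi = mu - max_sigma * sigma, mu + max_sigma * sigma
    edges = [lo + (hi - lo) * i / n for i in range(n + 1)]
    vals = [round((a + b) / 2, 3) for a, b in zip(edges, edges[1:])]
    probs = [normal_cdf(b, mu, sigma) - normal_cdf(a, mu, sigma)
             for a, b in zip(edges, edges[1:])]
    total = sum(probs)
    probs = [round(p / total, 3) for p in probs]
    return vals, probs

print(to_discrete(mu=0.0, sigma=1.0, n=4))
# ([-3.0, -1.0, 1.0, 3.0], [0.023, 0.477, 0.477, 0.023])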
Example #4
def exact_node_value_after_observe(obs_tree):
    """A distribution over the expected value of a node after making an observation.

    `obs_tree` is a (value, children) tree over the observed nodes.
    """
    children = tuple(exact_node_value_after_observe(c) + c[0]
                     for c in obs_tree[1])
    return cmax(children, default=ZERO)
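
With scalar values in place of distributions, cmax reduces to the built-in max, ZERO to 0, and the recursion above becomes a best-path computation over the observed tree. A minimal stand-in, assuming obs_tree is a (value, children) pair:

def node_value(obs_tree):
    """Scalar stand-in for exact_node_value_after_observe."""
    children = tuple(node_value(c) + c[0] for c in obs_tree[1])
    return max(children, default=0)

# Root with two branches: 1 -> {3, 0}, and a bare 2.
tree = (0, ((1, ((3, ()), (0, ()))), (2, ())))
print(node_value(tree))  # 4: the best path runs through 1 then 3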
Example #5
    def shrink(self, option_dist):
        # Fold the first two distributions with cmax until only two remain
        # (<= also guards the degenerate single-option case).
        if len(option_dist) <= 2:
            return option_dist
        else:
            two_dist = [option_dist[0], option_dist[1]]
            new_dist = [cmax(two_dist, default=ZERO)] + option_dist[2:]
            return self.shrink(new_dist)
def exact_node_value_after_observe(obs_tree):
    """A distribution over the expected value of a node after making an observation.

    Arguments:
        obs_tree: the tree of nodes used for the observation
    """
    children = tuple(
        exact_node_value_after_observe(c) + c[0] for c in obs_tree[1])
    return cmax(children, default=ZERO)
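
shrink() folds the first two option distributions with cmax until only two remain, so taking the max over the final pair equals the max over all options. The same recursion on plain numbers, with the built-in max standing in for cmax:

def shrink(option_dist):
    if len(option_dist) <= 2:
        return option_dist
    return shrink([max(option_dist[:2])] + option_dist[2:])

print(shrink([3, 1, 4, 1, 5]))       # [4, 5]
print(max(shrink([3, 1, 4, 1, 5])))  # 5, the same as the max over all five options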