Example #1
0
    def rollout(self, lookahead, env, n: OR_Node):
        """Perform a single IW rollout from node `n`.

        Repeatedly expands `n`, descends to a random unsolved child, and
        stops when a terminal node is reached, a non-novel node is pruned,
        or the simulator-call budget is exhausted.  If the rollout stops
        because of the budget and the pruned-state strategy is "heuristic",
        the last node gets a cost-to-go estimate.
        """
        while not n.SOLVED and lookahead.sim_calls - lookahead.init_sim_calls < lookahead.sim_budget:
            lookahead.rollout_depth += 1
            lookahead.expand(env, n)

            # Pick a random unsolved child of n, timing the selection.
            # (A dead `t0 = time.perf_counter()` before expand() was removed:
            # its value was overwritten here before ever being read.)
            t0 = time.perf_counter()
            n = lookahead.pick_random_unsolved_child(env, n)
            tf = time.perf_counter()
            lookahead.rollout_runtime_pick_random_unsolved += tf - t0
            if n.terminal:
                n.visited = True
                lookahead.num_visited += 1
                lookahead.solve_and_propagate_labels(n)
                # Track the worst (lowest) accumulated reward seen at any terminal.
                if lookahead.worst_terminal_accumulated_reward is None or lookahead.worst_terminal_accumulated_reward > n.accumulated_reward:
                    lookahead.worst_terminal_accumulated_reward = n.accumulated_reward
                break
            # Novelty check against the root's feature table, timed separately.
            t0 = time.perf_counter()
            is_novel = lookahead.root.feature_table.is_novel((n.state[0]))
            tf = time.perf_counter()
            lookahead.rollout_runtime_is_novel += tf - t0
            if is_novel:
                n.visited = True
                lookahead.num_visited += 1
                lookahead.root.feature_table.update_feature_table((n.state[0]))
            elif not n.visited:
                # Pruned as not novel: estimate its value and label it solved.
                n.randomV = lookahead.cost_to_go_est(env, n)
                lookahead.solve_and_propagate_labels(n)
                break
        if not n.SOLVED and lookahead._pruned_state_strategy == "heuristic":
            # Didn't finish the rollout due to the computational budget:
            # apply the heuristic value.
            n.randomV = lookahead.cost_to_go_est(env, n)
Example #2
0
    def make_root_node(self, s, forget=True):
        """Install the OR node for state `s` as the new search-tree root.

        Reuses the node already stored in the expansion graph when one
        exists; otherwise registers a fresh node as a graph root.  When
        `forget` is True the whole expansion graph is discarded first.
        Always rebuilds the root's feature table and sets `self.current`.
        """
        n = OR_Node(s, 0)
        n.accumulated_reward = 0
        n.num_visits = 0
        if forget:
            # Throw away everything learned so far.
            self._exp_graph = AND_OR_Graph()

        if self.root is not None:
            # The old root's feature table is rebuilt below via the strategy.
            del self.root.feature_table

        # Keep the try body minimal: previously free_mem /
        # initialize_feature_table sat inside the try, so a KeyError raised
        # by them would be wrongly caught and re-run the "new root" branch
        # on partially mutated state.
        try:
            n = self._exp_graph.locate(n)
        except KeyError:
            # Unseen state: register the fresh node as a graph root.
            n.SOLVED = False
            n.visited = False
            if self.root is not None:
                self.free_mem(self.root, n)
            self._exp_graph.register(n)
            self._exp_graph.add_root(n)
            self.root = n
            self.strategy.initialize_feature_table(self, n)
            wizluk.logger.debug("New root node ")
        else:
            # State already in the graph: recycle its node.
            n.SOLVED = False
            self.free_mem(self.root, n)
            self.root = n
            self.strategy.initialize_feature_table(self, n)
            wizluk.logger.debug("Root node already considered")
        self.current = self.root
Example #3
0
    def rollout(self, lookahead, env, n: OR_Node):
        """Run one depth-aware IW rollout starting from node `n`.

        Descends through randomly chosen unsolved children until the node
        is solved, a terminal is reached, the node is pruned for lack of
        depth-novelty, or the simulator-call budget runs out.  A node cut
        off by the budget may receive a heuristic cost-to-go value,
        depending on the configured pruned-state strategy.
        """
        while not n.SOLVED and (lookahead.sim_calls - lookahead.init_sim_calls) < lookahead.sim_budget:
            lookahead.rollout_depth += 1
            lookahead.expand(env, n)
            # Descend to a randomly chosen unsolved child.
            n = lookahead.pick_random_unsolved_child(env, n)

            if n.terminal:
                lookahead.num_visited += 1
                n.visited = True
                lookahead.solve_and_propagate_labels(n)
                # Keep track of the worst terminal return seen so far.
                worst = lookahead.worst_terminal_accumulated_reward
                if worst is None or worst > n.accumulated_reward:
                    lookahead.worst_terminal_accumulated_reward = n.accumulated_reward
                break

            key = (n.state[0], n.d)
            _feat, _val, _rank, best_depth = lookahead.root.feature_table.get_novel_feature(key)
            if n.d < best_depth:
                # Novel at this depth: record it and keep rolling out.
                lookahead.num_visited += 1
                n.visited = True
                lookahead.root.feature_table.update_feature_table(key)
            elif not n.visited:
                # First visit but not novel: prune with a cost-to-go estimate.
                lookahead.num_visited += 1
                n.visited = True
                n.randomV = lookahead.cost_to_go_est(env, n)
                lookahead.solve_and_propagate_labels(n)
                break
            elif n.d > best_depth:
                # Already visited and strictly deeper than the recorded
                # novel depth: prune it and drop its subtree.
                n.randomV = lookahead.cost_to_go_est(env, n)
                n._children = {}
                lookahead.solve_and_propagate_labels(n)
                break

        if not n.SOLVED and lookahead._pruned_state_strategy == "heuristic":
            # Rollout cut short by the computational budget: fall back to
            # the heuristic value.
            n.randomV = lookahead.cost_to_go_est(env, n)
Example #4
0
    def make_root_node(self, s, forget=True):
        """Make the OR node for state `s` the current root, reusing any
        node the expansion graph already holds for that state.  When
        `forget` is True the expansion graph is reset first."""
        node = OR_Node(s, 0)
        if forget:
            self._exp_graph = AND_OR_Graph()

        try:
            node = self._exp_graph.locate(node)
        except KeyError:
            # First time we see this state: add it to the graph as a root.
            node.visited = False
            self._exp_graph.register(node)
            self._exp_graph.add_root(node)
        # Both branches end up installing the (located or fresh) node.
        self.root = node
        self.current = self.root
    def make_root_node(self, s, forget=True):
        """Set the root to the node for state `s` (constructed at depth 1,
        then forced to depth 0), optionally forgetting the old expansion
        graph, and free memory held by the previous root's subtree."""
        node = OR_Node(s, 1)
        if forget:
            self._exp_graph = AND_OR_Graph()

        try:
            # Reuse the stored node when this state was seen before.
            node = self._exp_graph.locate(node)
            self.free_mem(self.root, node)
            self.root = node
        except KeyError:
            # Fresh state: register it and, if there was an old root,
            # release the memory of its now-unreachable subtree.
            node.visited = False
            if self.root is not None:
                self.free_mem(self.root, node)
            self._exp_graph.register(node)
            self._exp_graph.add_root(node)
            self.root = node
        # The root always sits at depth 0, whatever depth it was built with.
        self.root._d = 0
        self.current = self.root
Example #6
0
    def pick_random_unsolved_child(self, env, n: OR_Node):
        """Sample an unsolved action child of `n` and return the successor
        OR node reached by applying that action in `env`.

        For Atari environments with caching enabled, a previously stored
        emulator state is restored instead of re-simulating when possible;
        otherwise `env.step` is called and the successor node is created
        (or matched against an existing child).  Accumulated reward, visit
        counters and the search's max depth are updated before returning.
        """
        selected = self.sample_child(n)
        assert (not n.children[selected].SOLVED)
        if self._atari == "True" and len(n.children[selected].children
                                         ) != 0 and self._caching != "None":
            elapsed_steps = env._elapsed_steps
            envuw = env.unwrapped  # NOTE(review): unused local
            # Grab the first (successor, reward) pair of the AND node.
            # NOTE(review): assumes one cached successor is representative
            # (deterministic action) -- confirm against AND node semantics.
            for node, reward in n.children[selected].children:
                break
            if node.restoreState is not None:
                # Restore the cached emulator state instead of stepping.
                env.unwrapped.restore_full_state(node.restoreState)
                env._elapsed_steps = elapsed_steps + 1
            else:
                # No cached state yet: simulate the action and cache it.
                t0 = time.perf_counter()
                next_state, sreward, terminal, _ = env.step(selected)
                tf = time.perf_counter()
                self.rollout_runtime_sim += tf - t0
                reward = sreward
                self.sim_calls += 1
                node.restoreState = env.unwrapped.clone_full_state()
                node.terminal = terminal
        else:
            # Simulate the selected action, timing the simulator call.
            t0 = time.perf_counter()
            next_state, reward, terminal, _ = env.step(selected)
            tf = time.perf_counter()
            self.rollout_runtime_sim += tf - t0
            self.sim_calls += 1

            # Build the successor's flat feature representation.
            if self._representation is not None:
                t0 = time.perf_counter()

                parentLength = int(n.state.size)

                screen = env.unwrapped.ale.getScreen()
                next_state_flat = self._representation.getActiveFeatures(
                    screen, n.state[0], parentLength)

                tf = time.perf_counter()

                next_state_flat = np.reshape(next_state_flat,
                                             [1, len(next_state_flat)])

                # Feature extraction is charged to simulation runtime.
                self.rollout_runtime_sim += tf - t0
            elif self._grayScale == "True":
                next_state_flat = np.reshape(
                    self.convertScreenToGrayCompressed(next_state),
                    [1, self._grayScaleSizeX * self._grayScaleSizeY])
            else:
                # Raw observation, flattened to a single row vector.
                next_state_flat = np.reshape(
                    next_state, [1, np.prod(env.observation_space.shape)])

            succ = OR_Node(next_state_flat, n.d + 1, terminal)
            succ.add_parent(n.children[selected])
            if self._atari == "True" and self._caching != "None":
                succ.restoreState = env.unwrapped.clone_full_state()
            succ.SOLVED = False
            succ.visited = False

            # Nodes at the horizon are treated as terminal.
            if succ.d == self._horizon:
                succ.terminal = True

            if n.children[selected].update(reward, succ):
                n.children[
                    selected].SOLVED = False  # if we get a new successor, we unsolve the node
                node = succ
            else:
                # Duplicate successor: reuse the equal node already stored.
                for child in n.children[selected].children:
                    succ1, reward1 = child
                    if reward1 == reward and succ1 == succ:
                        node = succ1

        # Propagate the best accumulated reward along this path; first touch
        # of the node (no attribute yet) also initialises its visit counter.
        try:
            node.accumulated_reward = max(node.accumulated_reward,
                                          n.accumulated_reward + reward)
        except AttributeError:
            node.accumulated_reward = n.accumulated_reward + reward
            node.num_visits = 0

        self.max_depth = max(self.max_depth, node.d)
        n.children[selected].num_visits += 1
        node.num_visits += 1
        return node