# These excerpts assume `copy` and `random` plus pomdp_py's Particles,
# VNodeParticles, RootVNodeParticles, RootVNode, and sample_generative_model
# are available at module level.
def _VNode(self, agent=None, root=False, **kwargs):
    """Returns a VNode with default values; the function naming makes it
    clear that this function is about creating a VNode object."""
    if root:
        # agent cannot be None.
        return RootVNodeParticles(self._num_visits_init,
                                  self._value_init,
                                  agent.history,
                                  belief=copy.deepcopy(agent.belief))
    else:
        if agent is None:
            return VNodeParticles(self._num_visits_init,
                                  self._value_init,
                                  belief=Particles([]))
        else:
            return VNodeParticles(self._num_visits_init,
                                  self._value_init,
                                  belief=copy.deepcopy(agent.belief))
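The bracket indexing used throughout the planner (`root[action][observation]`) goes through each node's `children` dict. The sketch below is a rough, simplified rendering of that behavior rather than pomdp_py's actual source; the class names are hypothetical and exist only to illustrate why chained indexing works and why a missing child reads back as None.

# Hypothetical sketch of pomdp_py-style tree node indexing.
class TreeNodeSketch:
    def __init__(self):
        self.children = {}

    def __getitem__(self, key):
        # A missing child reads back as None, which is what
        # `if root[action][observation] is None:` relies on.
        return self.children.get(key)

    def __setitem__(self, key, node):
        self.children[key] = node


class QNodeSketch(TreeNodeSketch):  # children: observation -> VNode
    pass


class VNodeSketch(TreeNodeSketch):  # children: action -> QNode
    pass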
def __init__(self, num_visits, value, history, belief=Particles([])):
    # vnodeobj = VNodeParticles(num_visits, value, belief=belief)
    RootVNode.__init__(self, num_visits, value, history)
    self.belief = belief
def __init__(self, num_visits, value, belief=Particles([])):
    self.num_visits = num_visits
    self.value = value
    self.belief = belief
    self.children = {}  # a -> QNode
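One thing to watch with these constructors: the default `belief=Particles([])` is evaluated once at function-definition time, so any nodes constructed without an explicit belief would share the same Particles instance. Every call site above does pass a belief explicitly (either a fresh `Particles([])` or a deep copy of the agent's belief), so the pitfall is not triggered here, but a `None` default avoids it entirely. A minimal sketch of that variant, keeping the constructor above otherwise unchanged:

def __init__(self, num_visits, value, belief=None):
    self.num_visits = num_visits
    self.value = value
    # Create a fresh, per-node particle set when no belief is given,
    # instead of sharing one default instance across all nodes.
    self.belief = belief if belief is not None else Particles([])
    self.children = {}  # a -> QNode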
def _simulate(self, state, history, root, parent, observation, depth,
              k_o=5, alpha_o=1 / 15):
    # root <- class:VNode, parent <- class:QNode
    if depth > self._max_depth:
        return 0
    if root is None:
        if self._agent.tree is None:
            root = self._VNode(agent=self._agent, root=True)
            self._agent.tree = root
            if self._agent.tree.history != self._agent.history:
                raise ValueError("Unable to plan for the given history.")
        else:
            root = self._VNode()
        if parent is not None:
            parent[observation] = root

    action = self._ActionProgWiden(vnode=root, history=history)
    next_state, observation, reward, nsteps = sample_generative_model(
        self._agent, state, action)

    # Observation progressive widening: only add a new observation node
    # while |C(ha)| <= k_o * N(ha)^alpha_o; otherwise reuse an existing one.
    _history_action = root[action]
    new_observation = False
    if len(_history_action.children) <= k_o * _history_action.num_visits**alpha_o:
        if root[action][observation] is None:
            # C(ha) <- C(ha) U {o}
            root[action][observation] = self._VNode(agent=self._agent, root=False)
            new_observation = True
    else:
        observation = random.choice(list(root[action].children))

    # append s' to B(hao)
    root[action][observation].belief.add(next_state)
    # append Z(o|s,a,s') to W(hao); this assumes the node's belief is a
    # weighted-particle container that supports item assignment
    # (see the sketch after this function).
    prob = self._pomdp.agent._observation_model.probability(
        observation, next_state, action)
    root[action][observation].belief[next_state] = prob

    if new_observation:
        # o was not previously in C(ha): estimate the value of hao with a rollout
        total_reward = reward + (self._discount_factor**nsteps) * self._rollout(
            next_state, history + ((action, observation),),
            root[action][observation], depth + nsteps)
    else:
        # s' <- select B(hao)[i] w.p. W(hao)[i] / sum_{j=1..m} W(hao)[j]
        next_state = root[action][observation].belief.random()
        # r <- R(s, a, s')
        reward = self._agent.reward_model.sample(state, action, next_state)
        total_reward = reward + (self._discount_factor**nsteps) * self._simulate(
            next_state,
            history + ((action, observation),),
            root[action][observation],
            root[action],
            observation,
            depth + nsteps)

    root.num_visits += 1
    root[action].num_visits += 1
    root[action].value = root[action].value + \
        (total_reward - root[action].value) / root[action].num_visits
    return total_reward
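`_simulate` treats the belief at each hao node as both B(hao) (the particle set) and W(hao) (the per-particle observation likelihoods): it calls `belief.add(next_state)`, assigns `belief[next_state] = prob`, and samples a weighted particle with `belief.random()`. pomdp_py's plain `Particles` class does not store weights, so this presupposes a weighted-particle container roughly along the following lines. This is a hypothetical sketch matching the call pattern above, not pomdp_py's API.

import random

class WeightedParticlesSketch:
    """Hypothetical container playing the role of both B(hao) and W(hao)."""

    def __init__(self):
        self._particles = []  # B(hao): sampled next states
        self._weights = []    # W(hao): matching observation likelihoods

    def add(self, state):
        # append s' to B(hao); its weight is filled in by __setitem__ below
        self._particles.append(state)
        self._weights.append(0.0)

    def __setitem__(self, state, weight):
        # append Z(o|s,a,s') to W(hao) for the most recently added particle
        if self._particles and self._particles[-1] == state:
            self._weights[-1] = weight

    def random(self):
        # s' <- select B(hao)[i] with probability W(hao)[i] / sum_j W(hao)[j]
        total = sum(self._weights)
        if total == 0:
            return random.choice(self._particles)
        return random.choices(self._particles, weights=self._weights, k=1)[0]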