def add(self, data):
    # prepend a new node at the head of the list
    self.no_of_nodes += 1
    if self.head is None:
        node = Node(data)
        node.node_next = None
        self.head = node
    else:
        node = Node(data)
        node.node_next = self.head
        self.head = node
def start(self, observation):
    self.episode_counter += 1
    if self.keep_tree and self.root is None:
        self.root = Node(None, observation)
        self.expansion(self.root)
    if self.keep_tree:
        self.subtree_node = self.root
    else:
        self.subtree_node = Node(None, observation)
        self.expansion(self.subtree_node)
    action = BaseDynaAgent.start(self, observation)
    return action
def expansion(self, node):
    for a in self.action_list:
        # with the assumption of a deterministic model
        next_state, is_terminal, reward = self.true_model(node.get_state(), a)
        # if np.array_equal(next_state, node.get_state()):
        #     continue
        value = self.get_initial_value(next_state)
        child = Node(node, next_state, is_terminal=is_terminal,
                     action_from_par=a, reward_from_par=reward, value=value)
        node.add_child(child)

        buffer_prev_state = self.getStateRepresentation(node.get_state())
        act_ind = self.getActionIndex(a)
        buffer_prev_action = torch.tensor([act_ind], device=self.device).view(1, 1)
        buffer_reward = torch.tensor([reward], device=self.device)
        buffer_state = None
        buffer_action = None
        if not is_terminal:
            buffer_state = self.getStateRepresentation(next_state)
            buffer_action = self.policy(buffer_state)
        self.updateTransitionBuffer(
            utils.transition(buffer_prev_state, buffer_prev_action, buffer_reward,
                             buffer_state, buffer_action, is_terminal,
                             self.time_step, 0))
def expansion(self, node):
    children_list = []
    sort_list = []
    for a in self.action_list:
        # with the assumption of a deterministic model
        next_state, is_terminal, reward = self.true_model(node.get_state(), a)
        # if np.array_equal(next_state, node.get_state()):
        #     continue
        value = self.get_initial_value(next_state)
        child = Node(node, next_state, is_terminal=is_terminal,
                     action_from_par=a, reward_from_par=reward, value=value)
        children_list.append(child)
        sort_value = self.get_state_value(next_state)
        sort_list.append(sort_value)
    # keep only the branch_factor children with the highest state values
    children_list = [x for _, x in sorted(zip(sort_list, children_list),
                                          key=lambda pair: pair[0], reverse=True)]
    for i in range(self.branch_factor):
        node.add_child(children_list[i])
def enqueue(self, data):
    n = Node(data)
    if self.count() == 0:
        self.first = self.last = n
        return
    self.last.next = n
    self.last = n
def step(self, reward, observation):
    if not self.keep_subtree:
        self.subtree_node = Node(None, observation)
        self.expansion(self.subtree_node)
    for i in range(self.num_iterations):
        self.MCTS_iteration()
    action, sub_tree = self.choose_action()
    self.subtree_node = sub_tree
    return action
def start(self, observation):
    if self.keep_tree and self.root is None:
        self.root = Node(None, observation)
        self.expansion(self.root)
    if self.keep_tree:
        self.subtree_node = self.root
        print(self.subtree_node.get_avg_value())
    else:
        self.subtree_node = Node(None, observation)
        self.expansion(self.subtree_node)
    # self.render_tree()
    for i in range(self.num_iterations):
        self.MCTS_iteration()
    action, sub_tree = self.choose_action()
    self.subtree_node = sub_tree
    return action
def append_to_tail(self, data):
    end = Node(data)
    n = self.head
    if n is None:
        self.head = end
        return
    while n.next is not None:
        n = n.next
    n.next = end
def expansion(self, node):
    for a in self.action_list:
        # with the assumption of a deterministic model
        next_state, is_terminal, reward = self.true_model(node.get_state(), a)
        # if np.array_equal(next_state, node.get_state()):
        #     continue
        value = self.get_initial_value(next_state)
        child = Node(node, next_state, is_terminal=is_terminal,
                     action_from_par=a, reward_from_par=reward, value=value)
        node.add_child(child)
def expansion(self, node):
    for a in self.action_list:
        # with the assumption of a deterministic model
        next_state, is_terminal, reward = self.true_model(node.get_state(), a)
        # if np.array_equal(next_state, node.get_state()):
        #     continue
        value = self.get_initial_value(next_state)
        child = Node(node, next_state, is_terminal=is_terminal,
                     action_from_par=a, reward_from_par=reward, value=value)
        node.add_child(child)

        buffer_prev_state = self.getStateRepresentation(node.get_state())
        act_ind = self.getActionIndex(a)
        buffer_prev_action = torch.tensor([act_ind], device=self.device).view(1, 1)
        buffer_reward = torch.tensor([reward], device=self.device)
        buffer_state = None
        buffer_action = None
        if not is_terminal:
            buffer_state = self.getStateRepresentation(next_state)
            buffer_action = self.policy(buffer_state)

        with torch.no_grad():
            real_prev_action = self.action_list[buffer_prev_action.item()]
            prev_state_value = self.getStateActionValue(buffer_prev_state,
                                                        real_prev_action).item()
            state_value = 0
            if not is_terminal:
                # max(1)[0] selects the largest Q-value of the next state
                # (max(1)[1] would return the argmax index, not the value)
                state_value = self._vf['q']['network'](buffer_state).max(1)[0].item()
                buffer_state = buffer_state.float()
            td_error = buffer_reward.item() + self.gamma * state_value - prev_state_value

        # only store transitions whose TD error is at least the running average
        if td_error >= self.td_average:
            self.updateTransitionBuffer(
                utils.transition(buffer_prev_state.float(), buffer_prev_action,
                                 buffer_reward.float(), buffer_state, None,
                                 is_terminal, self.time_step, 0))
            self.mcts_count += 1
        self.update_average_td_error(td_error)
def step(self, reward, observation):
    if not self.keep_subtree:
        self.subtree_node = Node(None, observation)
        self.expansion(self.subtree_node)

    self.time_step += 1
    self.state = self.getStateRepresentation(observation)
    reward = torch.tensor([reward], device=self.device)
    self.action = self.policy(self.state)

    # store the new transition in the buffer
    if self.episode_counter % 2 == 1:
        self.updateTransitionBuffer(
            utils.transition(self.prev_state, self.prev_action, reward,
                             self.state, self.action, False, self.time_step, 0))

    # update the target network
    if self._target_vf['counter'] >= self._target_vf['update_rate']:
        self.setTargetValueFunction(self._vf['q'], 'q')
        # self.setTargetValueFunction(self._vf['s'], 's')

    # update the value functions with the buffer
    if self._vf['q']['training']:
        if len(self.transition_buffer) >= self._vf['q']['batch_size']:
            transition_batch = self.getTransitionFromBuffer(
                n=self._vf['q']['batch_size'])
            self.updateValueFunction(transition_batch, 'q')
    if self._vf['s']['training']:
        if len(self.transition_buffer) >= self._vf['s']['batch_size']:
            transition_batch = self.getTransitionFromBuffer(
                n=self._vf['s']['batch_size'])
            self.updateValueFunction(transition_batch, 's')

    # train/plan with model
    self.trainModel()
    self.plan()

    self.updateStateRepresentation()
    self.prev_state = self.getStateRepresentation(observation)
    self.prev_action = self.action
    # another option: call self.policy again here
    return self.action_list[self.prev_action.item()]
def add(self, element):
    new_node = Node(element)
    # new_node.get_children()
    new_node.set_children(self.head_node)
    self.head_node = new_node
    self.size += 1
def push(self, data):
    n = Node(data)
    n.next = self.top
    self.top = n
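The enqueue, append_to_tail, and push snippets above all assume a singly linked Node holding a data payload and a next pointer (the first add snippet uses node_next, and the last add snippet uses set_children/head_node instead). A minimal sketch of that assumed Node plus a Stack wrapper around push; the class names, attributes, and pop method here are illustrative assumptions, not part of the original snippets:

# Minimal sketch; Node, Stack, data, next, top, and pop are assumed names.
class Node:
    def __init__(self, data):
        self.data = data
        self.next = None


class Stack:
    def __init__(self):
        self.top = None

    def push(self, data):
        # same logic as the push() snippet above
        n = Node(data)
        n.next = self.top
        self.top = n

    def pop(self):
        # assumed companion to push(), included so the sketch is usable
        if self.top is None:
            raise IndexError("pop from empty stack")
        data = self.top.data
        self.top = self.top.next
        return data


if __name__ == "__main__":
    s = Stack()
    s.push(1)
    s.push(2)
    print(s.pop())  # prints 2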