def get_successors(self, node, mprim):
    '''
    Expand `node` by applying the motion primitive `mprim`.

    If both `self.discrepancy_fn` and `self.successors_fn` are set and the
    discrepancy function returns a non-zero value for this
    (observation, primitive) pair, the successor observation and cost are
    taken from `self.successors_fn`. Otherwise `self.cost_map` is summed
    over every discrete state the primitive passes through, and the last
    of those states becomes the successor observation.

    Returns a `(next_node, cost)` tuple.
    '''
    obs = node.obs

    use_successors_fn = (
        self.discrepancy_fn is not None
        and self.successors_fn is not None
        and self.discrepancy_fn(obs, mprim, 0) != 0
    )
    if use_successors_fn:
        # Ask the successors fn for both the next observation and the cost
        predicted_observation, predicted_cost = self.successors_fn(obs, mprim)
        return Node({'observation': predicted_observation}), predicted_cost

    x_max = X_DISCRETIZATION - 1
    y_max = Y_DISCRETIZATION - 1

    # Walk every discrete state of the primitive, accumulating cell costs
    cost = 0
    for dx, dy, dtheta in mprim.discrete_states:
        # Offsets are relative to the start observation; clamp x and y
        # to [0, max] so the cost map lookup uses clamped indices
        cell_x = min(obs['observation'][0] + dx, x_max)
        cell_y = min(obs['observation'][1] + dy, y_max)
        current_observation = np.array(
            [max(cell_x, 0), max(cell_y, 0), dtheta], dtype=int)
        cost += self.cost_map[current_observation[0],
                              current_observation[1]]

    # NOTE(review): if `mprim.discrete_states` is empty,
    # `current_observation` is never bound and the next line raises.
    return Node({'observation': current_observation}), cost
def get_successors(self, node, ac):
    '''
    Step the model from the state stored in `node` using action `ac`.

    The model is first set to the node's observation (with `goal=True`),
    then stepped once. If `self.discrepancy_fn` is set, the step cost is
    replaced by whatever that function returns for
    (observation, action, cost).

    Returns a `(next_node, cost)` tuple.
    '''
    start_obs = node.obs

    # Synchronise the model with the node's state before stepping it
    self.model.set_observation(start_obs, goal=True)
    successor_obs, step_cost = self.model.step(ac)

    if self.discrepancy_fn is None:
        return Node(successor_obs), step_cost

    # Let the discrepancy fn adjust the cost of this transition
    adjusted_cost = self.discrepancy_fn(
        start_obs['observation'], ac, step_cost)
    return Node(successor_obs), adjusted_cost
def get_successors(self, node, action):
    '''
    Step the gridworld model from the state stored in `node`.

    The model is reset to copies of the node's 'observation' and
    'desired_goal' entries, then stepped once with `action`. If
    `self.discrepancy_fn` is set, the step cost is replaced by its return
    value for (obs, action, cost, next_obs).

    Returns a `(next_node, cost)` tuple.
    '''
    obs = node.obs

    # Pass copies so the node's own arrays are not handed to the model
    state_copy = obs['observation'].copy()
    goal_copy = obs['desired_goal'].copy()
    set_gridworld_state_and_goal(self.model, state_copy, goal_copy)

    # Only the first two of the four values returned by step() are used
    next_obs, cost, _, _ = self.model.step(action)

    if self.discrepancy_fn is None:
        return Node(next_obs), cost

    adjusted_cost = self.discrepancy_fn(obs, action, cost, next_obs)
    return Node(next_obs), adjusted_cost
def get_successors_no_kinematics(self, node, ac):
    '''
    Compute the successor of `node` under action `ac` using only
    `self.model.successor` and `self.model.get_cost`.

    `node` may be a `Node` or a bare observation dict. The successor is
    returned in the same form as the input: a `Node` for a `Node`, an
    observation dict otherwise. If `self.discrepancy_fn` is set, the cost
    is replaced by its return value for (observation, action, cost).

    Returns a `(successor, cost)` tuple.
    '''
    given_node = isinstance(node, Node)
    obs = node.obs if given_node else node
    current_cell = obs['observation']

    next_cell = self.model.successor(current_cell, ac)
    next_obs = {'observation': next_cell, 'sim_state': None}
    cost = self.model.get_cost(current_cell, ac, next_cell)

    # The node is built unconditionally, even when only the dict is returned
    next_node = Node(next_obs)

    # Let the discrepancy fn adjust the cost of this transition
    if self.discrepancy_fn is not None:
        cost = self.discrepancy_fn(current_cell, ac, cost)

    successor = next_node if given_node else next_obs
    return successor, cost
def _fill_cell_values_dijkstra(self, env):
    '''
    Fill `self.cell_values` from a Dijkstra search rooted at the goal cell.

    Every node in the closed set returned by the search has its `_g`
    value stored under the tuple form of its cell.
    '''
    goal_node = Node({'observation': env.goal_cell, 'sim_state': None})

    # TODO: Initial values are obtained without kinematics, since using
    # kinematics is slow (it requires IK)
    search = Dijkstra(
        self.controller.get_successors_no_kinematics,
        self.controller.actions)

    for expanded in search.get_dijkstra_heuristic(goal_node):
        cell_key = tuple(expanded.obs['observation'])
        self.cell_values[cell_key] = expanded._g
def get_successors(self, node, ac):
    '''
    Step the model from the state stored in `node` using action `ac`,
    optionally correcting the result with learned residual dynamics.

    If `self.residual_dynamics_fn` is set, the model's predicted grid
    observation is converted to continuous space, the residual returned
    for (obs, action) is added, and the sum is converted back to the grid
    and written into the successor observation.

    Returns a `(next_node, cost)` tuple.
    '''
    obs = node.obs

    # Synchronise the model with the node's state before stepping it
    self.model.set_observation(obs, goal=True)
    next_obs, cost = self.model.step(ac)

    if self.residual_dynamics_fn is None:
        return Node(next_obs), cost

    residual = self.residual_dynamics_fn(obs, ac)
    # The correction is applied in continuous space, then re-discretized
    predicted_continuous = self.model._grid_to_continuous(
        next_obs['observation'])
    next_obs['observation'] = self.model._continuous_to_grid(
        predicted_continuous + residual)

    return Node(next_obs), cost
def get_successors(self, node, mprim):
    '''
    Expand `node` by applying the motion primitive `mprim`.

    `self.cost_map` is summed over every discrete state the primitive
    passes through, and the last of those states becomes the successor
    observation. If `self.discrepancy_fn` is set, the summed cost is
    replaced by its return value for (obs, mprim, cost).

    Returns a `(next_node, cost)` tuple.
    '''
    obs = node.obs
    x_max = X_DISCRETIZATION - 1
    y_max = Y_DISCRETIZATION - 1

    # Walk every discrete state of the primitive, accumulating cell costs
    cost = 0
    for dx, dy, dtheta in mprim.discrete_states:
        # Offsets are relative to the start observation; clamp x and y
        # to [0, max] so the cost map lookup uses clamped indices
        cell_x = min(obs['observation'][0] + dx, x_max)
        cell_y = min(obs['observation'][1] + dy, y_max)
        current_observation = np.array(
            [max(cell_x, 0), max(cell_y, 0), dtheta], dtype=int)
        cost += self.cost_map[current_observation[0],
                              current_observation[1]]

    # The discrepancy fn sees the start observation and the summed cost
    if self.discrepancy_fn is not None:
        cost = self.discrepancy_fn(obs, mprim, cost)

    # NOTE(review): if `mprim.discrete_states` is empty,
    # `current_observation` is never bound and the next line raises.
    return Node({'observation': current_observation}), cost
def act(self, obs):
    '''
    Wrap `obs` in a `Node` and ask `self.astar` for an action from it.

    Returns the `(best_action, info)` pair produced by `self.astar.act`.
    '''
    chosen_action, search_info = self.astar.act(Node(obs))
    return chosen_action, search_info
def get_successors_obs(self, obs, mprim):
    '''
    Observation-level wrapper around `get_successors`.

    Wraps `obs` in a `Node`, expands it with `mprim`, and unwraps the
    result. Returns a `(next_obs, cost)` tuple.
    '''
    successor_node, cost = self.get_successors(Node(obs), mprim)
    return successor_node.obs, cost
def act(self, obs, limited=True):
    '''
    Wrap `obs` in a `Node` and ask `self.astar` for an action from it.

    `limited` is forwarded unchanged to `self.astar.act`. Returns the
    `(best_action, info)` pair that call produces.
    '''
    chosen_action, search_info = self.astar.act(Node(obs), limited=limited)
    return chosen_action, search_info