def finished(self, state):
    if self.is_not_pushing_box(state):
        flat = state.flatten()
        if flat[7] < -.25:  # hole
            return dist(state[5:7], np.zeros(2)) < .4
        return dist(state[5:8], np.zeros(3)) < .3
    else:
        flat = state.flatten()
        if flat[7] < -.2:  # hole
            return flat[2] < -.15  # and dist(state[:2], state[5:7]) < self.box_distance_finished_condition
        return dist(state[:3], state[5:8]) < self.box_distance_finished_condition
def get_path_to_hole(self, start, finish, from_box_to_hole=True):
    priority_queue = []
    node_to_prev_node = {}  # maps a node to the previous node on the best known path
    visited = set()
    heapq.heappush(priority_queue, (0, start))
    visited.add(start)
    while priority_queue:
        distance_to_curr, curr_node = heapq.heappop(priority_queue)
        visited.add(curr_node)
        if curr_node == finish:
            break
        # From the start node (when leaving the box) all neighbors are usable;
        # everywhere else only traversable neighbors are.
        neighbors = (curr_node.neighbors
                     if (from_box_to_hole and curr_node == start)
                     else curr_node.traversable_neighbors)
        if finish in curr_node.neighbors and finish not in neighbors:
            # If the hole is adjacent, allow stepping onto it even when it is
            # not a traversable neighbor.
            neighbors = neighbors + [finish]
        for n in neighbors:
            if n in visited:
                continue  # already settled; no better path possible
            nodes_in_pq = [entry[1] for entry in priority_queue]
            if n in nodes_in_pq:
                # Already queued: relax its priority if we found a shorter path.
                index = nodes_in_pq.index(n)
                curr_dist = distance_to_curr + dist(n.pos, curr_node.pos)
                prev_dist = priority_queue[index][0]
                if curr_dist < prev_dist:
                    priority_queue[index] = (curr_dist, n)
                    heapq.heapify(priority_queue)  # restore the heap invariant after the in-place update
                    node_to_prev_node[n] = curr_node
            else:
                # First time seeing this node: queue it.
                heapq.heappush(priority_queue, (distance_to_curr + dist(n.pos, curr_node.pos), n))
                node_to_prev_node[n] = curr_node
    # Reconstruct the path by walking predecessors back from the finish node.
    if finish not in node_to_prev_node:
        return [start]  # returns only the first node if there is no path
    curr_node = finish
    path = [finish]
    while curr_node != start:
        curr_node = node_to_prev_node[curr_node]
        path.insert(0, curr_node)
    return path
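# A minimal usage sketch for get_path_to_hole, assuming a hypothetical node
# type with the attributes the planner relies on (.pos, .neighbors,
# .traversable_neighbors); the real node class lives elsewhere in this repo:
#
#   class FakeNode:
#       def __init__(self, pos):
#           self.pos = np.array(pos, dtype=float)
#           self.neighbors = []
#           self.traversable_neighbors = []
#
#   a, b, c = FakeNode([0, 0, 0]), FakeNode([1, 0, 0]), FakeNode([2, 0, 0])
#   a.neighbors = a.traversable_neighbors = [b]
#   b.neighbors = b.traversable_neighbors = [a, c]
#   c.neighbors = c.traversable_neighbors = [b]
#   planner.get_path_to_hole(a, c)  # -> [a, b, c]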
def update_hole_pheromones(self, path):
    # Pheromone ids in [0, PHEROMONE_ID_STAGGER) correspond to holes.
    hole_pheromones = {p for p in self.pheromone_set if 0 <= p < PHEROMONE_ID_STAGGER}
    for node in path:
        neighbors = node.traversable_neighbors
        if len(neighbors) > 0:
            for p in hole_pheromones:
                # Take the strongest neighboring pheromone, decayed exponentially with distance.
                max_p = max(n.pheromones[p] * np.exp(-.1 * dist(node.pos, n.pos)) for n in neighbors)
                if node.pheromones[p] < max_p:
                    node.pheromones[p] = max_p
    return
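# Note on the decay rule above (illustrative numbers, not from the repo):
# a neighbor 1.0 unit away holding pheromone strength 1.0 contributes
# 1.0 * np.exp(-.1 * 1.0) ~= 0.905, so hole pheromone strength falls off
# smoothly with distance along the path.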
def receive_state_info(self, msg):
    state = np.array(vrep.simxUnpackFloats(msg.data))
    if self.shut_down:
        cmd = Vector3()
        cmd.x = 1
        cmd.y = 1
        self.pub.publish(cmd)
    elif self.finished(state):
        if (not self.is_not_pushing_box(state)) and state[7] < -.25 \
                and dist(state[5:7], np.zeros(2)) < 1:  # pushed the box into the hole
            cmd = Vector3()
            cmd.x = -2
            cmd.y = -2
        else:
            cmd = Vector3()
            cmd.x = 0
            cmd.y = 0
        self.pub.publish(cmd)
    if self.counter == 0 and not self.shut_down:
        if self.finished(state):
            if not self.waiting_for_target:
                rospy.sleep(1)
                self.publish_finished_to_map()
                self.waiting_for_target = True
                rospy.wait_for_message("/target" + str(self.robot_id), Vector3)
                self.waiting_for_target = False
        else:
            self.controller.goal = state.ravel()[:2]
            if self.is_not_pushing_box(state):
                # align yourself, otherwise travel towards the node
                action_index = 0 if abs(state[6]) > .5 else 1
            else:
                if any(np.isnan(state)):
                    print(state)
                assert not any(np.isnan(state))
                action_index = self.policy.get_action(state, testing_time=True, probabilistic=True)
            action_name = action_map[action_index]
            adjusted_state_for_controls = self.controller.feature_2_task_state(state)
            left_right_frequencies = self.controller.getPrimitive(adjusted_state_for_controls, action_name)
            cmd = Vector3()
            cmd.x = left_right_frequencies[0]
            cmd.y = left_right_frequencies[1]
            self.pub.publish(cmd)
    self.counter = (self.counter + 1) % self.period
    return
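# Apparent command protocol on self.pub, inferred from the handler above and
# not documented in this file: Vector3.x / Vector3.y normally carry the left
# and right frequencies from the controller, with sentinel pairs (1, 1) while
# shutting down, (0, 0) when finished, and (-2, -2) when the box was pushed
# into a hole.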
def gaussian_explore_q(self, s, testing_time):
    exploration = np.random.random()
    if exploration < self.explore and not testing_time:
        if len(self.model.transitions) > 400 and self.success_states.shape[0] >= 2:
            curr_size = self.success_states.shape[0] - 1
            if curr_size == 0:
                return np.random.randint(self.u_n)
            sample_size = min(1000, curr_size)
            success_states = self.success_states[1:, :]  # drop the placeholder first row
            past_states = success_states[-sample_size:, :]
            actions = list(range(self.u_n))
            # Predict the next state for every candidate action from the current state.
            states = np.repeat(s, self.u_n, axis=0)
            states = self.concatenate_identifier(states)
            next_states = self.model.predict(states, actions).detach().numpy()
            # Disabled alternative: fit a GaussianMixture to past success states
            # and score predicted next states by log-probability.
            """
            if self.gmm_counter % self.gmm_period == 0:
                self.gmm = GaussianMixture(n_components=1)
                self.gmm.fit(past_states)
            log_prob = self.gmm.score_samples(next_states)
            log_prob = log_prob - np.min(log_prob)
            log_prob = np.exp(log_prob)
            probs = log_prob / np.sum(log_prob)
            print('probs: ', probs)
            self.gmm_counter += 1
            self.success_states = self.success_states[:5000, :]
            """
            # Disabled alternative: score next states under a single multivariate
            # Gaussian fit to past success states.
            """
            mean = np.mean(past_states, axis=0)
            centered = past_states - mean
            covariance = np.matmul(centered.T, centered) / (past_states.shape[0])
            covariance = covariance + np.eye(covariance.shape[0]) * 1e-2
            pdf_generator = lambda x: multivariate_normal.pdf(x, mean=mean, cov=covariance)
            pdf_of_next_states = np.apply_along_axis(pdf_generator, 1, next_states)
            pdf_of_next_states = pdf_of_next_states - np.min(pdf_of_next_states)
            probs = np.exp(pdf_of_next_states)
            probs = probs / np.sum(probs)
            """
            # Weight each action by how close its predicted next state lands to
            # the average successful end state.
            avg_end_state = np.mean(past_states, axis=0)
            probs = np.array([np.exp(-7 * dist(avg_end_state, state)) for state in next_states])
            probs = probs / np.sum(probs)
            uniform = np.ones((1, self.u_n)) / self.u_n
            q_lookahead = np.max(self.policy.get_q(self.concatenate_identifier(next_states)).detach().numpy(), axis=1)
            q_lookahead = np.exp(q_lookahead)
            q_lookahead = q_lookahead / np.sum(q_lookahead)
            # Blend the model-based probs with uniform; q_lookahead is computed
            # above but not currently mixed in.
            probs = self.weight_towards_model * probs + (1 - self.weight_towards_model) * uniform
            self.weight_towards_model = max(.1, self.weight_towards_model * .999)  # anneal toward uniform
            probs = probs.flatten()
            probs = probs / np.sum(probs)  # renormalize, just in case
            print('weight:', self.weight_towards_model, ' probs:', probs)
            choice = np.random.choice(self.u_n, p=probs)
            return actions[choice]
        else:
            return np.random.randint(self.u_n)
    else:
        return self.policy.get_action(self.concatenate_identifier(s), testing_time,
                                      probabilistic=(self.policy_action_selection == 'PROB'))
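# Standalone sketch of the distance-based action weighting used above, with
# made-up numbers (next_states has one row per action):
#
#   avg_end_state = np.array([0.0, 0.0])
#   next_states = np.array([[0.1, 0.0], [1.0, 0.0]])
#   d = np.linalg.norm(next_states - avg_end_state, axis=1)
#   probs = np.exp(-7 * d)
#   probs = probs / probs.sum()  # ~[0.998, 0.002]: the closer predicted state dominates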
def get_detection_distance_between_nodes(self, first, second):
    first_position = deepcopy(first.pos)
    second_position = deepcopy(second.pos)
    # Subtract the stacked box heights so the distance is measured between the
    # underlying node positions rather than the tops of the boxes.
    first_position[2] = first_position[2] - sum(b.height for b in first.box)
    second_position[2] = second_position[2] - sum(b.height for b in second.box)
    return dist(first_position, second_position)
def get_distance_between_nodes(self, first, second):
    # Planar (xy) distance; height is ignored here.
    return dist(first.pos[:2], second.pos[:2])