def sample_action(self, current_state, explore=True):
    """Sample a collision-avoidance action for ``current_state``.

    Builds the laser-scan and polar-target tensors expected by the CA
    actor network, runs a forward pass, optionally perturbs the result
    with exploration noise, and clamps both components to [-1, 1].

    Args:
        current_state: project state object exposing ``laserScan`` plus
            whatever fields ``utils.target_transform`` reads.
        explore: when True, exploration noise is added with ~30%
            probability (never when ``self.train_type == 3``).

    Returns:
        Tuple ``(linear, angular)`` of floats, each clamped to [-1, 1].
        # NOTE(review): component semantics (linear/angular) inferred
        # from context -- confirm against the robot controller.
    """
    # Laser scan -> (1, sensor_dim) network input.
    array_laser = utils.remapping_laser_data(current_state.laserScan)
    sensor = Variable(
        torch.FloatTensor(np.reshape(array_laser, (1, self.sensor_dim))))
    # Target in polar coordinates -> (1, target_dim) network input.
    target_polar = utils.target_transform(current_state)
    target = Variable(
        torch.FloatTensor(np.reshape(target_polar, (1, self.target_dim))))
    # Forward pass through the collision-avoidance actor.
    action = self.actor_ca(sensor=sensor, target=target).cpu().data.numpy()
    # BUGFIX: was ``self.train_type is not 3`` -- identity comparison
    # against an int literal only works by CPython's small-int caching
    # and raises SyntaxWarning on Python >= 3.8. Use value inequality.
    if explore and random.uniform(0, 1) > 0.7 and self.train_type != 3:
        action = action + self.noise.sample()
    # Clamp both action components to the actuator range [-1, 1].
    action[0][0] = utils.constrain_actions(action[0][0], 1)
    action[0][1] = utils.constrain_actions(action[0][1], 1)
    return action[0][0], action[0][1]
def navigation(self, current_state):
    """Compute a blended navigation command for ``current_state``.

    Mixes the target-driven differential-drive action with the
    collision-avoidance actor's action. The mixing ratio grows as the
    nearest laser reading gets closer (derived from the minimum value
    of the remapped scan), so obstacle avoidance dominates near
    obstacles and goal seeking dominates in free space.

    Returns:
        Tuple of two floats, each clamped to [-1, 1].
    """
    # Remapped laser scan -> (1, sensor_dim) tensor for the actor.
    laser = utils.remapping_laser_data(current_state.laserScan)
    sensor = Variable(
        torch.FloatTensor(np.reshape(laser, (1, self.sensor_dim))))
    # Goal position in polar form -> (1, target_dim) tensor.
    polar = utils.target_transform(current_state)
    target = Variable(
        torch.FloatTensor(np.reshape(polar, (1, self.target_dim))))
    # Candidate actions from both controllers.
    td_action = self.differential_driver.run(x=current_state.desired_x,
                                             y=current_state.desired_y)
    ca_action = self.actor_ca(sensor=sensor, target=target).cpu().data.numpy()
    # NOTE(review): the result below is never read; the call is kept in
    # case ``predict_state`` has side effects -- confirm and drop if pure.
    predict_state = self.evaluation_net.predict_state(laser.reshape(1, -1))
    # Collision-avoidance weight: minimum scan value scaled by -3.5,
    # capped at 1. (kthvalue(.., 1) picks the smallest element.)
    ratio = min(float(torch.kthvalue(sensor, 1)[0]) / (-3.5), 1)
    # Convex-style blend of the two candidate actions, per component.
    blended = [(1.0 - ratio) * td_action[0][i] + ratio * ca_action[0][i]
               for i in range(2)]
    # Clamp both components to the actuator range [-1, 1].
    first = utils.constrain_actions(blended[0], 1)
    second = utils.constrain_actions(blended[1], 1)
    return first, second
def sample_action(self, current_state, laser_data, explore=True):
    """Sample an action from the controller selected by ``train_type``.

    Stacks three consecutive laser scans into one network input, then
    picks the action source: the target-driven actor (type 1), the
    collision-avoidance actor (type 2), or the analytic differential
    driver (otherwise). Optionally adds exploration noise and clamps
    both components to [-1, 1].

    Args:
        current_state: project state object exposing ``laserScan``,
            ``desired_x`` and ``desired_y``.
        laser_data: sequence holding the two previous laser scans at
            indices 0 and 1.
        explore: when True, exploration noise is added with ~30%
            probability (never when ``self.train_type == 3``).

    Returns:
        Tuple of two floats, each clamped to [-1, 1].
    """
    # Goal coordinates -> (1, state_dim) input; reshape keeps single
    # samples 2-D so they match batch-shaped inputs.
    state = Variable(
        torch.FloatTensor(
            np.transpose(
                np.reshape(
                    [current_state.desired_x, current_state.desired_y],
                    (self.state_dim, 1)))))
    # Three consecutive scans (two history + current) -> one
    # (1, 3 * sensor_dim) input for the CA actor.
    scans = [laser_data[0], laser_data[1], current_state.laserScan]
    sensor = torch.cat(
        [Variable(torch.FloatTensor(np.reshape(
            utils.remapping_laser_data(scan), (1, self.sensor_dim))))
         for scan in scans], 1)
    # Select the action source.
    if self.train_type == 1:
        action = self.actor_td(state=state).cpu().data.numpy()
    elif self.train_type == 2:
        action = self.actor_ca(sensor=sensor).cpu().data.numpy()
    else:
        action = self.differential_driver.run(x=current_state.desired_x,
                                              y=current_state.desired_y)
    # BUGFIX: was ``self.train_type is not 3`` -- identity comparison
    # against an int literal only works by CPython's small-int caching
    # and raises SyntaxWarning on Python >= 3.8. Use value inequality.
    if explore and random.uniform(0, 1) > 0.7 and self.train_type != 3:
        action = action + self.noise.sample()
    # Clamp both action components to the actuator range [-1, 1].
    action[0][0] = utils.constrain_actions(action[0][0], 1)
    action[0][1] = utils.constrain_actions(action[0][1], 1)
    return action[0][0], action[0][1]
crash_time += 1 time.sleep(0.1) resp_ = pytorch_io_service(all_controls) # make sure reset operation has been done # check if one agent start a new loop for i_flag in range(AGENT_NUMBER): if terminate_flag[i_flag] == 1: terminate_flag[i_flag] = 0 # reset flag addition_experience += 1 experimence_mapping[i_flag] = addition_experience # temporary save experience, each loop of each agent should be saved separately for i_agents in range(AGENT_NUMBER): episode_experience[experimence_mapping[i_agents]].append(utils.combine_states(all_current_states, all_next_states, all_controls, i_agents)) if SAVE_LIDAR is True: show_lidar_agents[i_agents].append(utils.remapping_laser_data(all_current_states.group_state[i_agents].laserScan)) ##################################### ##### For experience generating ##### ##################################### for i_experiment in episode_experience.keys(): step_number = len(episode_experience[i_experiment]) # forget too short experimence if step_number < MIN_EXPERIMENCE_NUMBER: continue # original HER experience new_goals = utils.sample_new_targets(episode_experience[i_experiment], HER_K) for i_step in range(step_number):