def set_dset_list(self, data_dir, down_sampling=True):
    """Fill scene_information with the static environment features that will be used
    as part of the input of the Static Scene Feature Extractor module in SafeGAN."""
    _dir = os.path.dirname(os.path.realpath(__file__))
    _dir = _dir.split("/")[:-2]
    _dir = "/".join(_dir)
    directory = _dir + '/datasets/safegan_dataset/'

    self.list_data_files = sorted([
        get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
        for _path in os.listdir(data_dir)
    ])
    for name in self.list_data_files:
        path_group = os.path.join(directory, get_dset_group_name(name))

        # The inputs are the boundary points between the traversable and
        # non-traversable areas. It is possible to take all points or just a sample.
        path = os.path.join(path_group, name)
        map = np.load(path + "/world_points_boundary.npy")
        if self.down_samples != -1 and down_sampling and map.shape[0] > self.down_samples:
            # Uniformly sub-sample the boundary points down to self.down_samples points.
            # Use a separate variable for the stride so the down_sampling flag is not clobbered.
            step = map.shape[0] // self.down_samples
            sampled = map[::step]
            map = sampled[:self.down_samples]
        self.scene_information[name] = torch.from_numpy(map).type(torch.float).to(device)
def set_dset_list(self, data_dir, down_sampling=True, down_samples=200):
    """Fill scene_information with the static environment features (boundary points),
    with the number of down-sampled points passed explicitly as an argument."""
    directory = get_root_dir() + '/datasets/safegan_dataset/'

    self.list_data_files = sorted([
        get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
        for _path in os.listdir(data_dir)
    ])
    for name in self.list_data_files:
        path_group = os.path.join(directory, get_dset_group_name(name))

        # The inputs are the boundary points between the traversable and
        # non-traversable areas. It is possible to take all points or just a sample.
        path = os.path.join(path_group, name)
        map = np.load(path + "/world_points_boundary.npy")
        if down_samples != -1 and down_sampling and map.shape[0] > down_samples:
            step = map.shape[0] // down_samples
            sampled = map[::step]
            map = sampled[:down_samples]
        self.scene_information[name] = torch.from_numpy(map).type(torch.float).to(device)
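# A minimal, self-contained sketch of the uniform down-sampling used in the two
# set_dset_list variants above: stride through the boundary points with step
# N // down_samples, then truncate to exactly down_samples points. The names
# uniform_downsample and boundary_points are illustrative only, not part of the
# repository API.
import numpy as np

def uniform_downsample(boundary_points, down_samples):
    """Return at most `down_samples` points, evenly spaced along the first axis."""
    n_points = boundary_points.shape[0]
    if down_samples == -1 or n_points <= down_samples:
        return boundary_points
    step = n_points // down_samples
    return boundary_points[::step][:down_samples]

# Example: 1000 boundary points in world coordinates reduced to 200.
# points = np.random.rand(1000, 2)
# assert uniform_downsample(points, 200).shape == (200, 2)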
def set_dset_list(self, data_dir):
    """Fill scene_information with the static environment features that will be used
    as part of the input of the Static Scene Feature Extractor module in SafeGAN."""
    directory = get_root_dir() + '/datasets/safegan_dataset/'

    self.list_data_files = sorted([
        get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
        for _path in os.listdir(data_dir)
    ])
    for name in self.list_data_files:
        path_group = os.path.join(directory, get_dset_group_name(name))

        if self.pool_static_type == "physical_attention_no_encoder":
            # In this case the features are the ones extracted by one of the
            # segmentation networks I trained on the new dataset I created.
            # The features are taken before the last upsampling layers.
            path = os.path.join(path_group + "/segmented_features", name)
            features = np.load(path + "_segmentation_features.npy")
            features = torch.from_numpy(features).type(torch.float).to(device)

        elif self.pool_static_type == "physical_attention_with_encoder":
            # In this case the input is the raw image or the segmented one
            # (produced by one of the segmentation networks I trained on the new
            # dataset I created). The image is then encoded by a deep network such as ResNet.
            path = os.path.join(path_group + "/segmented_scenes", name)
            image = plt.imread(path + ".jpg")
            image = torch.from_numpy(image).type(torch.float).to(device)
            # Images fed to the model must be a float tensor of shape (N, 3, 256, 256),
            # where N is the batch size. PyTorch follows the NCHW convention, so the
            # channels dimension (C) must precede the spatial dimensions.
            image = image.permute(2, 0, 1)
            # Normalize the image
            image = self.transform(image)
            features = self.attention_encoder(image.unsqueeze(0))

        else:
            print("ERROR in recognizing physical attention pool static type")
            exit()

        self.scene_information[name] = features
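# A minimal sketch of the HWC -> NCHW preprocessing performed in the
# "physical_attention_with_encoder" branch above, assuming a 256x256 RGB scene
# image and torchvision-style per-channel normalization. The mean/std values
# below are the common ImageNet constants, used here only as placeholders for
# whatever self.transform applies in the repository.
import torch
from torchvision import transforms

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

image_hwc = torch.rand(256, 256, 3)      # stand-in for an H x W x C image from plt.imread
image_chw = image_hwc.permute(2, 0, 1)   # PyTorch expects C x H x W
image_chw = normalize(image_chw)         # per-channel normalization
batch = image_chw.unsqueeze(0)           # add batch dimension -> (1, 3, 256, 256)
assert batch.shape == (1, 3, 256, 256)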
def forward(self, h_states, seq_start_end, end_pos, rel_pos, seq_scene_ids=None):
    """
    Inputs:
    - h_states: Tensor of shape (num_layers, batch, h_dim)
    - seq_start_end: A list of tuples which delimit sequences within batch.
    - end_pos: Absolute end position of obs_traj (batch, 2)
    Output:
    - pool_h: Tensor of shape (batch, h_dim)
    """
    pool_h = []
    total_grid_size = self.grid_size * self.grid_size
    for i, (start, end) in enumerate(seq_start_end):
        start = start.item()
        end = end.item()
        num_ped = end - start
        curr_hidden = h_states.view(-1, self.h_dim)[start:end]
        curr_hidden_repeat = curr_hidden.repeat(num_ped, 1)
        curr_end_pos = end_pos[start:end]
        curr_pool_h_size = (num_ped * total_grid_size) + 1
        curr_pool_h = curr_hidden.new_zeros((curr_pool_h_size, self.h_dim)).to(device)

        top_left, bottom_right = self.get_bounds(curr_end_pos)

        # Repeat positions -> P1, P2, P1, P2
        curr_end_pos_rep = curr_end_pos.repeat(num_ped, 1)
        # Repeat bounds -> B1, B1, B2, B2
        top_left = self.repeat(top_left, num_ped)
        bottom_right = self.repeat(bottom_right, num_ped)

        grid_pos = self.get_grid_locations(top_left, curr_end_pos_rep).type_as(seq_start_end)

        # Mark all positions to exclude as non-zero: find which pedestrians fall
        # outside the neighbourhood bounds of each other pedestrian.
        x_bound = ((curr_end_pos_rep[:, 0] >= bottom_right[:, 0]) +
                   (curr_end_pos_rep[:, 0] <= top_left[:, 0]))
        y_bound = ((curr_end_pos_rep[:, 1] >= top_left[:, 1]) +
                   (curr_end_pos_rep[:, 1] <= bottom_right[:, 1]))

        within_bound = x_bound + y_bound
        within_bound[0::num_ped + 1] = 1  # Don't include the ped itself
        within_bound = within_bound.view(-1)

        # This is a tricky way to get scatter_add to work without a for loop:
        # offset everything by 1 and use the initial 0 position to dump all
        # unnecessary adds.
        grid_pos += 1
        offset = torch.arange(0, total_grid_size * num_ped,
                              total_grid_size).type_as(seq_start_end)
        offset = self.repeat(offset.view(-1, 1), num_ped).view(-1)
        grid_pos += offset
        grid_pos[within_bound != 0] = 0
        # grid_pos expanded to [num_ped**2, h_dim]
        grid_pos = grid_pos.view(-1, 1).expand_as(curr_hidden_repeat).to(device)

        # curr_pool_h: [num_ped * total_grid_size + 1, h_dim]
        # curr_hidden_repeat: [num_ped**2, h_dim]
        curr_pool_h = curr_pool_h.scatter_add(0, grid_pos, curr_hidden_repeat)
        curr_pool_h = curr_pool_h[1:]  # drop the dump row

        if visualize_attention:
            # Used for visualization of the attention weights over the grid cells.
            embed_info = torch.cat([curr_end_pos, rel_pos[start:end]], dim=1)
            encoder_out = curr_pool_h.view(num_ped, total_grid_size, self.h_dim)
            curr_pool_h_after_attention, attention_weights = self.attention_decoder(
                encoder_out=encoder_out,
                curr_hidden=curr_hidden,
                embed_info=embed_info)
            data_dir = get_test_data_path('sdd')
            list_data_files = sorted([
                get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
                for _path in os.listdir(data_dir)
            ])
            seq_scenes = [list_data_files[num] for num in seq_scene_ids]
            visualize_attention_weights(seq_scenes[i], self.grid_size,
                                        attention_weights, end_pos[start:end],
                                        ax1, ax2)

        pool_h.append(curr_pool_h.view(num_ped, total_grid_size, self.h_dim))  # grid_size * grid_size * h_dim

    pool_h = torch.cat(pool_h, dim=0)
    encoder_out = pool_h.view(-1, total_grid_size, self.h_dim)
    embed_info = torch.cat([end_pos, rel_pos], dim=1)
    pool_h, attention_weights = self.attention_decoder(
        encoder_out=encoder_out,
        curr_hidden=h_states.squeeze(0),
        embed_info=embed_info)
    pool_h = self.mlp_pool(pool_h)
    return pool_h
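# A tiny, self-contained demonstration of the scatter_add trick used in
# forward() above: grid indices are shifted by +1 so that index 0 becomes a
# "dump" row for every neighbour that falls outside the grid, and the dump row
# is discarded afterwards. Shapes and values here are toy examples, not taken
# from the model.
import torch

h_dim = 4
total_grid_size = 3                       # pretend grid with 3 cells for one pedestrian
hidden = torch.ones(5, h_dim)             # 5 neighbour hidden states to pool

# Cell index per neighbour, already offset by +1; 0 means "out of bounds, discard".
grid_pos = torch.tensor([1, 2, 0, 2, 3])
index = grid_pos.view(-1, 1).expand_as(hidden)

pooled = torch.zeros(total_grid_size + 1, h_dim).scatter_add(0, index, hidden)
pooled = pooled[1:]                       # drop the dump row

# Cell 0 and cell 2 each received one neighbour, cell 1 received two.
print(pooled[:, 0])                       # tensor([1., 2., 1.])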
def collect_generated_samples(args, generator1, generator2, data_dir, data_set,
                              model_name, selected_scene=None, selected_batch=-1):
    num_samples = 10  # args.best_k
    _, loader = data_loader(args, data_dir, shuffle=False)
    with torch.no_grad():
        for b, batch in enumerate(loader):
            print('batch = {}'.format(b))
            batch = [tensor.cuda() for tensor in batch]
            if b != selected_batch and selected_batch != -1:
                continue
            (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, non_linear_ped,
             loss_mask, traj_frames, seq_start_end, seq_scene_ids) = batch

            list_data_files = sorted([
                get_dset_name(os.path.join(data_dir, _path).split("/")[-1])
                for _path in os.listdir(data_dir)
            ])
            seq_scenes = [list_data_files[num] for num in seq_scene_ids]

            photo_list, homography_list, annotated_points_list, scene_name_list, scene_information = [], [], [], [], {}
            for i, (start, end) in enumerate(seq_start_end):
                dataset_name = seq_scenes[i]
                path = get_path(dataset_name)
                reader = imageio.get_reader(get_sdd_dir(dataset_name, 'video'), 'ffmpeg')
                annotated_points, h = get_homography_and_map(dataset_name, "/world_points_boundary.npy")
                homography_list.append(h)
                annotated_points_list.append(annotated_points)
                scene_name_list.append(dataset_name)
                scene_information[dataset_name] = annotated_points

                start = start.item()
                (obs_len, batch_size, _) = obs_traj.size()
                frame = traj_frames[obs_len][start][0].item()
                photo = reader.get_data(int(frame))
                photo_list.append(photo)

            scene_name = np.unique(scene_name_list)
            if selected_scene is not None and not (scene_name == selected_scene).all():
                print(selected_scene, ' is not in current batch ', scene_name)
                continue

            save_pickle(obs_traj, 'obs_traj', selected_scene, b, data_set, model_name)
            save_pickle(pred_traj_gt, 'pred_traj_gt', selected_scene, b, data_set, model_name)
            save_pickle(seq_start_end, 'seq_start_end', selected_scene, b, data_set, model_name)
            save_pickle(homography_list, 'homography_list', selected_scene, b, data_set, model_name)
            save_pickle(annotated_points_list, 'annotated_points_list', selected_scene, b, data_set, model_name)
            save_pickle(photo_list, 'photo_list', selected_scene, b, data_set, model_name)
            save_pickle(scene_name_list, 'scene_name_list', selected_scene, b, data_set, model_name)
            save_pickle(scene_information, 'scene_information', selected_scene, b, data_set, model_name)

            pred_traj_fake1_list, pred_traj_fake2_list = [], []
            for sample in range(num_samples):
                pred_traj_fake1, _ = get_trajectories(generator1, obs_traj, obs_traj_rel,
                                                      seq_start_end, pred_traj_gt,
                                                      seq_scene_ids, data_dir)
                pred_traj_fake2, _ = get_trajectories(generator2, obs_traj, obs_traj_rel,
                                                      seq_start_end, pred_traj_gt,
                                                      seq_scene_ids, data_dir)
                pred_traj_fake1_list.append(pred_traj_fake1)
                pred_traj_fake2_list.append(pred_traj_fake2)

            save_pickle(pred_traj_fake1_list, 'pred_traj_fake1_list', selected_scene, b, data_set, model_name)
            save_pickle(pred_traj_fake2_list, 'pred_traj_fake2_list', selected_scene, b, data_set, model_name)
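# collect_generated_samples() above stores num_samples (= best_k) predictions
# per generator. A minimal sketch of how those collected samples might later be
# scored with a best-of-k average displacement error (ADE), assuming SGAN-style
# tensors of shape (pred_len, num_peds, 2). min_ade is illustrative, not a
# function from this repository, and takes the best sample per pedestrian.
import torch

def min_ade(pred_list, pred_traj_gt):
    """pred_list: list of (pred_len, num_peds, 2) tensors; returns best-of-k ADE."""
    # Per-sample, per-pedestrian ADE: shape (num_samples, num_peds).
    ades = torch.stack([
        torch.norm(pred - pred_traj_gt, dim=2).mean(dim=0) for pred in pred_list
    ])
    # Best sample per pedestrian, then average over pedestrians.
    return ades.min(dim=0).values.mean()

# Example with random tensors standing in for pred_traj_fake1_list / pred_traj_gt:
# gt = torch.rand(12, 3, 2)
# samples = [torch.rand(12, 3, 2) for _ in range(10)]
# print(min_ade(samples, gt))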