def create_sampled_dataset(original_dataset, rate):
    """Build a resampled copy of the dataset, one trace per (user, video).

    Every sample's yaw/pitch is passed through
    ``transform_the_degrees_in_range`` and ``eulerian_to_cartesian``; the
    resulting point is then described by the rotation (quaternion) returned
    by ``rotationBetweenVectors`` from the reference vector ``[1, 0, 0]``.
    The timestamped quaternions are handed to ``interpolate_quaternions``.

    Args:
        original_dataset: nested mapping ``user -> video -> samples`` where
            each sample is indexed with ``'sec'``, ``'yaw'`` and ``'pitch'``.
        rate: forwarded unchanged to ``interpolate_quaternions``.

    Returns:
        dict ``user -> video -> result of interpolate_quaternions``.
    """
    sampled = {}
    total_users = len(original_dataset.keys())
    for user_idx, user in enumerate(original_dataset.keys()):
        traces_of_user = original_dataset[user]
        sampled[user] = {}
        for video_idx, video in enumerate(traces_of_user.keys()):
            print('creating sampled dataset', 'video', video_idx, '/',
                  len(traces_of_user.keys()), 'user', user_idx, '/',
                  total_users)
            reference_vector = np.array([1, 0, 0])

            def to_timed_quaternion(sample):
                # One row: timestamp followed by the four quaternion terms.
                yaw, pitch = transform_the_degrees_in_range(
                    sample['yaw'], sample['pitch'])
                point = eulerian_to_cartesian(yaw, pitch)
                quat = rotationBetweenVectors(reference_vector, point)
                return [sample['sec'], quat[0], quat[1], quat[2], quat[3]]

            timed_quats = np.array(
                [to_timed_quaternion(sample)
                 for sample in traces_of_user[video]])
            # The timestamps are passed with time_orig_at_zero=False: here
            # the time starts counting at random parts of the video.
            sampled[user][video] = interpolate_quaternions(
                timed_quats[:, 0],
                timed_quats[:, 1:],
                rate=rate,
                time_orig_at_zero=False)
    return sampled
def transform_angles_for_model(trace):
    """Convert a trace of angle pairs into an array of cartesian points.

    Args:
        trace: iterable of samples; ``sample[0]`` and ``sample[1]`` are fed
            (in that order) to ``transform_the_degrees_in_range``.

    Returns:
        ``np.ndarray`` stacking the ``eulerian_to_cartesian`` result of every
        sample, in trace order.
    """
    def to_cartesian(sample):
        yaw, pitch = transform_the_degrees_in_range(sample[0], sample[1])
        return eulerian_to_cartesian(yaw, pitch)

    return np.array([to_cartesian(sample) for sample in trace])
def create_sampled_dataset(original_dataset, rate):
    """Build a resampled copy of the dataset, one trace per (user, video).

    Every sample's yaw/pitch is passed through
    ``transform_the_degrees_in_range`` and ``eulerian_to_cartesian``; the
    resulting point is then described by the rotation (quaternion) returned
    by ``rotationBetweenVectors`` from the reference vector ``[1, 0, 0]``.
    The timestamped quaternions are handed to ``interpolate_quaternions``.

    Args:
        original_dataset: nested mapping ``user -> video -> samples`` where
            each sample is indexed with ``'sec'``, ``'yaw'`` and ``'pitch'``.
        rate: forwarded unchanged to ``interpolate_quaternions``.

    Returns:
        dict ``user -> video -> result of interpolate_quaternions``.
    """
    sampled = {}
    for user in original_dataset.keys():
        traces_of_user = original_dataset[user]
        sampled[user] = {}
        for video in traces_of_user.keys():
            print('creating sampled dataset', user, video)
            reference_vector = np.array([1, 0, 0])

            def to_timed_quaternion(sample):
                # One row: timestamp followed by the four quaternion terms.
                yaw, pitch = transform_the_degrees_in_range(
                    sample['yaw'], sample['pitch'])
                point = eulerian_to_cartesian(yaw, pitch)
                quat = rotationBetweenVectors(reference_vector, point)
                return [sample['sec'], quat[0], quat[1], quat[2], quat[3]]

            timed_quats = np.array(
                [to_timed_quaternion(sample)
                 for sample in traces_of_user[video]])
            # Column 0 holds the timestamps, columns 1.. the quaternions.
            sampled[user][video] = interpolate_quaternions(
                timed_quats[:, 0], timed_quats[:, 1:], rate=rate)
    return sampled
def from_position_to_tile_probability_cartesian(pos):
    """Return a binary tile map of the tiles close to ``pos`` on the sphere.

    A grid of ``NUM_TILES_HEIGHT x NUM_TILES_WIDTH`` tile centres is built
    (yaw spanning ``2*pi``, pitch spanning ``pi``), converted with
    ``eulerian_to_cartesian`` and compared to ``pos`` through the
    great-circle distance.

    Args:
        pos: cartesian point indexed as ``pos[0]``, ``pos[1]``, ``pos[2]``.

    Returns:
        Integer array holding 1 where the distance is strictly below half of
        ``FOV_SIZE`` (converted from degrees to radians) and 0 elsewhere.
    """
    # Tile centres: left edges in [0, 1) shifted by half a tile, then scaled
    # to radians.
    yaw_centers = np.linspace(0, 1, NUM_TILES_WIDTH, endpoint=False)
    yaw_centers = (yaw_centers + 1.0 / (2.0 * NUM_TILES_WIDTH)) * 2 * np.pi
    pitch_centers = np.linspace(0, 1, NUM_TILES_HEIGHT, endpoint=False)
    pitch_centers = (pitch_centers + 1.0 / (2.0 * NUM_TILES_HEIGHT)) * np.pi
    yaw_grid, pitch_grid = np.meshgrid(yaw_centers, pitch_centers)

    x_grid, y_grid, z_grid = eulerian_to_cartesian(theta=yaw_grid,
                                                   phi=pitch_grid)
    dot_product = x_grid * pos[0] + y_grid * pos[1] + z_grid * pos[2]
    # Clamp before arccos so rounding noise cannot leave the [-1, 1] domain.
    great_circle_distance = np.arccos(np.clip(dot_product, -1.0, 1.0))

    half_fov_in_radians = ((FOV_SIZE / 2.0) / 180.0) * np.pi
    return np.where(great_circle_distance < half_fov_in_radians, 1, 0)
def from_position_to_tile_probability_cartesian(pos):
    """Return a gaussian-weighted tile map centred on ``pos``.

    A grid of ``NUM_TILES_HEIGHT_TRUE_SAL x NUM_TILES_WIDTH_TRUE_SAL`` tile
    centres is built (yaw spanning ``2*pi``, pitch spanning ``pi``),
    converted with ``eulerian_to_cartesian`` and compared to ``pos`` through
    the great-circle distance ``d``.

    Args:
        pos: cartesian point indexed as ``pos[0]``, ``pos[1]``, ``pos[2]``.

    Returns:
        Array of ``exp(-d**2 / (2 * 0.1**2))`` for every tile.
    """
    # Tile centres: left edges in [0, 1) shifted by half a tile, then scaled
    # to radians.
    yaw_centers = np.linspace(0, 1, NUM_TILES_WIDTH_TRUE_SAL, endpoint=False)
    yaw_centers = (
        yaw_centers + 1.0 / (2.0 * NUM_TILES_WIDTH_TRUE_SAL)) * 2 * np.pi
    pitch_centers = np.linspace(0, 1, NUM_TILES_HEIGHT_TRUE_SAL,
                                endpoint=False)
    pitch_centers = (
        pitch_centers + 1.0 / (2.0 * NUM_TILES_HEIGHT_TRUE_SAL)) * np.pi
    yaw_grid, pitch_grid = np.meshgrid(yaw_centers, pitch_centers)

    x_grid, y_grid, z_grid = eulerian_to_cartesian(theta=yaw_grid,
                                                   phi=pitch_grid)
    dot_product = x_grid * pos[0] + y_grid * pos[1] + z_grid * pos[2]
    # Clamp before arccos so rounding noise cannot leave the [-1, 1] domain.
    great_circle_distance = np.arccos(np.clip(dot_product, -1.0, 1.0))

    # Gaussian falloff with a fixed standard deviation of 0.1.
    exponent_scale = -1.0 / (2.0 * np.square(0.1))
    return np.exp(exponent_scale * np.square(great_circle_distance))
def get_most_salient_content_based_points_per_video(videos, saliency_folder, k=1):
    """Locate the saliency peaks of every saliency map of every video.

    Args:
        videos: iterable of video identifiers.
        saliency_folder: passed to ``load_saliency`` together with the video
            (and ``RUN_IN_SERVER=False``).
        k: forwarded to ``peak_local_max`` as ``num_peaks``.

    Returns:
        dict ``video -> np.ndarray`` with, for each saliency map, the peaks
        converted to cartesian points through ``eulerian_to_cartesian``.
    """
    def peaks_as_cartesian(saliency_map):
        # Peak coordinates come as (row, col); they are normalised by the
        # tile grid size and scaled to (pi, 2*pi) before the conversion,
        # which receives the column-derived angle first.
        peaks = peak_local_max(saliency_map, exclude_border=False, num_peaks=k)
        peaks_normalized = peaks / np.array([NUM_TILES_HEIGHT, NUM_TILES_WIDTH])
        peaks_radians = peaks_normalized * np.array([np.pi, 2.0*np.pi])
        return np.array([eulerian_to_cartesian(angles[1], angles[0])
                         for angles in peaks_radians])

    points_per_video = {}
    for video in videos:
        saliency_maps = load_saliency(saliency_folder, video, RUN_IN_SERVER=False)
        points_per_video[video] = np.array(
            [peaks_as_cartesian(saliency_map) for saliency_map in saliency_maps])
    return points_per_video
def transform_dataset_in_cartesian(original_dataset):
    """Convert every trace of the dataset from yaw/pitch to cartesian points.

    Args:
        original_dataset: nested mapping ``user -> video -> samples`` where
            each sample is indexed with ``'yaw'`` and ``'pitch'``.

    Returns:
        dict ``user -> video -> np.ndarray`` of the ``eulerian_to_cartesian``
        results, in sample order.
    """
    def to_cartesian(sample):
        yaw, pitch = transform_the_degrees_in_range(sample['yaw'], sample['pitch'])
        return eulerian_to_cartesian(yaw, pitch)

    cartesian_dataset = {}
    total_users = len(original_dataset.keys())
    for user_idx, user in enumerate(original_dataset.keys()):
        traces_of_user = original_dataset[user]
        cartesian_dataset[user] = {}
        for video_idx, video in enumerate(traces_of_user.keys()):
            print('creating cartesian dataset', 'user', user_idx, '/', total_users,
                  'video', video_idx, '/', len(traces_of_user.keys()))
            cartesian_dataset[user][video] = np.array(
                [to_cartesian(sample) for sample in traces_of_user[video]])
    return cartesian_dataset
def create_and_store_tile_probability_replica(original_dataset):
    """Compute and save the tile map of every sample of every trace.

    For each (user, video) trace, every sample's yaw/pitch is converted to a
    cartesian point and turned into a tile map with
    ``from_position_to_tile_probability_cartesian``. The stacked maps are
    written with ``np.save`` to
    ``OUTPUT_TILE_PROB_FOLDER/<video>_<user>_created_tile.npy``.

    Args:
        original_dataset: nested mapping ``user -> video -> samples`` where
            each sample is indexed with ``'yaw'`` and ``'pitch'``.

    Returns:
        None. The results are only written to disk.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first (another process may create the folder in between).
    os.makedirs(OUTPUT_TILE_PROB_FOLDER, exist_ok=True)

    def to_tile_map(sample):
        yaw, pitch = transform_the_degrees_in_range(sample['yaw'], sample['pitch'])
        point = eulerian_to_cartesian(yaw, pitch)
        return from_position_to_tile_probability_cartesian(point)

    total_users = len(original_dataset.keys())
    for user_idx, user in enumerate(original_dataset.keys()):
        traces_of_user = original_dataset[user]
        for video_idx, video in enumerate(traces_of_user.keys()):
            print('creating tiles for', 'user', user_idx, '/', total_users,
                  'video', video_idx, '/', len(traces_of_user.keys()))
            tile_maps = [to_tile_map(sample) for sample in traces_of_user[video]]
            filename = '%s_%s_created_tile.npy' % (video, user)
            file_path = os.path.join(OUTPUT_TILE_PROB_FOLDER, filename)
            np.save(file_path, np.array(tile_maps))
def get_most_salient_points_per_video(videos, true_saliency_folder, k=1):
    """Locate the saliency peaks of every saliency map of every video.

    Args:
        videos: iterable of video identifiers; the maps of a video are loaded
            with ``np.load`` from ``<true_saliency_folder>/<video>.npy``.
        true_saliency_folder: folder holding the ``.npy`` saliency files.
        k: forwarded to ``peak_local_max`` as ``num_peaks``.

    Returns:
        dict ``video -> np.ndarray`` with, for each saliency map, the peaks
        converted to cartesian points through ``eulerian_to_cartesian``.
    """
    def peaks_as_cartesian(saliency_map):
        # Peak coordinates come as (row, col); they are normalised by the
        # tile grid size and scaled to (pi, 2*pi) before the conversion,
        # which receives the column-derived angle first.
        peaks = peak_local_max(saliency_map, exclude_border=False, num_peaks=k)
        peaks_normalized = peaks / np.array(
            [NUM_TILES_HEIGHT_TRUE_SAL, NUM_TILES_WIDTH_TRUE_SAL])
        peaks_radians = peaks_normalized * np.array([np.pi, 2.0 * np.pi])
        return np.array([eulerian_to_cartesian(angles[1], angles[0])
                         for angles in peaks_radians])

    points_per_video = {}
    for video in videos:
        saliency_path = os.path.join(true_saliency_folder, video + '.npy')
        saliency_maps = np.load(saliency_path)
        points_per_video[video] = np.array(
            [peaks_as_cartesian(saliency_map) for saliency_map in saliency_maps])
    return points_per_video
def get_most_salient_points_per_video():
    """Locate the five strongest saliency peaks of every map of ``VIDEOS``.

    The maps of a video are loaded with ``np.load`` from
    ``OUTPUT_TRUE_SALIENCY_FOLDER/<video>.npy``.

    Returns:
        dict ``video -> np.ndarray`` with, for each saliency map, up to five
        peaks converted to cartesian points via ``eulerian_to_cartesian``.
    """
    from skimage.feature import peak_local_max

    def peaks_as_cartesian(saliency_map):
        # Peak coordinates come as (row, col); they are normalised by the
        # tile grid size and scaled to (pi, 2*pi) before the conversion,
        # which receives the column-derived angle first.
        peaks = peak_local_max(saliency_map, exclude_border=False, num_peaks=5)
        peaks_normalized = peaks / np.array(
            [NUM_TILES_HEIGHT_TRUE_SAL, NUM_TILES_WIDTH_TRUE_SAL])
        peaks_radians = peaks_normalized * np.array([np.pi, 2.0 * np.pi])
        return np.array([eulerian_to_cartesian(angles[1], angles[0])
                         for angles in peaks_radians])

    points_per_video = {}
    for video in VIDEOS:
        saliency_path = os.path.join(OUTPUT_TRUE_SALIENCY_FOLDER, video + '.npy')
        saliency_maps = np.load(saliency_path)
        points_per_video[video] = np.array(
            [peaks_as_cartesian(saliency_map) for saliency_map in saliency_maps])
    return points_per_video
def compute_error(model_name):
    """Evaluate one baseline over all test videos, users and horizons.

    For every prediction horizon in ``[0.5, 1, 1.5, 2, 2.5]`` and every
    (test video, user) trace, the chosen model predicts a head map at each
    timestep that has one second of history and enough future samples; the
    prediction is scored against ``create_head_map`` of the future position
    with ``compute_accuracy_metric``.

    Relies on module-level names not defined in this function (among others
    ``salient_ds_dict``, ``mmscaler``, ``post_filter``, ``VIDEOS_TEST``,
    ``USERS`` and the sampling-rate / window constants).

    Args:
        model_name: one of ``'no_motion'``, ``'TRACK'``, ``'CVPR18'`` or
            ``'pos_only'``. ``'no_motion'`` needs no trained model; the
            others are built and their weights loaded from ``ROOT_FOLDER``.
            Any other value leaves the prediction unassigned and fails at
            the first evaluated timestep.

    Returns:
        dict mapping each prediction horizon to the list of metric values
        collected for it.

    Raises:
        Exception: if the weights file of ``'TRACK'`` or ``'pos_only'`` is
            missing.
    """
    if model_name == 'CVPR18':
        model = create_CVPR18_model(M_WINDOW, H_WINDOW, NUM_TILES_HEIGHT,
                                    NUM_TILES_WIDTH)
        model.load_weights(
            os.path.join(
                ROOT_FOLDER, 'CVPR18',
                'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_13_end_13',
                'weights.hdf5'))
    elif model_name == 'TRACK':
        model = create_TRACK_model(M_WINDOW, H_WINDOW, NUM_TILES_HEIGHT,
                                   NUM_TILES_WIDTH)
        weights_file = os.path.join(
            ROOT_FOLDER, 'TRACK',
            'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_13_end_13',
            'weights.hdf5')
        if os.path.isfile(weights_file):
            model.load_weights(weights_file)
        else:
            raise Exception(
                'Sorry, the folder ./Nguyen_MM_18/TRACK/ doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"python training_procedure.py -train -gpu_id 0 -dataset_name Nguyen_MM_18 -model_name TRACK -m_window 5 -h_window 13 -provided_videos\" or \n* Download the file from:\n\thttps://unice-my.sharepoint.com/:u:/g/personal/miguel_romero-rondon_unice_fr/EYNvRsxKh1FCiJrhudfBMUsBhp1oB5m3fxTYa8kkZHOcSA?e=eC2Plz'
            )
    elif model_name == 'pos_only':
        model = create_pos_only_model(M_WINDOW, H_WINDOW)
        weights_file = os.path.join(
            ROOT_FOLDER, 'pos_only',
            'Models_EncDec_eulerian_init_5_in_5_out_13_end_13',
            'weights.hdf5')
        if os.path.isfile(weights_file):
            model.load_weights(weights_file)
        else:
            raise Exception(
                'Sorry, the folder ./Nguyen_MM_18/pos_only/ doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"python training_procedure.py -train -gpu_id 0 -dataset_name Nguyen_MM_18 -model_name pos_only -m_window 5 -h_window 13 -provided_videos\" or \n* Download the file from:\n\thttps://unice-my.sharepoint.com/:u:/g/personal/miguel_romero-rondon_unice_fr/EWO4VEQP2GtMp6NEZBMZA-QBpuXFo6WG2jQb-muvPc_ejw?e=iaPbYp'
            )

    # From the paper MM18:
    # We use the input feature from the past one second to predict the head
    # orientation in the future.
    one_second_in_timesteps = int(np.ceil(1.0 / ORIGINAL_SAMPLING_RATE))
    one_timestep_models = MODEL_SAMPLING_RATE / ORIGINAL_SAMPLING_RATE

    # Saliency window around timestep t handed to the saliency-based models;
    # both offsets are constant, so they are computed once.
    saliency_steps_back = int(np.ceil((M_WINDOW - 1) * one_timestep_models))
    saliency_steps_ahead = int(np.ceil(one_timestep_models * (H_WINDOW + 1)))

    # From the paper MM18:
    # "The default prediction window k is set to be 0.5 seconds.
    # To explore the effect of prediction window k on the accuracy of the
    # proposed model and other three benchmarks, we vary k from 0.5 seconds
    # to 2.5 seconds."
    prediction_horizons = [0.5, 1, 1.5, 2, 2.5]

    results_for_pred_horizon = {}
    for pred_hor in prediction_horizons:
        results_for_pred_horizon[pred_hor] = []
        prediction_horizon_in_timesteps = int(
            np.ceil(pred_hor / ORIGINAL_SAMPLING_RATE))
        for enum_video, video in enumerate(VIDEOS_TEST):
            saliency = salient_ds_dict['360net'][video]['salient']
            # preprocess saliency
            saliency = np.array([
                cv2.resize(sal, (NUM_TILES_WIDTH, NUM_TILES_HEIGHT))
                for sal in saliency
            ])
            saliency = np.array([(sal * 1.0 - sal.min()) for sal in saliency])
            saliency = np.array([(sal / sal.max()) * 255 for sal in saliency])
            saliency = np.array([post_filter(sal) for sal in saliency])
            saliency = np.array([
                mmscaler.fit_transform(salmap.ravel().reshape(-1, 1)).reshape(
                    salmap.shape) for salmap in saliency
            ])
            saliency = np.expand_dims(saliency, -1)
            for user in USERS:
                # Report the model actually being evaluated (the parameter),
                # not a module-level ``args`` object.
                print('computing', model_name,
                      'baseline error for video', enum_video, '/',
                      len(VIDEOS_TEST), 'user', user, '/', len(USERS),
                      'prediction_horizon', pred_hor)
                trace = salient_ds_dict['360net'][video]['headpos'][user]
                trace_for_model = np.array(
                    [vector_to_ang(point) for point in trace])
                trace_for_model = np.array([
                    transform_the_degrees_in_range(sample[0], sample[1])
                    for sample in trace_for_model
                ])
                trace_for_model = np.array([
                    eulerian_to_cartesian(sample[0], sample[1])
                    for sample in trace_for_model
                ])
                for t in range(one_second_in_timesteps,
                               len(trace) - prediction_horizon_in_timesteps):
                    if model_name == 'no_motion':
                        # Baseline: the head stays where it currently is.
                        model_pred = create_head_map(trace[t])
                    elif model_name == 'TRACK':
                        pos_input = trace_for_model[t - one_second_in_timesteps:t]
                        saliency_input = saliency[t - saliency_steps_back:
                                                  t + saliency_steps_ahead]
                        model_pred = get_TRACK_prediction(
                            model, pos_input, saliency_input, pred_hor)
                    elif model_name == 'CVPR18':
                        pos_input = trace_for_model[t - one_second_in_timesteps:t]
                        saliency_input = saliency[t - saliency_steps_back:
                                                  t + saliency_steps_ahead]
                        model_pred = get_CVPR18_prediction(
                            model, pos_input, saliency_input, pred_hor)
                    elif model_name == 'pos_only':
                        pos_input = trace_for_model[t - one_second_in_timesteps:t]
                        model_pred = get_pos_only_prediction(
                            model, pos_input, pred_hor)
                    groundtruth = create_head_map(
                        trace[t + prediction_horizon_in_timesteps])
                    results_for_pred_horizon[pred_hor].append(
                        compute_accuracy_metric(model_pred, groundtruth))
        print(pred_hor, np.mean(results_for_pred_horizon[pred_hor]))
    return results_for_pred_horizon
def transform_normalized_eulerian_to_cartesian(position):
    """Convert one normalized angle pair into a cartesian point.

    Args:
        position: pair whose first component is scaled by ``2*pi`` and whose
            second component is scaled by ``pi`` before the conversion
            (presumably both normalized to [0, 1] -- confirm with callers).

    Returns:
        ``np.ndarray`` wrapping the result of ``eulerian_to_cartesian``.
    """
    angles = position * np.array([2 * np.pi, np.pi])
    first_angle = angles[0]
    second_angle = angles[1]
    return np.array(eulerian_to_cartesian(first_angle, second_angle))
def transform_normalized_eulerian_to_cartesian(positions):
    """Convert a sequence of normalized angle pairs into cartesian points.

    Args:
        positions: array of pairs; the first component of each pair is scaled
            by ``2*pi`` and the second by ``pi`` before the conversion
            (presumably both normalized to [0, 1] -- confirm with callers).

    Returns:
        ``np.ndarray`` stacking the ``eulerian_to_cartesian`` result of every
        pair, in input order.
    """
    scaled_positions = positions * np.array([2*np.pi, np.pi])
    cartesian_points = []
    for angles in scaled_positions:
        cartesian_points.append(eulerian_to_cartesian(angles[0], angles[1]))
    return np.array(cartesian_points)
model_prediction = np.array([get_max_sal_pos(sal, dataset_name) for sal in decoder_sal_inputs_for_sample[0, :, :, 0]]) elif model_name == 'true_saliency': model_prediction = predict_most_salient_point(decoder_true_sal_inputs_for_sample, decoder_pos_inputs_for_sample[0, 0]) elif model_name == 'content_based_saliency': model_prediction = predict_most_salient_cb_point(decoder_true_sal_inputs_for_sample, decoder_pos_inputs_for_sample[0, 0]) elif model_name == 'CVPR18_orig': initial_pos_inputs = transform_batches_cartesian_to_normalized_eulerian(encoder_pos_inputs_for_sample) model_pred = auto_regressive_prediction(model, initial_pos_inputs, decoder_sal_inputs_for_sample, M_WINDOW, H_WINDOW) model_prediction = transform_normalized_eulerian_to_cartesian(model_pred) elif model_name == 'MM18': model_prediction = [] groundtruth = [] for _h_window in range(H_WINDOW): model_pred = mm18_models[_h_window].predict(encoder_sal_inputs_for_sample)[0] model_pred_norm_eul = MM18_model.model_pred_in_normalized_eulerian(model_pred) model_prediction.append(eulerian_to_cartesian(model_pred_norm_eul[0], model_pred_norm_eul[1])) groundtruth_eulerian = MM18_model.model_pred_in_normalized_eulerian(all_headmaps[video][user][x_i+_h_window+1]) groundtruth.append(eulerian_to_cartesian(groundtruth_eulerian[0], groundtruth_eulerian[1])) for t in range(len(groundtruth)): if t not in errors_per_video[video].keys(): errors_per_video[video][t] = [] errors_per_video[video][t].append(metric(groundtruth[t], model_prediction[t])) if t not in errors_per_timestep.keys(): errors_per_timestep[t] = [] errors_per_timestep[t].append(metric(groundtruth[t], model_prediction[t])) for video_name in videos_test: for t in range(H_WINDOW): print(video_name, t, np.mean(errors_per_video[video_name][t]), end=';')