def CVPR18_metrics(original_dataset_cartesian):
    """Evaluate the pretrained CVPR18 model on the test traces.

    For every test trace and every time-stamp ``t`` in
    ``range(M_WINDOW, len(trace) - H_WINDOW)`` the model predicts a sequence
    of tile maps, which is compared against the replica tile maps read with
    ``read_replica_tile_info``.

    Args:
        original_dataset_cartesian: nested mapping indexed as
            ``original_dataset_cartesian[user][video]`` that yields a
            sliceable sequence of positions.

    Returns:
        A tuple ``(accuracy, f1, ranking_loss)`` holding the mean accuracy
        (multiplied by 100), the mean F1 score and the mean label-ranking
        loss over all evaluated steps. The same values are also printed.
    """
    M_WINDOW_TRAINED_MODEL = 5
    H_WINDOW_TRAINED_MODEL = 25

    _, traces_test = get_traces_for_train_and_test()
    model = create_CVPR18_model(M_WINDOW_TRAINED_MODEL, H_WINDOW_TRAINED_MODEL,
                                NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL)
    model.load_weights(
        os.path.join(ROOT_FOLDER, 'CVPR18',
                     'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_25_end_25',
                     'weights_100.hdf5'))

    accuracy_results = []
    f1_score_results = []
    ranking_results = []

    for trace_num, trace in enumerate(traces_test):
        print('computing CVPR18 metrics for trace', trace_num, '/', len(traces_test))
        user = trace['user']
        video = trace['video']
        repl_tiles_map = read_replica_tile_info(video, user)
        saliency_in_video = load_saliency(SALIENCY_FOLDER, video)
        positions = original_dataset_cartesian[user][video]

        for t in range(M_WINDOW, len(positions) - H_WINDOW):
            past_positions = positions[t - M_WINDOW:t + 1]
            # ToDo: The value "6" is hardcoded, it comes from
            # "int(MODEL_SAMPLING_RATE / ORIGINAL_SAMPLING_RATE)"
            curr_id_in_model_steps = int(t / 6)

            # Zero-initialised so that a saliency slice shorter than the
            # trained horizon (end of the video) is implicitly zero-padded.
            sal_decoder = np.zeros(
                (1, H_WINDOW_TRAINED_MODEL, NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL, 1))
            picked_sal_decoder = saliency_in_video[
                curr_id_in_model_steps + 1:
                curr_id_in_model_steps + H_WINDOW_TRAINED_MODEL + 1]
            sal_decoder[0, :len(picked_sal_decoder), :, :, 0] = picked_sal_decoder

            pred_tile_map = get_CVPR18_prediction(
                model, past_positions, M_WINDOW_TRAINED_MODEL, sal_decoder)

            future_tile_maps = repl_tiles_map[t + 1:t + H_WINDOW + 1]
            for x_i, tile_map in enumerate(future_tile_maps):
                true_flat = np.ndarray.flatten(tile_map)
                pred_flat = np.ndarray.flatten(pred_tile_map[x_i])
                accuracy_results.append(accuracy_score(true_flat, pred_flat))
                f1_score_results.append(f1_score(true_flat, pred_flat))
                ranking_results.append(
                    label_ranking_loss(tile_map, pred_tile_map[x_i]))

    mean_accuracy = np.mean(accuracy_results) * 100
    mean_f1_score = np.mean(f1_score_results)
    mean_ranking_loss = np.mean(ranking_results)
    print('CVPR18:\tAccuracy', mean_accuracy, 'F-Score', mean_f1_score,
          'Rank. Loss', mean_ranking_loss)
    # Returned as well as printed so callers can consume the metrics.
    return mean_accuracy, mean_f1_score, mean_ranking_loss
def get_most_salient_content_based_points_per_video(videos, saliency_folder, k=1):
    """Find the ``k`` strongest saliency peaks of every saliency map of each video.

    For each video the saliency maps are loaded with ``load_saliency``; the
    peaks of every map are located with ``peak_local_max``, divided by
    ``[NUM_TILES_HEIGHT, NUM_TILES_WIDTH]``, scaled by ``[pi, 2*pi]`` and
    converted with ``eulerian_to_cartesian``.

    Args:
        videos: iterable of video identifiers accepted by ``load_saliency``.
        saliency_folder: folder passed through to ``load_saliency``.
        k: maximum number of peaks requested per saliency map
            (``num_peaks`` of ``peak_local_max``).

    Returns:
        dict mapping each video to ``np.array`` of the per-map point arrays.
    """
    # Loop-invariant scale factors.
    # NOTE(review): assumes each saliency map is laid out as
    # (NUM_TILES_HEIGHT, NUM_TILES_WIDTH) - confirm against load_saliency.
    grid_size = np.array([NUM_TILES_HEIGHT, NUM_TILES_WIDTH])
    angular_span = np.array([np.pi, 2.0 * np.pi])

    most_salient_points_per_video = {}
    for video in videos:
        saliencies_for_video = load_saliency(saliency_folder, video, RUN_IN_SERVER=False)
        most_salient_points_in_video = []
        for sal in saliencies_for_video:
            coordinates = peak_local_max(sal, exclude_border=False, num_peaks=k)
            coordinates_normalized = coordinates / grid_size
            coordinates_radians = coordinates_normalized * angular_span
            # The second coordinate is passed first to eulerian_to_cartesian.
            cartesian_pts = np.array([eulerian_to_cartesian(sample[1], sample[0])
                                      for sample in coordinates_radians])
            most_salient_points_in_video.append(cartesian_pts)
        most_salient_points_per_video[video] = np.array(most_salient_points_in_video)
    return most_salient_points_per_video
def compute_pretrained_model_error(dataset, videos_list, model_name, history_window, model_weights_path):
    """Compute the mean-overlap score of a pretrained model per test video.

    The model is built, its weights are loaded from ``model_weights_path``
    and, for every user in ``dataset`` and every video in ``VIDEOS_TEST``,
    one prediction is made per time-stamp. The prediction is interpolated
    back to the video frame rate and scored with ``MeanOverlap.calc_mo_deg``
    against the ground-truth angles of the next sample.

    Args:
        dataset: nested mapping indexed as ``dataset[user][video]`` yielding
            a 2-D array; column 0 is used as the time-stamp and the
            remaining columns are handed to ``interpolate_quaternions``.
        videos_list: videos whose saliency maps are loaded (ignored for
            ``'pos_only'``).
        model_name: one of ``'TRACK'``, ``'CVPR18'`` or ``'pos_only'``.
        history_window: number of past samples fed to the encoder.
        model_weights_path: path of the weights file to load.

    Returns:
        dict mapping each video of ``VIDEOS_TEST`` to the mean score.

    Raises:
        ValueError: if ``model_name`` is not supported.
        Exception: if the weights file or the saliency folder is missing.
    """
    if model_name == 'TRACK':
        model = create_TRACK_model(history_window, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)
    elif model_name == 'CVPR18':
        model = create_CVPR18_model(history_window, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)
    elif model_name == 'pos_only':
        model = create_pos_only_model(history_window, TRAINED_PREDICTION_HORIZON)
    else:
        # Previously `model` was simply left unbound for any other name.
        raise ValueError('Unsupported model_name %r, expected one of TRACK, CVPR18, pos_only' % (model_name,))

    if os.path.isfile(model_weights_path):
        model.load_weights(model_weights_path)
    else:
        command = 'python training_procedure.py -train -gpu_id 0 -dataset_name Xu_PAMI_18 -model_name %s -m_window 5 -h_window 5 -exp_folder sampled_by_frame_dataset -provided_videos' % model_name
        raise Exception(
            'Sorry, the file '+model_weights_path+' doesn\'t exist.\nYou can:\n* Create it using the command:\n\t\"'+command+'\" or \n* Download the file from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/EvQmshggLahKnBjIehzAbY0Bd-JDlzzFPYw9_R8IrGjQPA?e=Yk7f3c')

    # Only the position-only baseline works without saliency maps.
    uses_saliency = model_name != 'pos_only'

    saliency_folder = os.path.join(ROOT_FOLDER, 'extract_saliency/saliency')
    if uses_saliency:
        all_saliencies = {}
        for video in videos_list:
            if os.path.isdir(saliency_folder):
                all_saliencies[video] = load_saliency(saliency_folder, video)
            else:
                raise Exception('Sorry, the folder ./Xu_PAMI_18/extract_saliency doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"./Xu_PAMI_18/dataset/creation_of_scaled_images.sh\n\tpython ./Extract_Saliency/panosalnet.py -dataset_name PAMI_18\" or \n* Download the folder from:https://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/Eir98fXEHKRBq9j-bgKGNTYBNN-_FQkvisJ1j9kOeVrB-Q?e=50lCOb\n\t')

    mo_calculator = MeanOverlap(3840, 1920, 65.5 / 2, 3.0 / 4.0)
    error_per_video = {}
    for user in dataset.keys():
        # NOTE(review): iterates VIDEOS_TEST while saliency is loaded for
        # videos_list - the two are presumably the same set; confirm.
        for video in VIDEOS_TEST:
            if uses_saliency:
                # The saliency maps are treated as sampled every 0.2 seconds.
                # Guarded because `all_saliencies` does not exist for
                # 'pos_only' (this used to raise NameError).
                time_stamps_in_saliency = np.arange(0.0, len(all_saliencies[video]) * 0.2, 0.2)
            print('computing error for user', user, 'and video', video)
            angles_per_video = recover_original_angles_from_quaternions_trace(dataset[user][video])
            error_per_video.setdefault(video, [])

            # 1. Find the first time-stamp greater than 1 second (so that the
            #    input of the trace is greater than 5 when sampled in 0.2)
            # 1.1 Get the video rate (this is also the index of the first
            #     time-stamp at 1 sec)
            video_rate = int(np.ceil(get_frame_rate(video, hardcoded=True)))
            for t in range(video_rate, len(angles_per_video) - 1):
                # The slice end is exclusive, so the input does not include
                # the value at t+1, which is the sample to be predicted.
                input_data = dataset[user][video][t-video_rate:t+1]
                sample_t_n = angles_per_video[t+1]
                sampled_input_data = interpolate_quaternions(input_data[:, 0], input_data[:, 1:], rate=RATE, time_orig_at_zero=False)
                sampled_input_data_xyz = recover_xyz_from_quaternions_trace(sampled_input_data)

                if uses_saliency:
                    # 2. For the saliency, take the time-stamp following the
                    #    input trace and find the closest saliency index.
                    first_decoder_saliency_timestamp = input_data[-1, 0] + 0.2
                    first_decoder_sal_id = np.argmin(np.power(time_stamps_in_saliency - first_decoder_saliency_timestamp, 2.0))
                    encoder_sal_inputs_for_sample = np.array([np.expand_dims(all_saliencies[video][first_decoder_sal_id - history_window:first_decoder_sal_id], axis=-1)])
                    # ToDo: Be careful here, we are using TRAINED_PREDICTION_HORIZON to load future saliencies
                    # Zero-initialised so a short slice at the end of the
                    # video is implicitly zero-padded.
                    decoder_sal_inputs_for_sample = np.zeros((1, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH, 1))
                    taken_saliencies = all_saliencies[video][first_decoder_sal_id:min(first_decoder_sal_id + TRAINED_PREDICTION_HORIZON, len(all_saliencies[video]))]
                    decoder_sal_inputs_for_sample[0, :len(taken_saliencies), :, :, 0] = taken_saliencies

                encoder_pos_inputs_for_sample = [sampled_input_data_xyz[-history_window - 1:-1, 1:]]
                decoder_pos_inputs_for_sample = [sampled_input_data_xyz[-1:, 1:]]

                # 3. predict
                if model_name == 'TRACK':
                    model_prediction = model.predict(
                        [np.array(encoder_pos_inputs_for_sample),
                         np.array(encoder_sal_inputs_for_sample),
                         np.array(decoder_pos_inputs_for_sample),
                         np.array(decoder_sal_inputs_for_sample)])[0]
                elif model_name == 'CVPR18':
                    model_prediction = model.predict(
                        [np.array(encoder_pos_inputs_for_sample),
                         np.array(decoder_pos_inputs_for_sample),
                         np.array(decoder_sal_inputs_for_sample)])[0]
                else:  # 'pos_only'
                    model_pred = model.predict(
                        [transform_batches_cartesian_to_normalized_eulerian(encoder_pos_inputs_for_sample),
                         transform_batches_cartesian_to_normalized_eulerian(decoder_pos_inputs_for_sample)])[0]
                    model_prediction = transform_normalized_eulerian_to_cartesian(model_pred)

                # 4. upsample the predicted trace from 0.2 sec to the video rate
                sample_orig = np.array([1, 0, 0])
                quat_rot_1 = rotationBetweenVectors(sample_orig, sampled_input_data_xyz[-1, 1:])
                quat_rot_1 = np.array([quat_rot_1[0], quat_rot_1[1], quat_rot_1[2], quat_rot_1[3]])
                quat_rot_2 = rotationBetweenVectors(sample_orig, model_prediction[0])
                quat_rot_2 = np.array([quat_rot_2[0], quat_rot_2[1], quat_rot_2[2], quat_rot_2[3]])

                interpolated = interpolate_quaternions([0.0, RATE], [quat_rot_1, quat_rot_2], rate=1.0/video_rate)
                pred_samples = recover_original_angles_from_quaternions_trace(interpolated)
                # Index 1 is the first interpolated sample after the last
                # observed position (index 0).
                pred_sample_t_n = pred_samples[1]

                mo_score = mo_calculator.calc_mo_deg([pred_sample_t_n[0], pred_sample_t_n[1]], [sample_t_n[0], sample_t_n[1]], is_centered=True)
                error_per_video[video].append(mo_score)

    avg_error_per_video = {}
    for video in VIDEOS_TEST:
        avg_error_per_video[video] = np.mean(error_per_video[video])
    return avg_error_per_video
if model_name == 'MM18': MM18_model.create_gt_sal(TRUE_SALIENCY_FOLDER, all_traces) # Load the saliency only if it's not the position_only baseline if model_name not in ['pos_only', 'no_motion', 'true_saliency', 'content_based_saliency']: all_saliencies = {} if args.use_true_saliency: for video in videos: if model_name == 'MM18': all_saliencies[video] = MM18_model.get_true_saliency(TRUE_SALIENCY_FOLDER, video) else: all_saliencies[video] = load_true_saliency(TRUE_SALIENCY_FOLDER, video) else: for video in videos: all_saliencies[video] = load_saliency(SALIENCY_FOLDER, video) if model_name == 'MM18': all_headmaps = {} for video in videos: all_headmaps[video] = {} for user in users_per_video[video]: all_headmaps[video][user] = MM18_model.get_headmaps(all_traces[video][user]) if model_name == 'true_saliency': most_salient_points_per_video = get_most_salient_points_per_video(videos, TRUE_SALIENCY_FOLDER, k=int(args.num_of_peaks)) if model_name == 'content_based_saliency': most_salient_points_per_video = get_most_salient_content_based_points_per_video(videos, SALIENCY_FOLDER, k=int(args.num_of_peaks)) def transform_batches_cartesian_to_normalized_eulerian(positions_in_batch): positions_in_batch = np.array(positions_in_batch)
def compute_pretrained_model_error_xyz(dataset, videos_list, model_name, history_window, prediction_horizon, model_weights_path):
    """Compute the orthodromic error of a pretrained model on xyz traces.

    For every user in ``dataset`` and every video of that user that is also
    in ``videos_list``, a prediction is made at each time-stamp ``t`` in
    ``range(history_window, len(trace) - prediction_horizon)`` and compared,
    step by step, with the ground-truth positions using
    ``compute_orthodromic_distance``.

    Args:
        dataset: nested mapping indexed as ``dataset[user][video]`` yielding
            a 2-D array whose columns from index 1 on are used as the
            position (column 0 is skipped).
        videos_list: videos to evaluate and to load saliency maps for.
        model_name: one of ``'TRACK'``, ``'CVPR18'``, ``'CVPR18_orig'`` or
            ``'pos_only'``.
        history_window: number of past samples fed to the encoder.
        prediction_horizon: number of predicted steps that are evaluated.
        model_weights_path: path of the weights file to load.

    Returns:
        Flat list with one error (converted with ``radian_to_degrees``) per
        evaluated prediction step.

    Raises:
        ValueError: if ``model_name`` is not supported.
        Exception: if the weights file or a saliency folder is missing.
    """
    if model_name == 'TRACK':
        model = create_TRACK_model(history_window, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)
    elif model_name == 'CVPR18':
        model = create_CVPR18_model(history_window, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH)
    elif model_name == 'CVPR18_orig':
        model = create_CVPR18_orig_Model(history_window, NUM_TILES_HEIGHT_TRUE_SAL, NUM_TILES_WIDTH_TRUE_SAL)
    elif model_name == 'pos_only':
        model = create_pos_only_model(history_window, TRAINED_PREDICTION_HORIZON)
    else:
        # Previously `model` was simply left unbound for any other name.
        raise ValueError('Unsupported model_name %r, expected one of TRACK, CVPR18, CVPR18_orig, pos_only' % (model_name,))

    if os.path.isfile(model_weights_path):
        model.load_weights(model_weights_path)
    else:
        command = 'python training_procedure.py -train -gpu_id 0 -dataset_name Xu_CVPR_18 -model_name %s -m_window 5 -h_window 5 -exp_folder sampled_dataset_replica -provided_videos' % model_name
        if model_name not in ['no_motion', 'pos_only', 'TRACK']:
            command += ' -use_true_saliency'
        raise Exception('Sorry, the folder ./Xu_CVPR_18/'+model_name+'/ doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"'+command+'\" or \n* Download the files from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/EjhbHp5qgDRKrtkqODKayq0BoCqUY76cmm8bDwdbMOTqeQ?e=fGRFjo')

    # Only the position-only baseline works without saliency maps.
    uses_saliency = model_name != 'pos_only'
    uses_true_saliency = model_name == 'CVPR18_orig'

    saliency_folder = os.path.join(ROOT_FOLDER, 'extract_saliency/saliency')
    true_saliency_folder = os.path.join(ROOT_FOLDER, 'true_saliency')

    if uses_saliency:
        all_saliencies = {}
        for video in videos_list:
            # for model CVPR18_orig we use the true saliency:
            if uses_true_saliency:
                if not os.path.isdir(true_saliency_folder):
                    raise Exception('Sorry, the folder ./Xu_CVPR_18/true_saliency doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"python ./Xu_CVPR_18/Read_Dataset.py -creat_true_sal\" or \n* Download the folder from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/EsOFppF2mSRBtCtlmUM0TV4BGFRb1plZWgtUxSEo_E-I7w?e=pKXxCf')
                all_saliencies[video] = load_true_saliency(true_saliency_folder, video)
            else:
                if not os.path.isdir(saliency_folder):
                    raise Exception('Sorry, the folder ./Xu_CVPR_18/extract_saliency doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"./Xu_CVPR_18/dataset/creation_of_scaled_images.sh\n\tpython ./Extract_Saliency/panosalnet.py -dataset_name CVPR_18\" or \n* Download the folder from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/EvRCuy0v5BpDmADTPUuA8JgBoIgaWcFbR0S7wIXlevIIGQ?e=goOz7o')
                all_saliencies[video] = load_saliency(saliency_folder, video)

        # Loop-invariant shape of the decoder saliency input; the true
        # saliency maps use their own tile resolution.
        if uses_true_saliency:
            decoder_sal_shape = (1, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT_TRUE_SAL, NUM_TILES_WIDTH_TRUE_SAL, 1)
        else:
            decoder_sal_shape = (1, TRAINED_PREDICTION_HORIZON, NUM_TILES_HEIGHT, NUM_TILES_WIDTH, 1)

    intersection_angle_error = []
    for enum_user, user in enumerate(dataset.keys()):
        for enum_video, video in enumerate(dataset[user].keys()):
            if video not in videos_list:
                continue
            print('computing error for trace', 'user', enum_user, '/', len(dataset.keys()), 'video', enum_video, '/', len(dataset[user].keys()))
            xyz_per_video = dataset[user][video]
            for t in range(history_window, len(xyz_per_video)-prediction_horizon):
                if uses_saliency:
                    video_saliencies = all_saliencies[video]
                    encoder_sal_inputs_for_sample = np.array([np.expand_dims(video_saliencies[t - history_window + 1:t + 1], axis=-1)])
                    # ToDo: Be careful here, we are using TRAINED_PREDICTION_HORIZON to load future saliencies
                    # Zero-initialised so a short slice at the end of the
                    # video is implicitly zero-padded.
                    decoder_sal_inputs_for_sample = np.zeros(decoder_sal_shape)
                    taken_saliencies = video_saliencies[t + 1:min(t + TRAINED_PREDICTION_HORIZON + 1, len(video_saliencies))]
                    decoder_sal_inputs_for_sample[0, :len(taken_saliencies), :, :, 0] = taken_saliencies

                encoder_pos_inputs_for_sample = [xyz_per_video[t-history_window:t, 1:]]
                decoder_pos_inputs_for_sample = [xyz_per_video[t:t+1, 1:]]

                if model_name == 'TRACK':
                    model_prediction = model.predict(
                        [np.array(encoder_pos_inputs_for_sample),
                         np.array(encoder_sal_inputs_for_sample),
                         np.array(decoder_pos_inputs_for_sample),
                         np.array(decoder_sal_inputs_for_sample)])[0]
                elif model_name == 'CVPR18':
                    model_prediction = model.predict(
                        [np.array(encoder_pos_inputs_for_sample),
                         np.array(decoder_pos_inputs_for_sample),
                         np.array(decoder_sal_inputs_for_sample)])[0]
                elif model_name == 'CVPR18_orig':
                    initial_pos_inputs = transform_batches_cartesian_to_normalized_eulerian(encoder_pos_inputs_for_sample)
                    model_pred = auto_regressive_prediction(model, initial_pos_inputs, decoder_sal_inputs_for_sample, history_window, prediction_horizon)
                    model_prediction = transform_normalized_eulerian_to_cartesian(model_pred)
                else:  # 'pos_only'
                    model_pred = model.predict(
                        [transform_batches_cartesian_to_normalized_eulerian(encoder_pos_inputs_for_sample),
                         transform_batches_cartesian_to_normalized_eulerian(decoder_pos_inputs_for_sample)])[0]
                    model_prediction = transform_normalized_eulerian_to_cartesian(model_pred)

                for x_i in range(prediction_horizon):
                    pred_t_n = model_prediction[x_i]
                    sample_t_n = xyz_per_video[t+x_i+1, 1:]
                    int_ang_err = compute_orthodromic_distance(pred_t_n, sample_t_n)
                    intersection_angle_error.append(radian_to_degrees(int_ang_err))
    return intersection_angle_error
def TRACK_metrics(original_dataset_cartesian):
    """Evaluate the pretrained TRACK model on the test traces.

    For every test trace and every time-stamp ``t`` in
    ``range(M_WINDOW, len(trace) - H_WINDOW)`` the model predicts a sequence
    of tile maps, which is compared against the replica tile maps read with
    ``read_replica_tile_info``.

    Args:
        original_dataset_cartesian: nested mapping indexed as
            ``original_dataset_cartesian[user][video]`` that yields a
            sliceable sequence of positions.

    Returns:
        A tuple ``(accuracy, f1, ranking_loss)`` holding the mean accuracy
        (multiplied by 100), the mean F1 score and the mean label-ranking
        loss over all evaluated steps.

    Raises:
        Exception: if the weights file or the saliency folder is missing.
    """
    M_WINDOW_TRAINED_MODEL = 5
    H_WINDOW_TRAINED_MODEL = 25

    _, traces_test = get_traces_for_train_and_test()
    model = create_TRACK_model(M_WINDOW_TRAINED_MODEL, H_WINDOW_TRAINED_MODEL,
                               NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL)

    weights_file = os.path.join(
        ROOT_FOLDER, 'TRACK',
        'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_25_end_25',
        'weights.hdf5')
    if os.path.isfile(weights_file):
        model.load_weights(weights_file)
    else:
        raise Exception(
            'Sorry, the file ./Fan_NOSSDAV_17/TRACK/Models_EncDec_3DCoords_ContSal_init_5_in_5_out_25_end_25/weights.hdf5 doesn\'t exist.\nYou can:\n* Create it using the command:\n\t\"python training_procedure.py -train -gpu_id 0 -dataset_name Fan_NOSSDAV_17 -model_name TRACK -m_window 5 -h_window 5\" or \n* Download the file from:\n\thttps://unice-my.sharepoint.com/:u:/g/personal/miguel_romero-rondon_unice_fr/EUEAuXdjNO1GlGPSDFAi5VcBVdysMmWBFjKLYa0uE2tRMw?e=HFWSQe'
        )

    accuracy_results = []
    f1_score_results = []
    ranking_results = []

    for trace_num, trace in enumerate(traces_test):
        user = trace['user']
        video = trace['video']
        repl_tiles_map = read_replica_tile_info(video, user)
        if os.path.isdir(SALIENCY_FOLDER):
            saliency_in_video = load_saliency(SALIENCY_FOLDER, video)
        else:
            raise Exception(
                'Sorry, the folder ./Fan_NOSSDAV_17/extract_saliency doesn\'t exist or is incomplete.\nYou can:\n* Create it using the command:\n\t\"python ./Fan_NOSSDAV_17/Read_Dataset.py -creat_cb_sal\" or \n* Download the folder from:\n\thttps://unice-my.sharepoint.com/:f:/g/personal/miguel_romero-rondon_unice_fr/Eh4Ojy9wDs1Jt4xXfKdrK6cBHQcWVSC2eglWyZOilkptvA?e=KeNZ2F'
            )
        positions = original_dataset_cartesian[user][video]

        for t in range(M_WINDOW, len(positions) - H_WINDOW):
            # The message used to say "no_motion" (copy-paste from another
            # baseline) although this function evaluates TRACK.
            print('computing TRACK metrics for trace', trace_num, '/',
                  len(traces_test), 'time-stamp:', t)
            past_positions = positions[t - M_WINDOW:t + 1]
            # ToDo: The value "6" is hardcoded, it comes from
            # "int(MODEL_SAMPLING_RATE / ORIGINAL_SAMPLING_RATE)"
            curr_id_in_model_steps = int(t / 6)

            # Zero-initialised so that saliency slices shorter than the
            # trained windows are implicitly zero-padded.
            sal_encoder = np.zeros(
                (1, M_WINDOW_TRAINED_MODEL, NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL, 1))
            sal_decoder = np.zeros(
                (1, H_WINDOW_TRAINED_MODEL, NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL, 1))

            # NOTE(review): when curr_id_in_model_steps is smaller than
            # M_WINDOW_TRAINED_MODEL - 1 the start index below is negative
            # and wraps around to the end of the sequence - confirm that
            # this is intended for the first time-stamps.
            picked_sal_encoder = saliency_in_video[
                curr_id_in_model_steps - M_WINDOW_TRAINED_MODEL + 1:
                curr_id_in_model_steps + 1]
            picked_sal_decoder = saliency_in_video[
                curr_id_in_model_steps + 1:
                curr_id_in_model_steps + H_WINDOW_TRAINED_MODEL + 1]
            sal_encoder[0, :len(picked_sal_encoder), :, :, 0] = picked_sal_encoder
            sal_decoder[0, :len(picked_sal_decoder), :, :, 0] = picked_sal_decoder

            pred_tile_map = get_TRACK_prediction(
                model, past_positions, M_WINDOW_TRAINED_MODEL, sal_encoder, sal_decoder)

            future_tile_maps = repl_tiles_map[t + 1:t + H_WINDOW + 1]
            for x_i, tile_map in enumerate(future_tile_maps):
                true_flat = np.ndarray.flatten(tile_map)
                pred_flat = np.ndarray.flatten(pred_tile_map[x_i])
                accuracy_results.append(accuracy_score(true_flat, pred_flat))
                f1_score_results.append(f1_score(true_flat, pred_flat))
                ranking_results.append(
                    label_ranking_loss(tile_map, pred_tile_map[x_i]))

    return (np.mean(accuracy_results) * 100,
            np.mean(f1_score_results),
            np.mean(ranking_results))