import numpy as np
import pandas as pd

import path_vars

# NOTE: this fragment starts mid-function; the imports above and the head of
# ReadAnnotations below are a reconstruction, inferred from the parsing
# pattern used by the other loaders in this file (movie name in field 0,
# ground-truth start/end in fields 3 and 4, action category in the last
# field). The default annotation path is an assumption, borrowed from the
# TURN val annotation file used later in this module.


def ReadAnnotations(class_list,
                    annotation_file='/home/zwei/Dev/TURN_TAP_ICCV17/turn_codes/val_training_samples.txt'):
    movie_instances = {}
    with open(annotation_file) as f:
        for l in f:
            movie_name = l.rstrip().split(" ")[0]
            gt_start = float(l.rstrip().split(" ")[3])
            gt_end = float(l.rstrip().split(" ")[4])
            action_category = l.rstrip().split(" ")[-1]
            class_id = class_list.index(action_category)
            if movie_name in movie_instances.keys():
                movie_instances[movie_name].append((gt_start, gt_end, class_id))
            else:
                movie_instances[movie_name] = [(gt_start, gt_end, class_id)]
    return movie_instances


save_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/PropEval/thumos14_val_groundtruth.csv'
data_frame = []
pathVars = path_vars.PathVars()
movie_instances = ReadAnnotations(class_list=pathVars.classnames)
for i, _key in enumerate(movie_instances):
    frm_num = pathVars.video_frames[_key]
    for line in movie_instances[_key]:
        start = int(line[0])
        end = int(line[1])
        label_idx = int(line[2])
        data_frame.append([end, start, label_idx, frm_num, _key])
# NOTE: the original snippet is truncated at this call; the column names are
# an assumption, inferred from the fields the loaders below read back
# ('video-name', 'f-init', 'f-end').
results = pd.DataFrame(data_frame,
                       columns=['f-end', 'f-init', 'label-idx',
                                'video-frames', 'video-name'])
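# save_file is defined above but never used in this fragment; presumably the
# DataFrame is written out so the loaders below can read it back with
# pd.read_csv(..., sep=' ').  A minimal sketch of that final step, assuming
# space-separated output with no index column:
results.to_csv(save_file, sep=' ', index=False)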
def __init__(self, seq_length=360, overlap=0.9, feature_file_ext='npy'):
    self.PathVars = path_vars.PathVars()
    self.feature_directory = '/home/zwei/datasets/THUMOS14/features/BNInception'
    self.feature_file_ext = feature_file_ext
    self.annotation_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/PropEval/thumos14_val_groundtruth.csv'
    print("Reading training data list from {:s}\t clip len:{:d}".format(
        self.annotation_file, seq_length))

    # Collect (gt_start, gt_end) pairs per movie, clamping the end frame to
    # the movie length and skipping the known-missing video.
    self.movie_instances = {}
    ground_truth = pd.read_csv(self.annotation_file, sep=' ')
    n_ground_truth = len(ground_truth)
    for i_pos in range(n_ground_truth):
        s_ground_truth = ground_truth.loc[[i_pos]]
        movie_name = s_ground_truth['video-name'].values[0]
        n_frames = self.PathVars.video_frames[movie_name]
        if movie_name == miss_name:
            continue
        else:
            gt_start = s_ground_truth['f-init'].values[0]
            gt_end = min(s_ground_truth['f-end'].values[0], n_frames)
            if movie_name in self.movie_instances.keys():
                self.movie_instances[movie_name].append((gt_start, gt_end))
            else:
                self.movie_instances[movie_name] = [(gt_start, gt_end)]

    # TODO: repeats can be removed during training; at test time they come
    # back, since the same ground truth must be evaluated against different
    # overlapping clips.
    n_positive_instances = 0
    total_reps = 0
    for s_name in self.movie_instances.keys():
        s_action_list = self.movie_instances[s_name]
        orig_len = len(s_action_list)
        s_action_list = list(set(s_action_list))
        s_action_list.sort()
        cur_len = len(s_action_list)
        total_reps += orig_len - cur_len
        n_positive_instances += len(s_action_list)
        self.movie_instances[s_name] = s_action_list
    print("{:d} reps found".format(total_reps))

    # Slide a window of seq_length frames over each movie with the given
    # overlap; the last window is shifted back so it ends exactly at the
    # final frame.
    self.instances = []
    self.maximum_outputs = 0
    self.seq_len = seq_length
    for s_movie_name in self.movie_instances.keys():
        s_movie_instance = self.movie_instances[s_movie_name]
        n_frames = self.PathVars.video_frames[s_movie_name]
        start_idx = 0
        # TODO: revisit the handling of windows that expand past the video end.
        get_outbound = False
        while start_idx < n_frames:
            end_idx = start_idx + self.seq_len
            if end_idx >= n_frames:
                # TODO: should we add a 1-frame offset here?
                start_idx = start_idx - (end_idx - n_frames)
                end_idx = n_frames
                get_outbound = True
            s_instance = {}
            s_instance['name'] = s_movie_name
            s_instance['start'] = start_idx
            s_instance['end'] = end_idx
            s_instance['actions'] = []
            for s_action in s_movie_instance:
                # Keep only actions fully contained in this window.
                if s_action[0] >= start_idx and s_action[1] < end_idx:
                    s_instance['actions'].append(s_action)
            if len(s_instance['actions']) > self.maximum_outputs:
                self.maximum_outputs = len(s_instance['actions'])
            self.instances.append(s_instance)
            if get_outbound:
                break
            start_idx = int(start_idx + (1 - overlap) * self.seq_len)

    print(("{:d} video clips, {:d} training instances, {:d} positive examples, "
           "max instance per segment:{:d}").format(
               len(self.movie_instances), len(self.instances),
               n_positive_instances, self.maximum_outputs))
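# This loader records each clip as a {'name', 'start', 'end', 'actions'} dict,
# but the feature-reading side is not shown in this fragment.  A minimal
# sketch of turning an instance into a feature tensor, assuming one
# '<video-name>.npy' file per video in feature_directory with one feature row
# per frame (the file layout and the helper name are assumptions, not part of
# the original code):
def load_clip_features(dataset, idx):
    import os
    import numpy as np
    s_instance = dataset.instances[idx]
    feature_path = os.path.join(
        dataset.feature_directory,
        '{:s}.{:s}'.format(s_instance['name'], dataset.feature_file_ext))
    features = np.load(feature_path)  # assumed shape: [n_frames, feature_dim]
    # The clip's frame range selects a fixed-length [seq_len, feature_dim] slice.
    return features[s_instance['start']:s_instance['end']]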
def __init__(self, seq_length=50, unit_size=16, overlap=0.5, feature_file_ext='mat'):
    self.PathVars = path_vars.PathVars()
    self.unit_size = unit_size
    self.feature_directory = '/home/zwei/datasets/THUMOS14/features/c3d'
    self.feature_file_ext = feature_file_ext
    self.annotation_file = '/home/zwei/Dev/TURN_TAP_ICCV17/turn_codes/val_training_samples.txt'
    print("Reading training data list from {:s}\t clip len:{:d}".format(
        self.annotation_file, seq_length))

    self.movie_instances = {}
    with open(self.annotation_file) as f:
        for l in f:
            movie_name = l.rstrip().split(" ")[0]
            # Fields 1 and 2 hold the clip start/end; they are unused here.
            gt_start = float(l.rstrip().split(" ")[3])
            gt_end = float(l.rstrip().split(" ")[4])
            # Snap ground-truth boundaries to the nearest C3D unit boundary
            # (features are extracted once per unit_size frames).
            round_gt_start = np.round(gt_start / self.unit_size) * self.unit_size + 1
            round_gt_end = np.round(gt_end / self.unit_size) * self.unit_size + 1
            action_category = l.rstrip().split(" ")[-1]
            class_id = self.PathVars.classnames.index(action_category)
            # TODO: currently we want to remove overlaps between different
            # classes; class_id is computed but not stored yet.
            if movie_name in self.movie_instances.keys():
                self.movie_instances[movie_name].append(
                    (gt_start, gt_end, round_gt_start, round_gt_end))
            else:
                self.movie_instances[movie_name] = [
                    (gt_start, gt_end, round_gt_start, round_gt_end)]

    # Remove repeated instances.
    n_positive_instances = 0
    for s_name in self.movie_instances.keys():
        s_action_list = self.movie_instances[s_name]
        s_action_list = list(set(s_action_list))
        s_action_list.sort()
        n_positive_instances += len(s_action_list)
        self.movie_instances[s_name] = s_action_list

    # Slide a window of seq_length units (seq_length * unit_size frames) over
    # each movie with the given overlap.
    self.seq_len = seq_length
    self.instances = []
    self.maximum_outputs = 0
    for s_movie_name in self.movie_instances.keys():
        s_movie_instance = self.movie_instances[s_movie_name]
        n_frames = self.PathVars.video_frames[s_movie_name]
        start_idx = 0
        end_idx = (start_idx + self.seq_len) * self.unit_size
        while end_idx < n_frames:
            s_instance = {}
            s_instance['name'] = s_movie_name
            s_instance['start'] = start_idx
            s_instance['end'] = end_idx
            s_instance['actions'] = []
            for s_action in s_movie_instance:
                # Keep only actions whose rounded boundaries fall inside the window.
                if s_action[2] >= start_idx and s_action[3] < end_idx:
                    s_instance['actions'].append(s_action)
            if len(s_instance['actions']) > self.maximum_outputs:
                self.maximum_outputs = len(s_instance['actions'])
            self.instances.append(s_instance)
            start_idx = int(start_idx + self.seq_len * self.unit_size
                            - self.seq_len * overlap * self.unit_size)
            end_idx = start_idx + self.seq_len * self.unit_size
    print(("{:d} video clips, {:d} training instances, {:d} positive examples, "
           "max instance per segment:{:d}").format(
               len(self.movie_instances), len(self.instances),
               n_positive_instances, self.maximum_outputs))
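# A quick worked example of the unit snapping above (example values assumed):
#   unit_size = 16
#   np.round(23.0 / 16) * 16 + 1  ->  1 * 16 + 1 = 17
#   np.round(57.0 / 16) * 16 + 1  ->  4 * 16 + 1 = 65
# Both boundaries land one frame past a multiple of the unit length, matching
# the stride at which the C3D features were extracted.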
def __init__(self, seq_length=360, overlap=0.9, sample_rate=1, dataset_split='val',
             feature_file_ext='npy'):
    self.PathVars = path_vars.PathVars()
    self.feature_directory = '/home/zwei/datasets/THUMOS14/features/BNInception'
    self.feature_file_ext = feature_file_ext
    self.annotation_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/action_det_prep/thumos14_tag_{:s}_proposal_list.csv'.format(
        dataset_split)
    print("Reading training data list from {:s}\t clip len:{:d}, sample rate: {:d}"
          .format(self.annotation_file, seq_length, sample_rate))

    self.movie_instances = {}
    ground_truth = pd.read_csv(self.annotation_file, sep=' ')
    n_ground_truth = len(ground_truth)
    for i_pos in range(n_ground_truth):
        s_ground_truth = ground_truth.loc[[i_pos]]
        movie_name = s_ground_truth['video-name'].values[0]
        n_frames = self.PathVars.video_frames[movie_name]
        # NOTE: the other loaders compare with '== miss_name'; if miss_name is
        # a single string, 'in' performs a substring check instead.
        if movie_name in miss_name:
            continue
        else:
            gt_start = s_ground_truth['f-init'].values[0]
            gt_end = min(s_ground_truth['f-end'].values[0], n_frames)
            if movie_name in self.movie_instances.keys():
                self.movie_instances[movie_name].append((gt_start, gt_end))
            else:
                self.movie_instances[movie_name] = [(gt_start, gt_end)]

    # Update: repeats can be removed during training; at test time they come
    # back, since the same ground truth must be evaluated against different
    # overlapping clips.
    n_positive_instances = 0
    total_reps = 0
    for s_name in self.movie_instances.keys():
        s_action_list = self.movie_instances[s_name]
        orig_len = len(s_action_list)
        s_action_list = list(set(s_action_list))
        s_action_list.sort()  # sort from left to right
        cur_len = len(s_action_list)
        total_reps += orig_len - cur_len
        n_positive_instances += len(s_action_list)
        self.movie_instances[s_name] = s_action_list
    print("{:d} reps found".format(total_reps))

    self.instances = []
    self.maximum_outputs = 0
    self.seq_len = seq_length
    self.sample_rate = sample_rate
    for s_movie_name in self.movie_instances.keys():
        s_movie_instance = self.movie_instances[s_movie_name]
        n_frames = self.PathVars.video_frames[s_movie_name]
        start_idx = 0
        isInbound = True
        while start_idx < n_frames and isInbound:
            end_idx = start_idx + self.seq_len
            # UPDATE: the test cannot be '>' only, since isInbound must be set
            # to False on the pass where the window reaches the final frame.
            if end_idx >= n_frames:
                isInbound = False
                start_idx = start_idx - (end_idx - n_frames)
                end_idx = n_frames
            s_instance = {}
            s_instance['name'] = s_movie_name
            s_instance['start'] = start_idx
            s_instance['end'] = end_idx
            s_instance['actions'] = []
            # TODO: consider keeping only actions whose overlap with the
            # current clip exceeds a threshold -- and if we do, how should
            # their scores be assigned?
            s_instance_window = [start_idx, end_idx]
            for s_action in s_movie_instance:
                # Update: unlike the other loaders, partial overlaps are kept
                # here and cropped to the window boundaries.
                if compute_intersection(s_action, s_instance_window) == 1:
                    s_action_start = max(s_action[0], s_instance_window[0])
                    # TODO: check whether the end should be cropped with a -1.
                    s_action_end = min(s_action[1], s_instance_window[1] - 1)
                    # TODO: also record the overlap rate here!
                    s_instance['actions'].append([s_action_start, s_action_end])
            if len(s_instance['actions']) > self.maximum_outputs:
                self.maximum_outputs = len(s_instance['actions'])
            self.instances.append(s_instance)
            start_idx = int(start_idx + (1 - overlap) * self.seq_len)

    print(("{:d} video clips, {:d} training instances, {:d} positive examples, "
           "max instance per segment:{:d}").format(
               len(self.movie_instances), len(self.instances),
               n_positive_instances, self.maximum_outputs))
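# compute_intersection is called above but not defined in this fragment.  A
# minimal sketch consistent with how it is used (return 1 when the two
# [start, end] intervals overlap at all, 0 otherwise); the project's actual
# helper may apply a stricter criterion:
def compute_intersection(s_action, s_window):
    # Overlap exists exactly when the later start precedes the earlier end.
    if max(s_action[0], s_window[0]) < min(s_action[1], s_window[1]):
        return 1
    return 0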
def __init__(self, seq_length=50, unit_size=16., overlap=0.9, feature_file_ext='mat'):
    self.PathVars = path_vars.PathVars()
    self.unit_size = unit_size
    self.feature_directory = '/home/zwei/datasets/THUMOS14/features/c3d'
    self.feature_file_ext = feature_file_ext
    self.annotation_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/PropEval/thumos14_test_groundtruth.csv'
    print("Reading training data list from {:s}\t clip len:{:d}".format(
        self.annotation_file, seq_length))

    # Convert ground-truth frame boundaries into C3D unit indices; if the
    # start and end land in the same unit, extend the end by one unit.
    self.movie_instances = {}
    ground_truth = pd.read_csv(self.annotation_file, sep=' ')
    n_ground_truth = len(ground_truth)
    for i_pos in range(n_ground_truth):
        s_ground_truth = ground_truth.loc[[i_pos]]
        movie_name = s_ground_truth['video-name'].values[0]
        if movie_name == miss_name:
            continue
        else:
            gt_start = s_ground_truth['f-init'].values[0]
            gt_end = s_ground_truth['f-end'].values[0]
            c3d_gt_start = np.floor(gt_start / self.unit_size)
            c3d_gt_end = np.floor(gt_end / self.unit_size)
            if c3d_gt_end == c3d_gt_start:
                c3d_gt_end += 1
            if movie_name in self.movie_instances.keys():
                self.movie_instances[movie_name].append((c3d_gt_start, c3d_gt_end))
            else:
                self.movie_instances[movie_name] = [(c3d_gt_start, c3d_gt_end)]

    # Remove repeated instances and report how many were found.
    n_positive_instances = 0
    total_reps = 0
    for s_name in self.movie_instances.keys():
        s_action_list = self.movie_instances[s_name]
        orig_len = len(s_action_list)
        s_action_list = list(set(s_action_list))
        s_action_list.sort()
        cur_len = len(s_action_list)
        total_reps += orig_len - cur_len
        n_positive_instances += len(s_action_list)
        self.movie_instances[s_name] = s_action_list
    print("{:d} reps found".format(total_reps))

    # Slide a window of seq_length units over each movie; videos shorter than
    # one window are skipped, and the last window is shifted back so it ends
    # exactly at the final unit.
    self.instances = []
    self.maximum_outputs = 0
    self.seq_len = seq_length
    for s_movie_name in self.movie_instances.keys():
        s_movie_instance = self.movie_instances[s_movie_name]
        n_frames = int(self.PathVars.video_frames[s_movie_name] / self.unit_size)
        if n_frames <= self.seq_len:
            continue
        start_idx = 0
        get_outbound = False
        while start_idx < n_frames:
            end_idx = start_idx + self.seq_len
            if end_idx >= n_frames:
                # TODO: should we add a 1-unit offset here?
                start_idx = start_idx - (end_idx - n_frames)
                end_idx = n_frames
                get_outbound = True
            s_instance = {}
            s_instance['name'] = s_movie_name
            s_instance['start'] = start_idx
            s_instance['end'] = end_idx
            s_instance['actions'] = []
            for s_action in s_movie_instance:
                # Keep only actions fully contained in this window.
                if s_action[0] >= start_idx and s_action[1] < end_idx:
                    s_instance['actions'].append(s_action)
            if len(s_instance['actions']) > self.maximum_outputs:
                self.maximum_outputs = len(s_instance['actions'])
            self.instances.append(s_instance)
            if get_outbound:
                break
            start_idx = int(start_idx + (1 - overlap) * self.seq_len)

    print(("{:d} video clips, {:d} training instances, {:d} positive examples, "
           "max instance per segment:{:d}").format(
               len(self.movie_instances), len(self.instances),
               n_positive_instances, self.maximum_outputs))
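# A minimal usage sketch.  This fragment only shows the __init__, so the
# class name below is hypothetical -- substitute the real dataset class:
#
#   dataset = ThumosC3DTestDataset(seq_length=50, unit_size=16., overlap=0.9)
#   for s_instance in dataset.instances[:3]:
#       print(s_instance['name'], s_instance['start'], s_instance['end'],
#             len(s_instance['actions']))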