def get_cropped_images(movie_path: str, bounding_boxes_list: list,
                       max_frame: int = 100000):
    # Group the bounding boxes by frame index. Each entry of
    # bounding_boxes_list is [frame_idx, track_id, bbx_idx, x1, y1, x2, y2],
    # so [3:7] keeps only the box coordinates.
    bounding_boxes_dict = {}
    for bounding_box in bounding_boxes_list:
        frame_idx = bounding_box[0]
        if frame_idx not in bounding_boxes_dict:
            bounding_boxes_dict[frame_idx] = [bounding_box[3:7]]
        else:
            bounding_boxes_dict[frame_idx].append(bounding_box[3:7])

    # Create video source instance
    video_src = Video_Reader(movie_path)

    cropped_image_list = []
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break
        if frame_idx in bounding_boxes_dict:
            bounding_boxes = bounding_boxes_dict[frame_idx]
            for bbx in bounding_boxes:
                # Crop the face patch, convert BGR (OpenCV) to RGB (PIL),
                # pad to a square and resize to the 160x160 network input.
                cropped_image = image[bbx[1]:bbx[3], bbx[0]:bbx[2], :]
                cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
                cropped_image = Image.fromarray(cropped_image)
                cropped_image = utils.make_square(cropped_image)
                # interpolation=1 is bilinear in older torchvision versions
                cropped_image = F.resize(cropped_image, size=160, interpolation=1)
                cropped_image_list.append(cropped_image)

    return cropped_image_list
def get_bounding_boxes(movie_path: str, max_frame: int = 100000,
                       tracker_max_age: int = 10):
    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)

    my_fastdt = FAST_DT(tracker_max_age=tracker_max_age)

    print('Extracting face patches.')
    bounding_boxes_list = []
    bbx_idx = 0
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break
        bounding_boxes = my_fastdt.predict(image)
        for bbx in bounding_boxes:
            track_id = bbx[4]
            # Store boxes as [frame_idx, track_id, bbx_idx, x1, y1, x2, y2],
            # the convention expected by get_cropped_images and extract_roi.
            bounding_boxes_list.append(
                [frame_idx, track_id, bbx_idx, bbx[0], bbx[1], bbx[2], bbx[3]])
            bbx_idx += 1

    return bounding_boxes_list
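# A minimal usage sketch for the two helpers above ('episode01.mkv' is a
# hypothetical path): run the detector/tracker over the video once, then
# crop the corresponding face patches in a second pass.
#
#     boxes = get_bounding_boxes('episode01.mkv', max_frame=5000)
#     crops = get_cropped_images('episode01.mkv', boxes, max_frame=5000)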
def annotate_video(video_src: Video_Reader, frame_track_dict: dict, labels,
                   video_out_path, num_frame: int = 100000):
    # Probe the first frame for the output dimensions, then rewind.
    _, image = video_src.get_frame()
    img_height, img_width, _ = image.shape
    video_src.reset()

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(video_out_path, fourcc, 25, (img_width, img_height))

    bbx_idx = 0
    tbar = tqdm.tqdm(range(num_frame))
    for frame_idx in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break
        if frame_idx in frame_track_dict:
            bounding_boxes = frame_track_dict[frame_idx]
            for bbx in bounding_boxes:
                label = labels[bbx_idx]
                annotation_str = ['Subject: {}'.format(label)]
                # 'color' is assumed to be a module-level mapping from
                # label to a drawing color.
                draw_bounding_box_on_image_array(
                    image, bbx[1], bbx[0], bbx[3], bbx[2],
                    color=color[label], thickness=4,
                    display_str_list=annotation_str,
                    use_normalized_coordinates=False)
                bbx_idx += 1
        cv2.putText(image, "Frame {}".format(frame_idx), (10, 20),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_4)
        out.write(image)

    print('')
    print('Saving video at ' + video_out_path)
    out.release()
def write_video(movie_path: str, output_path: str, pred_labels: np.ndarray,
                frame_dict: dict, name: str = 'video_out',
                max_frame: int = 100000):
    # Set up video reader and probe the first frame for the output size
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    img_height, img_width, img_channel = image.shape
    video_src.reset()

    # Set up video writer
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    video_out_filepath = os.path.join(output_path, name + '.avi')
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(video_out_filepath, fourcc, 25,
                          (img_width, img_height))

    print('Writing video at {}.'.format(video_out_filepath))
    bbx_idx = 0
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break
        if frame_idx in frame_dict:
            bounding_boxes = frame_dict[frame_idx]
            for bbx in bounding_boxes:
                label = pred_labels[bbx_idx]
                annotation_str = ['Subject: {}'.format(label)]
                draw_bounding_box_on_image_array(
                    image, bbx[1], bbx[0], bbx[3], bbx[2],
                    color=color[label], thickness=4,
                    display_str_list=annotation_str,
                    use_normalized_coordinates=False)
                bbx_idx += 1
        cv2.putText(image, "Frame {}".format(frame_idx), (10, 20),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_4)
        out.write(image)

    out.release()
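# Usage sketch (hypothetical paths; assumes frame_dict maps each frame index
# to its list of [x1, y1, x2, y2] boxes, visited in the same order in which
# pred_labels was produced):
#
#     boxes = get_bounding_boxes('episode01.mkv')
#     frame_dict = {}
#     for frame_idx, track_id, bbx_idx, x1, y1, x2, y2 in boxes:
#         frame_dict.setdefault(frame_idx, []).append([x1, y1, x2, y2])
#     write_video('episode01.mkv', 'output/', pred_labels, frame_dict)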
def extract_roi_from_matlab_annotations(movie_path: str, annotation_path: str,
                                        output_path: str,
                                        max_frame: int = 100000):
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    video_src.reset()
    img_height, img_width, img_channel = image.shape

    print('Reading annotation at {}'.format(annotation_path))
    Annotation_list = bbt.Read_Annotation(annotation_path,
                                          (img_width, img_height))

    cooccurring_tracks = []
    bounding_boxes_list = []
    bbx_to_gt_list = []
    track_to_gt_list = []

    print('Extracting face patches.')
    frame_idx = 0
    bbx_idx = 0
    num_frame = min(len(Annotation_list), max_frame)
    tbar = tqdm.tqdm(range(num_frame))
    for j in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break

        bounding_boxes = Annotation_list[frame_idx]
        track_list = []
        for bbx in bounding_boxes:
            # Annotation entries carry [x1, y1, x2, y2, gt_label, ..., track_id]
            cropped_image = image[bbx[1]:bbx[3], bbx[0]:bbx[2], :]
            cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
            cropped_image = Image.fromarray(cropped_image)
            cropped_image = utils.make_square(cropped_image)
            cropped_image = cropped_image.resize((160, 160),
                                                 resample=Image.LANCZOS)

            track_id = bbx[6]
            gt_label = bbx[4]
            bounding_boxes_list.append(
                [frame_idx, track_id, bbx_idx, bbx[0], bbx[1], bbx[2], bbx[3]])
            bbx_to_gt_list.append([bbx_idx, gt_label])
            track_to_gt_list.append([track_id, gt_label])

            # Save image under output_path/<track_id>/<bbx_idx>.png
            dir_name = '{:04d}'.format(track_id)
            image_name = '{:06d}.png'.format(bbx_idx)
            save_path = os.path.join(output_path, dir_name)
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            save_file_path = os.path.join(save_path, image_name)
            cropped_image.save(save_file_path)

            track_list.append(track_id)
            bbx_idx += 1

        # Note co-occurring tracks
        if len(track_list) > 1:
            track_list = sorted(track_list)
            if track_list not in cooccurring_tracks:
                cooccurring_tracks.append(track_list)

        frame_idx += 1

    # Save co-occurring tracks
    utils.write_list_to_file(
        os.path.join(output_path, "cooccurring_tracks.txt"),
        cooccurring_tracks)
    # Save bounding boxes
    utils.write_list_to_file(os.path.join(output_path, "bbx.txt"),
                             bounding_boxes_list)
    # Save ground truth
    utils.write_list_to_file(os.path.join(output_path, "bbx_gt.txt"),
                             bbx_to_gt_list)
    utils.write_list_to_file(os.path.join(output_path, "track_gt.txt"),
                             track_to_gt_list)
    print('{} co-occurring tracks.'.format(len(cooccurring_tracks)))
def extract_roi(movie_path: str, output_path: str, max_frame: int = 100000,
                tracker_max_age: int = 10):
    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    video_src.reset()

    my_fastdt = FAST_DT("cpu", tracker_max_age=tracker_max_age)

    print('Extracting face patches.')
    image_dict = {}
    bbx_dict = {}
    cooccurring_tracks = []
    bbx_idx = 0
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break

        bounding_boxes = my_fastdt.predict(image)
        for bbx in bounding_boxes:
            cropped_image = image[bbx[1]:bbx[3], bbx[0]:bbx[2], :]
            cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
            cropped_image = Image.fromarray(cropped_image)
            cropped_image = utils.make_square(cropped_image)
            cropped_image = F.resize(cropped_image, size=160, interpolation=1)

            track_id = bbx[4]
            # Group cropped samples and their boxes per track.
            if track_id not in image_dict:
                image_dict[track_id] = [(cropped_image, bbx_idx, frame_idx)]
                bbx_dict[track_id] = [[frame_idx, track_id, bbx_idx,
                                       bbx[0], bbx[1], bbx[2], bbx[3]]]
            else:
                image_dict[track_id].append(
                    (cropped_image, bbx_idx, frame_idx))
                bbx_dict[track_id].append([frame_idx, track_id, bbx_idx,
                                           bbx[0], bbx[1], bbx[2], bbx[3]])
            bbx_idx += 1

    # Remove the last samples of each track, since they are residual samples
    # kept alive by the tracker's max age; drop tracks too short to survive
    # the trimming.
    print('Removing residual samples.')
    track_id_list = list(image_dict.keys())
    for track_id in track_id_list:
        if len(image_dict[track_id]) + 1 < tracker_max_age:
            image_dict.pop(track_id)
            bbx_dict.pop(track_id)
        else:
            image_dict[track_id] = image_dict[track_id][1:-tracker_max_age]
            bbx_dict[track_id] = bbx_dict[track_id][1:-tracker_max_age]

    # Create the bounding box list
    bounding_boxes_list = []
    for track_id in bbx_dict.keys():
        for bbx in bbx_dict[track_id]:
            bounding_boxes_list.append(bbx)

    # Convert the per-track dictionary to a per-frame dictionary
    print('Creating dataset.')
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    frame_to_track_dict = {}
    tbar2 = tqdm.tqdm(image_dict.keys())
    for track_id in tbar2:
        for cropped_image, bbx_idx, frame_idx in image_dict[track_id]:
            if frame_idx not in frame_to_track_dict:
                frame_to_track_dict[frame_idx] = [track_id]
            else:
                frame_to_track_dict[frame_idx].append(track_id)

            # Save image under output_path/<track_id>/<bbx_idx>.png
            dir_name = '{:04d}'.format(track_id)
            image_name = '{:06d}.png'.format(bbx_idx)
            save_path = os.path.join(output_path, dir_name)
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            save_file_path = os.path.join(save_path, image_name)
            cropped_image.save(save_file_path)

    # Find co-occurring tracks
    print('Forming co-occurring tracks file.')
    for frame_idx in frame_to_track_dict.keys():
        track_list = sorted(frame_to_track_dict[frame_idx])
        # Note co-occurring tracks
        if len(track_list) > 1 and track_list not in cooccurring_tracks:
            cooccurring_tracks.append(track_list)

    # Save co-occurring tracks
    utils.write_list_to_file(
        os.path.join(output_path, "cooccurring_tracks.txt"),
        cooccurring_tracks)
    # Save bounding boxes
    utils.write_list_to_file(os.path.join(output_path, "bbx.txt"),
                             bounding_boxes_list)
    print('{} co-occurring tracks.'.format(len(cooccurring_tracks)))
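# Usage sketch (hypothetical paths): build a face-track dataset from a raw
# episode. Crops land in output_path/<track_id>/<bbx_idx>.png, and the saved
# bbx.txt rows follow the [frame_idx, track_id, bbx_idx, x1, y1, x2, y2]
# convention used by get_cropped_images.
#
#     extract_roi('episode01.mkv', '/data/bbt/ep01/', max_frame=50000,
#                 tracker_max_age=10)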
def __init__(self, annotation_path, movie_path, max_frame, transform=None):
    self.annotation_path = annotation_path
    self.movie_path = movie_path
    self.transform = transform

    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    img_height, img_width, img_channel = image.shape

    print('Reading annotation at {}'.format(annotation_path))
    Annotation_list = Read_Annotation(annotation_path,
                                      (img_width, img_height))

    cropped_image_list = []
    sample_tarkid_list = []
    cooccuring_tracks_list = []
    tracksamplesidxs_dict = {}
    gt_labels_list = []
    classes_to_idx = {}
    idx_to_classes = {}
    gt_idx_list = []
    num_gt_classes = 0
    num_frame = min(len(Annotation_list), max_frame)

    print('Extracting face patches.')
    video_src.reset()
    frame_idx = 0
    image_idx = 0
    tbar = tqdm.tqdm(range(num_frame))
    for j in tbar:
        ret, image = video_src.get_frame()
        if not ret:
            break

        frame_annotations = Annotation_list[frame_idx]
        track_list = []
        for annotation in frame_annotations:
            # Annotation entries carry [x1, y1, x2, y2, gt_label, ..., track_id]
            cropped_image = image[annotation[1]:annotation[3],
                                  annotation[0]:annotation[2], :]
            cropped_image = cropped_image[:, :, ::-1]  # BGR to RGB
            cropped_image = Image.fromarray(cropped_image)
            cropped_image_list.append(cropped_image)
            sample_tarkid_list.append(annotation[6])

            if annotation[6] not in tracksamplesidxs_dict:
                tracksamplesidxs_dict[annotation[6]] = [image_idx]
            else:
                tracksamplesidxs_dict[annotation[6]].append(image_idx)

            # Map ground-truth class names to contiguous indices.
            if annotation[4] not in classes_to_idx:
                classes_to_idx[annotation[4]] = num_gt_classes
                idx_to_classes[num_gt_classes] = annotation[4]
                gt_idx_list.append(num_gt_classes)
                num_gt_classes += 1
            gt_labels_list.append(classes_to_idx[annotation[4]])

            track_list.append(annotation[6])
            image_idx += 1

        # Note co-occurring tracks
        if len(frame_annotations) > 1:
            track_list.sort()
            if track_list not in cooccuring_tracks_list:
                cooccuring_tracks_list.append(track_list)

        frame_idx += 1
    print('')

    self.cropped_image_list = cropped_image_list
    self.cooccuring_tracks_list = cooccuring_tracks_list
    self.tracksamplesidxs_dict = tracksamplesidxs_dict
    self.sample_tarkid_list = sample_tarkid_list
def signal_handler(sig, frame):
    # Exit cleanly on Ctrl+C.
    sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)

if __name__ == '__main__':
    # Pre-scale the episode, e.g.:
    # ffmpeg -i bbtS01E01.mkv -vf scale=1024:576 -r 25 -codec:a copy outputbbt1.mkv
    annotation_path = '/export/livia/data/lemoineh/CVPR2013_PersonID_data/bbt_s01e01_facetracks.mat'
    movie_path = '/export/livia/data/lemoineh/BBT/bbts01e01.mkv'
    dataset_path = '/export/livia/data/lemoineh/BBT/ep01/'

    # Create video source instance
    print('Initializing video capture.')
    video_src = Video_Reader(movie_path)

    bbx = None
    video_out_dir = 'output/'
    if not os.path.exists(video_out_dir):
        os.mkdir(video_out_dir)

    _, image = video_src.get_frame()
    img_height, img_width, img_channel = image.shape

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    filename = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    video_out_filepath = video_out_dir + filename + '.avi'
    out = cv2.VideoWriter(video_out_filepath, fourcc, 25,
                          (img_width, img_height))
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Load model
print('Loading model from checkpoint {}'.format(config.model.checkpoint_path))
checkpoint = torch.load(config.model.checkpoint_path)
embedding_size = checkpoint['embedding_size']
model = models.load_model(config.model.model_arch, device,
                          embedding_size=embedding_size)
model.load_state_dict(checkpoint['model_state_dict'])

# Create video source instance
print('Initializing video capture at {}'.format(config.dataset.bbt.movie_path))
video_src = Video_Reader(config.dataset.bbt.movie_path)

if not os.path.exists(config.output.video_dir):
    os.mkdir(config.output.video_dir)

_, image = video_src.get_frame()
img_height, img_width, img_channel = image.shape

# Time metrics
cycle_time = 1.0

cropped_image_list = []
track_dict = {}
frame_dict = {}
num_frame = config.dataset.bbt.num_frame