# Shared imports assumed by all snippets below. Video_Reader, FAST_DT, bbt,
# utils, models, config, Read_Annotation, draw_bounding_box_on_image_array
# and the `color` label-to-color map are project-local names from the
# surrounding repository.
import os
import sys
import signal
from datetime import datetime

import cv2
import tqdm
import numpy as np
import torch
from PIL import Image
import torchvision.transforms.functional as F

Example #1

def get_cropped_images(movie_path: str,
                       bounding_boxes_list: list,
                       max_frame: int = 100000):

    # Group box coordinates by frame. Each row is expected to follow the
    # [frame_idx, track_id, bbx_idx, x1, y1, x2, y2] layout written by
    # extract_roi_from_matlab_annotations below, so [3:7] keeps only the
    # coordinates.
    bounding_boxes_dict = {}
    for bounding_box in bounding_boxes_list:
        frame_idx = bounding_box[0]
        if frame_idx not in bounding_boxes_dict:
            bounding_boxes_dict[frame_idx] = [bounding_box[3:7]]
        else:
            bounding_boxes_dict[frame_idx].append(bounding_box[3:7])

    # Create video source instance
    video_src = Video_Reader(movie_path)

    cropped_image_list = []
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:

        ret, image = video_src.get_frame()
        if not ret:
            break

        if frame_idx in bounding_boxes_dict:

            bounding_boxes = bounding_boxes_dict[frame_idx]
            for bbx in bounding_boxes:
                cropped_image = image[bbx[1]: bbx[3], bbx[0]: bbx[2], :]
                cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
                cropped_image = Image.fromarray(cropped_image)
                cropped_image = utils.make_square(cropped_image)
                # interpolation=1 corresponds to PIL.Image.LANCZOS under
                # torchvision's legacy integer interpolation codes
                cropped_image = F.resize(cropped_image, size=160, interpolation=1)
                cropped_image_list.append(cropped_image)

    return cropped_image_list
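
A minimal usage sketch (hypothetical path and box values), assuming rows follow the [frame_idx, track_id, bbx_idx, x1, y1, x2, y2] layout noted above:

boxes = [[0, 1, 0, 10, 20, 110, 120],
         [5, 1, 1, 30, 40, 130, 140]]
crops = get_cropped_images('/path/to/movie.mkv', boxes, max_frame=10)
print('{} face crops extracted'.format(len(crops)))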
Example #2

def get_bounding_boxes(movie_path: str,
                       max_frame: int = 100000,
                       tracker_max_age: int = 10):

    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)

    my_fastdt = FAST_DT(tracker_max_age=tracker_max_age)

    print('Extracting face patches.')

    bounding_boxes_list = []
    bbx_idx = 0
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:

        ret, image = video_src.get_frame()
        if not ret:
            break

        bounding_boxes = my_fastdt.predict(image)
        for bbx in bounding_boxes:
            track_id = bbx[4]
            # The tracker's bbx is [x1, y1, x2, y2, track_id] (cf. extract_roi
            # below); rows are stored as
            # [frame_idx, track_id, bbx_idx, y1, y2, x1, x2].
            bounding_boxes_list.append([frame_idx, track_id, bbx_idx, bbx[1], bbx[3], bbx[0], bbx[2]])
            bbx_idx += 1

    return bounding_boxes_list
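
A minimal usage sketch (hypothetical path); FAST_DT's constructor signature is taken from this snippet, where it is built without a device argument:

boxes = get_bounding_boxes('/path/to/movie.mkv', max_frame=500)
print('{} boxes across {} tracks'.format(len(boxes), len({row[1] for row in boxes})))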
Example #3

def annotate_video(video_src: Video_Reader,
                   frame_track_dict: dict,
                   labels,
                   video_out_path):

    # Note: img_width, img_height, num_frame and the label-to-color map
    # `color` are assumed to be module-level globals here.
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(video_out_path, fourcc, 25, (img_width, img_height))

    video_src.reset()
    bbx_idx = 0
    tbar = tqdm.tqdm(range(num_frame))
    for frame_idx in tbar:

        ret, image = video_src.get_frame()
        if not ret:
            break

        if frame_idx in frame_track_dict:
            bounding_boxes = frame_track_dict[frame_idx]

            for bbx in bounding_boxes:
                label = labels[bbx_idx]

                annotation_str = ['Subject: {}'.format(label)]

                draw_bounding_box_on_image_array(
                    image,
                    bbx[1],
                    bbx[0],
                    bbx[3],
                    bbx[2],
                    color=color[label],
                    thickness=4,
                    display_str_list=annotation_str,
                    use_normalized_coordinates=False)

                bbx_idx += 1

        cv2.putText(image, "Frame {}".format(frame_idx), (10, 20),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_4)
        out.write(image)
    print('')

    print('Saving video at ' + video_out_path)
    out.release()
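
A minimal usage sketch (hypothetical values); the module-level globals that annotate_video relies on are set explicitly first:

img_width, img_height, num_frame = 1024, 576, 500
color = {'unknown': 'red'}
labels = ['unknown']
frame_track_dict = {0: [[10, 20, 110, 120]]}  # frame 0: one [x1, y1, x2, y2] box
video_src = Video_Reader('/path/to/movie.mkv')
annotate_video(video_src, frame_track_dict, labels, 'output/annotated.avi')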
Example #4

def write_video(movie_path: str,
                output_path: str,
                pred_labels: np.ndarray,
                frame_dict: dict,
                name: str = 'video_out',
                max_frame: int = 100000):

    # Set up video reader
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    img_height, img_width, img_channel = image.shape
    video_src.reset()

    # Set up video writer
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    video_out_filepath = os.path.join(output_path, name + '.avi')
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(video_out_filepath, fourcc, 25, (img_width, img_height))
    print('Writing video at {}.'.format(video_out_filepath))

    bbx_idx = 0
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:

        ret, image = video_src.get_frame()
        if not ret:
            break

        if frame_idx in frame_dict:
            bounding_boxes = frame_dict[frame_idx]

            for bbx in bounding_boxes:
                label = pred_labels[bbx_idx]
                annotation_str = ['Subject: {}'.format(label)]

                draw_bounding_box_on_image_array(
                    image,
                    bbx[1],
                    bbx[0],
                    bbx[3],
                    bbx[2],
                    color=color[label],
                    thickness=4,
                    display_str_list=annotation_str,
                    use_normalized_coordinates=False)

                bbx_idx += 1

        cv2.putText(image, "Frame {}".format(frame_idx), (10, 20),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_4)
        out.write(image)
    out.release()
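
A minimal usage sketch (hypothetical values), with frame_dict keyed by frame index and boxes in the [x1, y1, x2, y2] layout the drawing call expects:

color = {0: 'red', 1: 'blue'}
frame_dict = {0: [[10, 20, 110, 120]], 5: [[30, 40, 130, 140]]}
pred_labels = np.array([0, 1])
write_video('/path/to/movie.mkv', 'output/', pred_labels, frame_dict,
            name='demo', max_frame=10)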
Example #5

def extract_roi_from_matlab_annotations(movie_path: str,
                                        annotation_path: str,
                                        output_path: str,
                                        max_frame: int = 100000):

    if not os.path.exists(output_path):
        os.mkdir(output_path)

    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    video_src.reset()

    img_height, img_width, img_channel = image.shape

    print('Reading annotation at {}'.format(annotation_path))
    Annotation_list = bbt.Read_Annotation(annotation_path,
                                          (img_width, img_height))

    cooccurring_tracks = []
    bounding_boxes_list = []
    bbx_to_gt_list = []
    track_to_gt_list = []

    print('Extracting face patches.')

    bbx_idx = 0
    num_frame = min(len(Annotation_list), max_frame)
    tbar = tqdm.tqdm(range(num_frame))
    for frame_idx in tbar:

        ret, image = video_src.get_frame()
        if not ret:
            break

        bounding_boxes = Annotation_list[frame_idx]

        track_list = []
        for bbx in bounding_boxes:

            cropped_image = image[bbx[1]:bbx[3], bbx[0]:bbx[2], :]
            cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
            cropped_image = Image.fromarray(cropped_image)
            cropped_image = utils.make_square(cropped_image)
            cropped_image = cropped_image.resize((160, 160),
                                                 resample=Image.LANCZOS)

            track_id = bbx[6]
            gt_label = bbx[4]
            bounding_boxes_list.append(
                [frame_idx, track_id, bbx_idx, bbx[0], bbx[1], bbx[2], bbx[3]])
            bbx_to_gt_list.append([bbx_idx, gt_label])
            track_to_gt_list.append([track_id, gt_label])

            # Save image
            dir_name = '{:04d}'.format(track_id)
            image_name = '{:06d}.png'.format(bbx_idx)
            save_path = os.path.join(output_path, dir_name)
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            save_file_path = os.path.join(save_path, image_name)
            cropped_image.save(save_file_path)

            track_list.append(track_id)
            bbx_idx += 1

        # Note co-occurring tracks
        if len(track_list) > 1:
            track_list = sorted(track_list)
            if track_list not in cooccurring_tracks:
                cooccurring_tracks.append(track_list)

    # Save co-occurring tracks
    utils.write_list_to_file(
        os.path.join(output_path, "cooccurring_tracks.txt"),
        cooccurring_tracks)
    # Save bbx
    utils.write_list_to_file(os.path.join(output_path, "bbx.txt"),
                             bounding_boxes_list)

    # Save ground truth
    utils.write_list_to_file(os.path.join(output_path, "bbx_gt.txt"),
                             bbx_to_gt_list)
    utils.write_list_to_file(os.path.join(output_path, "track_gt.txt"),
                             track_to_gt_list)

    print('{} co-occurring tracks.'.format(len(cooccurring_tracks)))
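
A minimal usage sketch (hypothetical paths, mirroring those in the script of Example #8):

extract_roi_from_matlab_annotations(
    movie_path='/path/to/bbts01e01.mkv',
    annotation_path='/path/to/bbt_s01e01_facetracks.mat',
    output_path='/path/to/dataset/',
    max_frame=1000)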
Example #6

def extract_roi(movie_path: str,
                output_path: str,
                max_frame: int = 100000,
                tracker_max_age: int = 10):

    # Create video source instance
    print('Initializing video capture at {}'.format(movie_path))
    video_src = Video_Reader(movie_path)
    _, image = video_src.get_frame()
    video_src.reset()

    my_fastdt = FAST_DT("cpu", tracker_max_age=tracker_max_age)

    print('Extracting face patches.')

    image_dict = {}
    bbx_dict = {}
    cooccurring_tracks = []
    bbx_idx = 0
    tbar = tqdm.tqdm(range(max_frame))
    for frame_idx in tbar:

        ret, image = video_src.get_frame()
        if not ret:
            break

        bounding_boxes = my_fastdt.predict(image)

        for bbx in bounding_boxes:

            cropped_image = image[bbx[1]:bbx[3], bbx[0]:bbx[2], :]
            cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
            cropped_image = Image.fromarray(cropped_image)
            cropped_image = utils.make_square(cropped_image)
            cropped_image = F.resize(cropped_image, size=160, interpolation=1)

            track_id = bbx[4]

            if track_id not in image_dict:
                image_dict[track_id] = [(cropped_image, bbx_idx, frame_idx)]
                bbx_dict[track_id] = [[
                    frame_idx, track_id, bbx_idx, bbx[0], bbx[1], bbx[2],
                    bbx[3]
                ]]
            else:
                image_dict[track_id].append(
                    (cropped_image, bbx_idx, frame_idx))
                bbx_dict[track_id].append([
                    frame_idx, track_id, bbx_idx, bbx[0], bbx[1], bbx[2],
                    bbx[3]
                ])

            bbx_idx += 1

    # Remove the last samples of each track as they are residual samples from the tracker max age
    print('Removing residual samples.')
    track_id_list = list(image_dict.keys())
    for track_id in track_id_list:
        if len(image_dict[track_id]) + 1 < tracker_max_age:
            image_dict.pop(track_id)
            bbx_dict.pop(track_id)
        else:
            image_dict[track_id] = image_dict[track_id][1:-tracker_max_age]
            bbx_dict[track_id] = bbx_dict[track_id][1:-tracker_max_age]

    # Create the bounding_box_list
    bounding_boxes_list = []
    for track_id in bbx_dict:
        for bbx in bbx_dict[track_id]:
            bounding_boxes_list.append(bbx)

    # Convert the track-keyed dictionary to a frame-keyed dictionary
    print('Creating dataset.')
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    frame_to_track_dict = {}
    tbar2 = tqdm.tqdm(image_dict.keys())
    for track_id in tbar2:
        for cropped_image, bbx_idx, frame_idx in image_dict[track_id]:
            if frame_idx not in frame_to_track_dict:
                frame_to_track_dict[frame_idx] = [track_id]
            else:
                frame_to_track_dict[frame_idx].append(track_id)

            # Save image
            dir_name = '{:04d}'.format(track_id)
            image_name = '{:06d}.png'.format(bbx_idx)
            save_path = os.path.join(output_path, dir_name)
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            save_file_path = os.path.join(save_path, image_name)
            cropped_image.save(save_file_path)

    # Find co-occurring tracks
    print('Forming co-occurring tracks file.')
    for frame_idx in frame_to_track_dict:
        track_list = list(frame_to_track_dict[frame_idx])
        # Note co-occurring tracks
        if len(track_list) > 1:
            track_list = sorted(track_list)
            if track_list not in cooccurring_tracks:
                cooccurring_tracks.append(track_list)

    # Save co-occurring tracks once, after the whole scan
    utils.write_list_to_file(
        os.path.join(output_path, "cooccurring_tracks.txt"),
        cooccurring_tracks)
    # Save bbx
    utils.write_list_to_file(os.path.join(output_path, "bbx.txt"),
                             bounding_boxes_list)

    print('{} co-occurring tracks.'.format(len(cooccurring_tracks)))
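
A minimal usage sketch (hypothetical paths):

extract_roi('/path/to/movie.mkv', '/path/to/dataset/',
            max_frame=1000, tracker_max_age=10)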
Example #7

    def __init__(self, annotation_path, movie_path, max_frame, transform=None):

        self.annotation_path = annotation_path
        self.movie_path = movie_path
        self.transform = transform

        # Create video source instance
        print('Initializing video capture at {}'.format(movie_path))
        video_src = Video_Reader(movie_path)

        _, image = video_src.get_frame()

        img_height, img_width, img_channel = image.shape

        print('Reading annotation at {}'.format(annotation_path))
        Annotation_list = Read_Annotation(annotation_path,
                                          (img_width, img_height))

        cropped_image_list = []
        sample_trackid_list = []

        cooccurring_tracks_list = []
        tracksamplesidxs_dict = {}
        gt_labels_list = []
        classes_to_idx = {}
        idx_to_classes = {}
        gt_idx_list = []
        num_gt_classes = 0

        num_frame = min(len(Annotation_list), max_frame)

        print('Extracting face patches.')

        video_src.reset()
        image_idx = 0
        tbar = tqdm.tqdm(range(num_frame))
        for frame_idx in tbar:

            ret, image = video_src.get_frame()
            if not ret:
                break

            frame_annotations = Annotation_list[frame_idx]

            track_list = []
            for annotation in frame_annotations:

                cropped_image = image[annotation[1]:annotation[3],
                                      annotation[0]:annotation[2], :]
                cropped_image = cropped_image[:, :, ::-1]  # BGR to RGB
                # cropped_image = np.transpose(cropped_image, (2, 0, 1))
                cropped_image = Image.fromarray(cropped_image)
                cropped_image_list.append(cropped_image)
                sample_trackid_list.append(annotation[6])

                if annotation[6] not in tracksamplesidxs_dict:
                    tracksamplesidxs_dict[annotation[6]] = [image_idx]
                else:
                    tracksamplesidxs_dict[annotation[6]].append(image_idx)

                if annotation[4] not in classes_to_idx:
                    classes_to_idx[annotation[4]] = num_gt_classes
                    idx_to_classes[num_gt_classes] = annotation[4]
                    gt_idx_list.append(num_gt_classes)
                    num_gt_classes += 1
                gt_labels_list.append(classes_to_idx[annotation[4]])

                track_list.append(annotation[6])

                image_idx += 1

            # Note co-occurring tracks
            if len(frame_annotations) > 1:
                track_list.sort()
                if track_list not in cooccurring_tracks_list:
                    cooccurring_tracks_list.append(track_list)

        print('')

        self.cropped_image_list = cropped_image_list
        self.cooccurring_tracks_list = cooccurring_tracks_list
        self.tracksamplesidxs_dict = tracksamplesidxs_dict
        self.sample_trackid_list = sample_trackid_list
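
This __init__ reads like the constructor of a Dataset-style class; a minimal enclosing skeleton, assuming torch.utils.data.Dataset and the transform convention set up above (class name hypothetical):

class BBTFaceDataset(torch.utils.data.Dataset):

    # ... the __init__ above goes here ...

    def __len__(self):
        return len(self.cropped_image_list)

    def __getitem__(self, idx):
        image = self.cropped_image_list[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, self.sample_trackid_list[idx]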
Example #8

def signal_handler(sig, frame):
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)

if __name__ == '__main__':

    # ffmpeg -i bbtS01E01.mkv -vf scale=1024:576 -r 25 -codec:a copy outputbbt1.mkv

    annotation_path = '/export/livia/data/lemoineh/CVPR2013_PersonID_data/bbt_s01e01_facetracks.mat'
    movie_path = '/export/livia/data/lemoineh/BBT/bbts01e01.mkv'
    dataset_path = '/export/livia/data/lemoineh/BBT/ep01/'

    # Create video source instance
    print('Initializing video capture.')
    video_src = Video_Reader(movie_path)

    bbx = None

    video_out_dir = 'output/'
    if not os.path.exists(video_out_dir):
        os.mkdir(video_out_dir)

    _, image = video_src.get_frame()

    img_height, img_width, img_channel = image.shape

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    filename = datetime.now().strftime('%Y%m%d-%H%M%S')
    video_out_filepath = video_out_dir + filename + '.avi'
    out = cv2.VideoWriter(video_out_filepath, fourcc, 25,
                          (img_width, img_height))

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Load model
    print('Loading model from checkpoint {}'.format(config.model.checkpoint_path))
    checkpoint = torch.load(config.model.checkpoint_path)
    embedding_size = checkpoint['embedding_size']

    model = models.load_model(config.model.model_arch,
                              device,
                              embedding_size=embedding_size)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Create video source instance
    print('Initializing video capture at {}'.format(config.dataset.bbt.movie_path))
    video_src = Video_Reader(config.dataset.bbt.movie_path)

    if not os.path.exists(config.output.video_dir):
        os.mkdir(config.output.video_dir)

    _, image = video_src.get_frame()

    img_height, img_width, img_channel = image.shape

    # time metrics
    cycle_time = 1.0

    cropped_image_list = []
    track_dict = {}
    frame_dict = {}
    num_frame = config.dataset.bbt.num_frame