import os
import cPickle
from itertools import izip

import numpy as np

# Project-local helpers (parse_ip_type, rescale, MAX_WIDTH, get_time_intervals,
# get_slice_number, read_descriptors_from_video) are assumed to be defined in,
# or imported by, the surrounding module.


def compute_statistics_worker(dataset, samples, labels, sstats_out,
                              descs_to_sstats, pca, gmm, **kwargs):
    """ Computes the Fisher vectors for each slice that results from the
    temporal spliting of get_time_intervals. The resulting Fisher vectors
    are outputed to a binary file.

    """
    nr_frames_to_skip = kwargs.get('nr_frames_to_skip', 0)
    delta = kwargs.get('delta', 120)
    spacing = kwargs.get('spacing', 1)
    rescale_videos = kwargs.get('rescale_videos', 'none')
    per_shot = kwargs.get('per_shot', False)
    sample_limits_file = kwargs.get('sample_limits', None)

    if sample_limits_file:
        with open(sample_limits_file, 'rb') as ff:
            sample_limits = cPickle.load(ff)
    else:
        sample_limits = None

    _, track_len, _ = parse_ip_type(dataset.FTYPE)
    track_len = (nr_frames_to_skip + 1) * int(track_len)

    D = gmm.d             # Dimension of the PCA-reduced descriptors.
    K = dataset.VOC_SIZE  # Number of GMM components (vocabulary size).

    for sample, label in izip(samples, labels):
        # Skip samples that have already been processed; touch the output
        # file up front so that concurrent workers do not pick up the same
        # sample.
        if sstats_out.exists(str(sample)):
            continue
        sstats_out.touch(str(sample))

        # The path to the movie.
        infile = os.path.join(
            dataset.SRC_DIR,
            sample.movie + dataset.SRC_EXT)

        status = None
        if rescale_videos != 'none':
            # Rescale movie.
            status, infile = rescale(infile, MAX_WIDTH[rescale_videos],
                                     thresh=50)
            if status == 'bad_encoding':
                print 'Bad encoding ' + sample.movie
                continue

        if per_shot:
            begin_frames, end_frames = dataset.get_shots(sample.movie)
        elif sample_limits:
            begin_frames = sample_limits[sample]['begin_frames']
            end_frames = sample_limits[sample]['end_frames']
        else:
            begin_frames, end_frames = get_time_intervals(
                sample.bf, sample.ef, delta, spacing)

        # Count the number of descriptors for each slice and accumulate the
        # sufficient statistics (K posteriors plus K * D first-order and
        # K * D second-order moments per slice).
        nr_slices = len(begin_frames)
        N = np.zeros(nr_slices)
        sstats = np.zeros((nr_slices, K + 2 * K * D),
                          dtype=np.float32)

        for chunk in read_descriptors_from_video(
                infile, nr_descriptors=1, begin_frames=begin_frames,
                end_frames=end_frames, nr_skip_frames=nr_frames_to_skip):
            xx = pca.transform(chunk[:, 3:])

            # Determine slice number based on time.
            ii = get_slice_number(
                # Get time stamp of the beginning of the track.
                chunk[:, 2] - track_len + 1, begin_frames, end_frames)
            N[ii] += 1

            # Update corresponding sstats cell.
            sstats[ii] += descs_to_sstats(xx, gmm)

        # Drop slices that received no descriptors, then average the
        # remaining sufficient statistics.
        N_not_null = N[N != 0]
        sstats = sstats[N != 0, :]
        sstats /= N_not_null[:, np.newaxis]
        # Also write the label, the number of descriptors, and the begin and
        # end frames.
        sstats_out.write(str(sample), sstats, info={
            'label': label,
            'nr_descs': N,
            'begin_frames': begin_frames,
            'end_frames': end_frames})

        if status == 'rescaled':
            os.remove(infile)
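

# For reference, a minimal sketch of what a `descs_to_sstats` callable could
# compute for the K + 2 * K * D layout used above: per-component posteriors,
# followed by posterior-weighted first- and second-order moments. The
# attribute names `gmm.w`, `gmm.mu` and `gmm.sigma` (diagonal covariances)
# are assumptions made for illustration, not the project's actual GMM
# interface.
def example_descs_to_sstats(xx, gmm):
    """Average GMM sufficient statistics of the descriptors `xx` (N x D)."""
    nr_descs = xx.shape[0]
    # Posteriors q[n, k] proportional to w_k * N(x_n | mu_k, sigma_k),
    # computed in the log domain for numerical stability.
    diff = xx[:, np.newaxis, :] - gmm.mu[np.newaxis, :, :]
    log_q = -0.5 * np.sum(diff ** 2 / gmm.sigma, axis=2)
    log_q -= 0.5 * np.sum(np.log(2 * np.pi * gmm.sigma), axis=1)
    log_q += np.log(gmm.w)
    q = np.exp(log_q - log_q.max(axis=1)[:, np.newaxis])
    q /= q.sum(axis=1)[:, np.newaxis]
    # Zeroth-, first- and second-order statistics, averaged over descriptors.
    s0 = q.sum(axis=0) / nr_descs        # K
    s1 = np.dot(q.T, xx) / nr_descs      # K x D
    s2 = np.dot(q.T, xx ** 2) / nr_descs # K x D
    return np.hstack((s0, s1.flatten(), s2.flatten())).astype(np.float32)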


def compute_statistics_from_video_worker(dataset, samples, labels, sstats_out,
                                         descs_to_sstats, pca, gmm, **kwargs):
    """ Computes the Fisher vector directly from the video in an online
    fashion. The chain of actions is the following: compute descriptors one
    by one, get a descriptor and apply PCA to it, then compute the
    posterior probabilities and update the Fisher vector.

    Inputs
    ------
    dataset: Dataset instance
        The dataset on which we are operating.

    samples: list of SampID objects
        For which samples we compute sufficietn statistics.

    sstats_out: SstatsMap instace
        Defines the output location and names.

    descs_to_sstats: callable
        Function that converts the data to sufficient statistics.

    pca: PCA instance
        Used for dimensionality reduction.

    gmm: GMM instance

    Note: it doesn't have implemented multiple grids (spatial pyramids)

    """
    nr_frames_to_skip = kwargs.get("nr_frames_to_skip", 0)
    delta = kwargs.get("delta", 0)
    spacing = kwargs.get("spacing", 0)
    rescale_videos = kwargs.get("rescale_videos", "none")
    sample_limits_file = kwargs.get("sample_limits", None)

    if sample_limits_file:
        with open(sample_limits, "r") as ff:
            sample_limits = cPickle.load(ff)
    else:
        sample_limits = None

    D = gmm.d
    K = dataset.VOC_SIZE

    for sample, label in izip(samples, labels):
        # Skip samples that have already been processed; touch the output
        # file up front so that concurrent workers do not pick up the same
        # sample.
        if sstats_out.exists(str(sample)):
            continue
        sstats_out.touch(str(sample))

        # The path to the movie.
        infile = os.path.join(dataset.SRC_DIR, sample.movie + dataset.SRC_EXT)

        status = None
        if rescale_videos != "none":
            status, infile = rescale(infile, MAX_WIDTH[rescale_videos], thresh=50)
            if status == "bad_encoding":
                print "Bad encoding " + sample.movie
                # sstats_out.remove(str(sample))
                continue

        if sample_limits:
            begin_frames = sample_limits[sample]["begin_frames"]
            end_frames = sample_limits[sample]["end_frames"]
        else:
            begin_frames, end_frames = get_time_intervals(
                sample.bf, sample.ef, delta, spacing)

        N = 0  # Count the number of descriptors for this sample.
        sstats = np.zeros(K + 2 * K * D, dtype=np.float32)

        for chunk in read_descriptors_from_video(
            infile, begin_frames=begin_frames, end_frames=end_frames, nr_skip_frames=nr_frames_to_skip
        ):
            chunk_size = chunk.shape[0]
            # Apply PCA to the descriptor.
            xx = pca.transform(chunk[:, 3:])

            # Update the sufficient statistics for this sample.
            sstats += descs_to_sstats(xx, gmm) * chunk_size
            N += chunk_size

        # Normalize the accumulated statistics; guard against samples that
        # yielded no descriptors at all.
        if N > 0:
            sstats /= N
        sstats_out.write(
            str(sample),
            sstats,
            info={
                "label": label,
                "nr_descs": np.array([N]),
                "begin_frames": np.array([sample.bf]),
                "end_frames": np.array([sample.ef]),
            },
        )

        # Delete rescaled video.
        if status == "rescaled":
            os.remove(infile)
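

# For context, a minimal sketch of the `get_time_intervals` helper used by
# both workers. The exact semantics here are an assumption: windows of
# `delta` frames whose starting points are `spacing * delta` frames apart,
# with `delta == 0` meaning a single interval covering the whole sample.
def example_get_time_intervals(begin_frame, end_frame, delta, spacing):
    if delta == 0:
        return [begin_frame], [end_frame]
    begin_frames, end_frames = [], []
    step = max(1, delta * spacing)  # Guard against a zero step size.
    for start in xrange(begin_frame, end_frame, step):
        begin_frames.append(start)
        end_frames.append(min(start + delta, end_frame))
    return begin_frames, end_frames

# Under these assumptions, example_get_time_intervals(0, 300, 120, 1)
# returns ([0, 120, 240], [120, 240, 300]).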