import glob
import os
import warnings

import hickle as hkl
import numpy as np
import pandas as pd

# Project-internal helpers (e.g. BaselineData, TempPriorsNoScale,
# segment_format, proposals_per_video, dump_json, Feature, forward_pass,
# floatX, segment_iou, segment_unit_scaling, compute_priors_over_time,
# generate_segments, REQ_INFO_CP, NUM_PROPOSALS) are assumed importable
# from the surrounding code base.


def eval_temporal_priors(train_file,
                         test_file,
                         n_prop=NUM_PROPOSALS,
                         filename=None):
    """Run TempPriorsNoScale over a range of proposal counts.
    """
    ds_train = BaselineData.fromcsv(train_file)
    Xtrain = ds_train.get_temporal_loc()
    ds_test_df = pd.read_csv(test_file, sep=' ')
    Ztest = np.array(ds_test_df.loc[:, 'n-frames'])

    for i, v in enumerate(n_prop):
        if v > Xtrain.shape[0]:
            # More priors requested than training annotations available; skip.
            continue

        m = TempPriorsNoScale(v)
        m.fit(Xtrain)
        Ypred_centered, idx = m.proposals(Ztest, return_index=True)

        Ypred = segment_format(Ypred_centered, 'c2b')
        # Form video-proposals format [f-init, f-end, score]
        vid_prop_all = np.hstack([Ypred, np.zeros((Ypred.shape[0], 1))])
        vid_prop = proposals_per_video(vid_prop_all, v)
        id_prop = dict(
            zip(ds_test_df.loc[:, 'video-name'].tolist(), vid_prop.tolist()))

        if isinstance(filename, str):
            idfile = filename + '.n-prop_{}'.format(v)
            dump_json(idfile, id_prop)
    return None
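
# A minimal usage sketch, assuming hypothetical annotation files: the train
# CSV must be readable by BaselineData.fromcsv and the test CSV a
# space-separated table with 'video-name' and 'n-frames' columns, as used
# above. The proposal grid is illustrative.
eval_temporal_priors('train_annotations.csv', 'test_annotations.csv',
                     n_prop=[64, 128, 256], filename='temporal_priors')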
def load_proposals(proposal_dir,
                   stride=128,
                   T=256,
                   file_filter=None,
                   priors_filename=None):
    """Load proposal DataFrames from files.
    """
    proposal_df = []
    vds_true = None
    if file_filter:
        vds_true = pd.read_csv(file_filter)['video-name'].tolist()
    filenames = glob.glob(os.path.join(proposal_dir, '*.proposals'))
    priors = None
    if priors_filename:
        priors = hkl.load(priors_filename)
    for f in filenames:
        vid = os.path.basename(f).split('.')[0]
        if file_filter and vid not in vds_true:
            continue
        this_df = pd.read_csv(f, sep=' ', index_col=False)
        if priors_filename:
            n_proposals = priors.shape[0]
            # Each segment contributes n_proposals rows; integer division
            # keeps the count usable by np.tile and np.zeros.
            n_segments = this_df.shape[0] // n_proposals
            this_priors = np.tile(priors, (n_segments, 1))
            l_size = this_df['video-frames'].mean()
            f_init_array = np.arange(0, l_size - T, stride)
            map_array = np.stack((f_init_array, np.zeros(n_segments)))
            map_array = map_array.repeat(n_proposals, axis=-1).T
            proposals = segment_format(
                map_array + (this_priors.clip(0, 1) * T), 'c2b').astype(int)
            this_df['f-init'] = proposals[:, 0]
            this_df['f-end'] = proposals[:, 1]
        proposal_df.append(this_df)
    return pd.concat(proposal_df, axis=0)
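
# A minimal usage sketch, assuming hypothetical paths: a directory of
# space-separated '*.proposals' tables, a filter CSV with a 'video-name'
# column, and a hickle file holding a [K x 2] priors ndarray, matching what
# the loop above expects.
val_proposals = load_proposals('proposals/', stride=128, T=256,
                               file_filter='val_list.csv',
                               priors_filename='priors.hkl')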
    def retrieve_proposals(self, c3d_stack, f_init_array, override=False):
        """Retrieve proposals for multiple streams.

        Parameters
        ----------
        c3d_stack : ndarray
            3d-ndarray [num-streams, seq-length, input-size] with visual
            encoder representation of each stream.
            Note that the first dimension is sequence agnostic, so you can
            push as many videos as your hardware allows.
        f_init_array : ndarray
            1d-ndarray with initial frame of each stream.
        override : bool, optional
            If True, override predicted locations with anchors. Make sure to
            initialize your instance properly in order to use the anchors.

        Returns
        -------
        proposals : ndarray
            3d-ndarray [num-streams, num-outputs, 2] with proposal locations in
            terms of f-init, f-end.
        conf : ndarray
            2d-ndarray [num-streams, num-outputs] action likelihood of each
            proposal

        Raises
        ------
        ValueError
            Mismatch between c3d_stack.shape[0] and f_init_array.size

        """
        if c3d_stack.ndim == 2 and c3d_stack.shape[0] == self.seq_length:
            c3d_stack = c3d_stack[np.newaxis, ...]
        if c3d_stack.shape[0] != f_init_array.size:
            raise ValueError('Mismatch between c3d_stack and f_init_array')
        n_streams = c3d_stack.shape[0]

        loc, score = self.forward_pass(floatX(c3d_stack))

        if override and self.anchors is not None:
            loc[:, ...] = self.anchors.reshape(-1)

        # Clip proposals inside receptive field
        loc.clip(0, 1, out=loc)
        loc *= self.receptive_field

        # Shift center to absolute location in the video
        loc = loc.reshape((n_streams, -1, 2))
        loc[:, :, 0] += f_init_array.reshape((n_streams, 1))

        # Transform center format to boundary format
        proposals = np.reshape(segment_format(loc.reshape((-1, 2)), 'c2b'),
                               (n_streams, -1, 2)).astype(int)
        return proposals, score
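
# A minimal usage sketch for the method above (doctest style, since the
# enclosing class is not part of this excerpt). 'model' stands for an
# already-initialized instance; the input size of 500 is an assumption.
# >>> c3d_stack = np.random.rand(4, model.seq_length, 500)
# >>> f_init_array = np.array([0, 128, 256, 384])
# >>> proposals, conf = model.retrieve_proposals(c3d_stack, f_init_array)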
def compute_priors(df, T, K=200, iou_thr=0.5, norm_fcn=None,
                   i_thr=1.0, rng_seed=None):
    """Clustering of ground truth locations

    Parameters
    ----------
    df : DataFrame
        pandas table with annotations of the dataset. It must include the
        columns listed in data_generation.REQ_INFO_CP.
    T : int
        canonical temporal size of evaluation window
    K : int, optional
        number of priors
    iou_thr : float
        IOU threshold to consider that an annotation matches a prior
    norm_fcn : function
        Function to apply over an ndarray [m x 2] of segments with
        format := [f-init, f-end] before computing priors. Defaults to
        wrapper_unit_scaling.
    i_thr : float
        ratio [0, 1] to include an annotation inside a segment.
    rng_seed : int
        Seed for random number generator

    Outputs
    -------
    priors : ndarray
        2-dim array of priors discovered. The first dimension iterates over the
        different priors.
    new_df : DataFrame
        Table with information about instances to use in training

    """
    # Input validation
    if norm_fcn is None:
        # wrapper_unit_scaling is defined later in this listing, so bind the
        # default lazily instead of at def time.
        norm_fcn = wrapper_unit_scaling
    if not isinstance(df, pd.DataFrame):
        raise ValueError('df argument must be a pd.DataFrame')
    if not set(REQ_INFO_CP).issubset(df.columns.tolist()):
        msg = 'df must include these column names: {}'.format(REQ_INFO_CP)
        raise ValueError(msg)
    if iou_thr > 1 or iou_thr < 0:
        raise ValueError('Invalid value of IOU')

    # Loop over videos
    videos = df['video-name'].unique()
    L = np.empty(videos.size, dtype=int)
    segment_lst, n_seg = [None] * videos.size, np.empty(videos.size, dtype=int)
    mapped_gt_lst, n_gt_lst = [None] * videos.size, [None] * videos.size
    for i, v in enumerate(videos):
        idx = df['video-name'] == v
        L[i] = df.loc[idx, 'video-frames'].mean()
        gtruth_c = df.loc[idx, ['f-init', 'n-frames']]
        gtruth_b = segment_format(np.array(gtruth_c), 'd2b')
        segment_lst[i], gt_list_i, n_gt_lst[i] = generate_segments(
            T, L[i], gtruth_b, method='iou', rng_seed=rng_seed, i_thr=i_thr)
        n_seg[i] = segment_lst[i].shape[0]
        if len(gt_list_i) > 0:
            mapped_gt_lst[i] = np.vstack(gt_list_i)
        else:
            mapped_gt_lst[i] = np.empty((0, 2))

    # Standardize mapped annotations into a common reference + Normalization
    segments = np.vstack(segment_lst)
    mapped_gt = np.vstack(mapped_gt_lst)
    n_gt = np.hstack(n_gt_lst)
    X = norm_fcn(mapped_gt, T, segments, n_gt)

    # Clustering
    model = TempPriorsNoScale(K, rng_seed=rng_seed)
    model.fit(X)
    priors = model.priors

    # Matching
    # Scale priors and use boundary format (constant across videos).
    mapped_priors_b = segment_format(priors * T, 'c2b')
    score = np.empty((segments.shape[0], priors.shape[0]), dtype=int)
    j = 0
    for i, v in enumerate(segment_lst):
        s_ref = np.expand_dims(np.repeat(v[:, 0], n_gt_lst[i]), 1)

        # Reference mapped gt on [0 - T] interval
        if mapped_gt_lst[i].size == 0:
            # No annotations for this video: mark all priors as negatives
            # and keep the row pointer aligned.
            score[j:j+n_seg[i], :] = 0
            j += n_seg[i]
            continue
        mapped_gt_i_ref = mapped_gt_lst[i] - s_ref
        if (mapped_gt_i_ref[:, 0] < 0).sum() > 0:
            msg = ('Initial frame must be non-negative. Running at your '
                   'own risk; debugging is needed.')
            warnings.warn(msg)

        # IOU computation
        iou = segment_iou(mapped_priors_b, mapped_gt_i_ref)

        # Map IOU of priors for each segment
        idx = [0] + np.cumsum(n_gt_lst[i]).tolist()
        max_iou = np.vstack([np.zeros(K, dtype=int) if lo == hi
                             else iou[:, lo:hi].max(axis=1)
                             for lo, hi in zip(idx[:-1], idx[1:])])
        score[j:j+n_seg[i], :] = max_iou > iou_thr
        j += n_seg[i]

    # Build DataFrame
    col_triads = ['c_{}'.format(i) for i in range(K)]
    new_df = pd.concat([pd.DataFrame({'video-name': videos.repeat(n_seg),
                                      'f-init': segments[:, 0],
                                      'duration': np.repeat(T,
                                                            segments.shape[0]),
                                      'video-frames': np.repeat(L, n_seg)}),
                        pd.DataFrame(score, columns=col_triads)], axis=1)
    return priors, new_df
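
# A minimal usage sketch, assuming a hypothetical 'annotations.csv' that
# exposes the REQ_INFO_CP columns used above ('video-name', 'video-frames',
# 'f-init', 'n-frames'); the seed is illustrative.
annotation_df = pd.read_csv('annotations.csv', sep=' ')
priors, train_df = compute_priors(annotation_df, T=256, K=200, iou_thr=0.5,
                                  rng_seed=313)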
def wrapper_unit_scaling(x, T, s_ref, n_gt, *args, **kwargs):
    """Normalize segments to unit-length and use center-duration format
    """
    xc = segment_format(x, 'b2c')
    init_ref = np.repeat(s_ref[:, 0], n_gt)
    return segment_unit_scaling(xc, T, init_ref)
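
# segment_format is called throughout with the codes 'c2b', 'b2c' and 'd2b'.
# Below is a minimal sketch of the conventions this module implies
# (inclusive frame boundaries, so duration = f-end - f-init + 1); the real
# project helper may differ in rounding details.
def _segment_format_sketch(X, mthd):
    """Convert [m x 2] segments between formats.

    'c2b': [center, duration] -> [f-init, f-end]
    'b2c': [f-init, f-end]    -> [center, duration]
    'd2b': [f-init, duration] -> [f-init, f-end]
    """
    if mthd == 'c2b':
        f_init = np.ceil(X[:, 0] - 0.5 * X[:, 1])
        return np.stack([f_init, f_init + X[:, 1] - 1], axis=-1)
    elif mthd == 'b2c':
        center = np.round(0.5 * (X[:, 0] + X[:, 1]))
        return np.stack([center, X[:, 1] - X[:, 0] + 1], axis=-1)
    elif mthd == 'd2b':
        return np.stack([X[:, 0], X[:, 0] + X[:, 1] - 1], axis=-1)
    raise ValueError('Unknown format code: {}'.format(mthd))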
def evaluate_priors(df, priors, T, stride=16, iou_thr=0.5,
                    return_recall=False):
    """
    Parameters
    ----------
    df: DataFrame
        Pandas table with annotations of the dataset. It must include the
        following columns data_generation.REQ_INFO_CP
    priors: ndarray
        2-dim array of priors discovered. The first dimension iterates over the
        different priors.
    T: int
        Canonical temporal size of evaluation window.
    stride: int, optional
        Size of the sliding step.
    iou_thr : float, optional
        IOU threshold to consider that an annotation matches a prior.
    return_recall: bool, optional
        Return one extra output (recall, computed at the given iou_thr).

    Outputs
    -------
    eval_df: DataFrame
        Table with information about each annotation and its matched prior.
    recall: float
        Recall at given iou threshold.
    """
    # Sanitize input.
    mapped_priors_b = segment_format(priors * T, 'c2b').clip(1, T)
    mapped_priors_b = np.array(mapped_priors_b).astype(int)

    # Iterate over each instance.
    best_iou, v_pointer = np.empty(df['video-name'].size), 0
    best_priors_t = np.empty((df['video-name'].size, 2))
    best_priors_index = np.empty(df['video-name'].size)
    for i, sgm_i in df.iterrows():
        # Parsing ground-truth.
        L = sgm_i['video-frames']
        gtruth_c = np.empty((1, 2))
        gtruth_c[0, :] = np.stack([sgm_i['f-init'], sgm_i['n-frames']],
                                  axis=-1)
        gtruth_b = segment_format(gtruth_c, 'd2b')

        # Slide priors over time.
        priors_t, k_idx = compute_priors_over_time(mapped_priors_b, T,
                                                   L, stride)

        # No priors found for this video.
        if priors_t.shape[0] == 0:
            best_iou[v_pointer] = 0.0
            best_priors_t[v_pointer, :] = np.array([np.nan, np.nan])
            best_priors_index[v_pointer] = np.nan
            v_pointer += 1
            continue

        # Compute IOU and keep the best prior for this annotation.
        iou = segment_iou(gtruth_b, priors_t)
        max_idx = int(iou.argmax())
        best_iou[v_pointer] = iou.flatten()[max_idx]
        best_priors_t[v_pointer, :] = priors_t[max_idx, :]
        best_priors_index[v_pointer] = k_idx[max_idx]
        v_pointer += 1

    # Build DataFrame.
    s_init = best_priors_t[:, 0]
    n_frames = best_priors_t[:, 1] - best_priors_t[:, 0] + 1
    eval_df = pd.concat([df, pd.DataFrame({'priors-f-init': s_init,
                                           'priors-n-frames': n_frames,
                                           'k-idx': best_priors_index,
                                           'iou': best_iou})], axis=1)
    if return_recall:
        n_annotations = eval_df.shape[0]
        recall = (eval_df['iou'] >= iou_thr).sum().astype(float)/n_annotations
        return eval_df, recall
    return eval_df
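
# A minimal usage sketch, reusing the hypothetical annotation table and the
# priors from the compute_priors sketch above; recall is reported at the
# default IOU threshold of 0.5.
eval_df, recall = evaluate_priors(annotation_df, priors, T=256, stride=16,
                                  return_recall=True)
print('Recall@0.5: {:.3f}'.format(recall))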
def retrieve_proposals(video_name,
                       l_size,
                       network,
                       T=256,
                       stride=128,
                       c3d_size=16,
                       c3d_stride=8,
                       pool_type='mean',
                       hdf5_dataset=None,
                       model_prm=None):
    """Retrieve proposals for an input video.

    Parameters
    ----------
    video_name : str.
        Video identifier.
    l_size : int.
        Size of the video.
    network : (localization, conf).
        Lasagne layers.
    T : int, optional.
        Canonical temporal size of evaluation window.
    stride : int, optional.
        Size of the sliding step.
    c3d_size : int, optional.
        Size of the temporal field of the C3D network.
    c3d_stride : int, optional.
        Size of temporal stride between extracted features.
    pool_type : str, optional.
        Global pooling strategy over a bunch of features.
        'mean', 'max', 'pyr-2-mean/max', 'concat-2-mean/max'
    hdf5_dataset : str.
        Path to feature file.
    model_prm : str, optional.
        Model descriptor. An 'lstm:' prefix triggers reshaping of the
        feature stack; the remainder is parsed as comma-separated
        n_outputs, seq_length, width, depth.

    """
    # IO interface.
    fobj = Feature(filename=hdf5_dataset,
                   t_size=c3d_size,
                   t_stride=c3d_stride,
                   pool_type=pool_type)
    fobj.open_instance()
    # Video scanning.
    f_init_array = np.arange(0, l_size - T, stride)
    feat_stack = fobj.read_feat_batch_from_video(video_name,
                                                 f_init_array,
                                                 duration=T).astype(np.float32)
    if model_prm is not None and model_prm.startswith('lstm:'):
        user_prm = model_prm.split(':', 1)[1].split(',')
        n_outputs, seq_length, width, depth = user_prm
        # Integer division keeps the reshaped feature dimension integral.
        feat_stack = feat_stack.reshape(feat_stack.shape[0], int(seq_length),
                                        feat_stack.shape[1] // int(seq_length))

    # Close instance.
    fobj.close_instance()

    # Generate proposals.
    loc, score = forward_pass(network, feat_stack)
    n_proposals = score.shape[1]
    n_segments = score.shape[0]
    score = score.flatten()
    map_array = np.stack(
        (f_init_array, np.zeros(n_segments))).repeat(n_proposals, axis=-1).T
    proposal = segment_format(map_array + (loc.clip(0, 1) * T),
                              'c2b').astype(int)
    return proposal, score
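
# A minimal usage sketch for retrieve_proposals (doctest style, since the
# trained network is not part of this excerpt): 'network' stands for a
# (localization, conf) pair of Lasagne layers, 'features.hdf5' for a C3D
# feature file readable by the Feature class, and the 'lstm:' descriptor
# follows the n_outputs,seq_length,width,depth layout parsed above.
# >>> proposals, scores = retrieve_proposals('video_validation_0001', 5400,
# ...                                        network, T=256, stride=128,
# ...                                        hdf5_dataset='features.hdf5',
# ...                                        model_prm='lstm:32,16,256,1')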