Example #1
0
    def _with_predictions(self):
        self._logger.debug('__init__')
        for video_idx, video in enumerate(self._videos):
            filename = re.match(r'[./\w]*/(\w+)\.\w+', video.path)
            if filename is None:
                self._logger.error(
                    'Check video paths: the template used to extract the '
                    'video name does not match')
            filename = filename.group(1)
            self._videoname2idx[filename] = video_idx
            self._idx2videoname[video_idx] = filename

            names = np.asarray([video_idx] * video.n_frames).reshape((-1, 1))
            idxs = np.arange(video.n_frames).reshape((-1, 1))
            if opt.gt_training:
                gt_file = np.asarray(video._gt).reshape((-1, 1))
            else:
                gt_file = np.asarray(video._z).reshape((-1, 1))
            if self._features is None:
                features = video.features()
            else:
                features = self._features[video.global_range]
            temp_feature_list = join_data(None,
                                          (names, idxs, gt_file, features),
                                          np.hstack)
            self._feature_list = join_data(self._feature_list,
                                           temp_feature_list, np.vstack)
        self._features = None

    def __init__(self, videos, features):
        logger.debug('Relative time labels')
        super().__init__(videos, features)

        temp_features = None  # used only if opt.concat > 1
        for video in self._videos:
            time_label = np.asarray(video.temp).reshape((-1, 1))
            video_features = self._features[video.global_range]

            temp_features = join_data(temp_features, video_features, np.vstack)

            self._gt = join_data(self._gt, time_label, np.vstack)
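Every example in this file leans on a join_data helper that is not included here. The sketch below is only a reconstruction from the call sites (it appears to treat None as "nothing accumulated yet" and to accept either a single array or a tuple of arrays, merging with np.vstack / np.hstack); the real helper in the codebase may differ.

import numpy as np

def join_data(acc, new, merge_fn):
    # Hypothetical helper reconstructed from its usage above: merge `new`
    # into the running accumulator `acc` with `merge_fn` (np.vstack/np.hstack).
    if new is None:
        return acc
    if isinstance(new, tuple):
        new = merge_fn(new)
    if acc is None:
        return new
    return merge_fn((acc, new))

# usage mirroring _with_predictions: stack per-video blocks row-wise
feature_list = None
for block in (np.zeros((2, 4)), np.ones((3, 4))):
    feature_list = join_data(feature_list, block, np.vstack)
print(feature_list.shape)  # (5, 4)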
Example #3
0
    def _init_videos(self):
        logger.debug('.')
        gt_stat = Counter()
        for root, dirs, files in os.walk(opt.data):
            if not files:
                continue
            for filename in files:
                # pick only videos with certain complex action
                # (ex: just concerning coffee)
                if self._subaction in filename:
                    if opt.test_set:
                        if opt.reduced:
                            opt.reduced = opt.reduced - 1
                            continue
                    # if opt.dataset == 'fs':
                    #     gt_name = filename[:-(len(opt.ext) + 1)] + '.txt'
                    # else:
                    match = re.match(r'(.*)\..*', filename)
                    gt_name = match.group(1)
                    # use extracted features from pretrained on gt embedding
                    if opt.load_embed_feat:
                        path = os.path.join(
                            opt.data, 'embed', opt.subaction,
                            opt.resume_str % opt.subaction) + '_%s' % gt_name
                    else:
                        path = os.path.join(root, filename)
                    start = (0 if self._features is None
                             else self._features.shape[0])
                    try:
                        video = Video(path,
                                      K=self._K,
                                      gt=self.gt_map.gt[gt_name],
                                      name=gt_name,
                                      start=start,
                                      with_bg=self._with_bg)
                    except AssertionError:
                        logger.debug('Assertion Error: %s' % gt_name)
                        continue
                    self._features = join_data(self._features,
                                               video.features(), np.vstack)

                    video.reset()  # to not store second time loaded features
                    self._videos.append(video)
                    # accumulate statistic for inverse counts vector for each video
                    gt_stat.update(self.gt_map.gt[gt_name])
                    if opt.reduced:
                        if len(self._videos) > opt.reduced:
                            break

                    if opt.feature_dim > 100:
                        if len(self._videos) % 20 == 0:
                            logger.debug('loaded %d videos' %
                                         len(self._videos))

        # update global range within the current collection for each video
        for video in self._videos:
            video.update_indexes(len(self._features))
        logger.debug('gt statistic: %d videos ' % len(self._videos) +
                     str(gt_stat))
        self._update_fg_mask()
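_init_videos hands each Video a running start offset and later calls video.update_indexes(len(self._features)); the Video class itself is not part of this file, so the following is only a guess at the bookkeeping that implies: each video keeps a mask (its global_range) selecting its own rows inside the feature matrix stacked over the whole collection.

import numpy as np

class VideoStub:
    # Hypothetical stand-in for the real Video class; it only illustrates the
    # start / global_range bookkeeping assumed by _init_videos above.
    def __init__(self, n_frames, start):
        self.n_frames = n_frames
        self._start = start          # row offset into the stacked feature matrix
        self.global_range = None

    def update_indexes(self, total_frames):
        # boolean mask over all rows of the collection-wide feature matrix
        mask = np.zeros(total_frames, dtype=bool)
        mask[self._start:self._start + self.n_frames] = True
        self.global_range = mask

videos, offset = [], 0
for n_frames in (100, 250, 80):
    videos.append(VideoStub(n_frames, start=offset))
    offset += n_frames
for video in videos:
    video.update_indexes(offset)     # analogue of len(self._features)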
Example #4
0
    def gaussian_model(self):
        logger.debug('Fit Gaussian Mixture Model to the whole dataset at once')
        self._gaussians_fit()
        for video_idx in range(len(self._videos)):
            self._video_likelihood_grid(video_idx)

        if opt.bg:
            scores = None
            for video in self._videos:
                scores = join_data(scores, video.get_likelihood(), np.vstack)

            bg_trh_score = np.sort(scores, axis=0)[int(
                (opt.bg_trh / 100) * scores.shape[0])]

            bg_trh_set = []
            for action_idx in range(self._K):
                new_bg_trh = (self._gaussians[action_idx].mean_score
                              - bg_trh_score[action_idx])
                self._gaussians[action_idx].update_trh(new_bg_trh=new_bg_trh)
                bg_trh_set.append(new_bg_trh)

            logger.debug('new bg_trh: %s' % str(bg_trh_set))
            trh_set = []
            for action_idx in range(self._K):
                trh_set.append(self._gaussians[action_idx].trh)
            for video in self._videos:
                video.valid_likelihood_update(trh_set)
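The background threshold in gaussian_model is a per-action percentile of the stacked likelihood scores (one value per column). A standalone illustration of that single indexing line, with made-up shapes rather than the project's data:

import numpy as np

rng = np.random.default_rng(0)
scores = rng.normal(size=(1000, 5))   # (frames, K actions), synthetic
bg_trh = 10                           # analogue of opt.bg_trh, in percent

# same indexing as above: the score below which bg_trh percent of the
# frames fall, computed independently for every action column
bg_trh_score = np.sort(scores, axis=0)[int((bg_trh / 100) * scores.shape[0])]
print(bg_trh_score.shape)             # (5,) -> one threshold per action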
Example #5
0
    def regression_training(self):
        if opt.load_embed_feat:
            logger.debug('load precomputed features')
            self._embedded_feat = self._features
            return

        logger.debug('.')

        dataloader = load_reltime(videos=self._videos, features=self._features)

        model, loss, optimizer = mlp.create_model()
        if opt.load_model:
            model.load_state_dict(load_model())
            self._embedding = model
        else:
            self._embedding = training(dataloader,
                                       opt.epochs,
                                       save=opt.save_model,
                                       model=model,
                                       loss=loss,
                                       optimizer=optimizer,
                                       name=opt.model_name)

        self._embedding = self._embedding.cpu()

        unshuffled_dataloader = load_reltime(videos=self._videos,
                                             features=self._features,
                                             shuffle=False)

        gt_relative_time = None
        relative_time = None
        if opt.model_name == 'mlp':
            for batch_features, batch_gtreltime in unshuffled_dataloader:
                if self._embedded_feat is None:
                    self._embedded_feat = batch_features
                else:
                    self._embedded_feat = torch.cat(
                        (self._embedded_feat, batch_features), 0)

                batch_gtreltime = batch_gtreltime.numpy().reshape((-1, 1))
                gt_relative_time = join_data(gt_relative_time, batch_gtreltime,
                                             np.vstack)

            relative_time = self._embedding(
                self._embedded_feat.float()).detach().numpy().reshape((-1, 1))

            self._embedded_feat = self._embedding.embedded(
                self._embedded_feat.float()).detach().numpy()
            self._embedded_feat = np.squeeze(self._embedded_feat)

        if opt.save_embed_feat:
            self.save_embed_feat()

        mse = np.mean((gt_relative_time - relative_time) ** 2)
        logger.debug('MLP training: MSE: %f' % mse)

    def __init__(self, videos, features):
        logger.debug('Ground Truth labels')
        super().__init__(videos, features)

        for video in self._videos:
            gt_item = np.asarray(video.gt).reshape((-1, 1))
            # video_features = self._videos_features[video.global_range]
            # video_features = join_data(None, (gt_item, video_features),
            #                            np.hstack)
            self._gt = join_data(self._gt, gt_item, np.vstack)
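regression_training above consumes load_reltime only through batches of (batch_features, batch_gtreltime); load_reltime itself is not shown. Under that assumption it could be as small as a standard DataLoader over per-frame features and relative-time targets. The name load_reltime_sketch and the shapes below are illustrative, not the project's code.

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

def load_reltime_sketch(features, rel_time, batch_size=256, shuffle=True):
    # Hypothetical stand-in for load_reltime: yields (features, relative_time)
    # batches exactly the way regression_training iterates over them.
    dataset = TensorDataset(torch.from_numpy(features).float(),
                            torch.from_numpy(rel_time).float())
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

features = np.random.rand(1000, 64).astype(np.float32)    # made-up shapes
rel_time = np.linspace(0, 1, 1000, dtype=np.float32).reshape(-1, 1)
for batch_features, batch_gtreltime in load_reltime_sketch(features, rel_time,
                                                           shuffle=False):
    pass  # same iteration pattern as the unshuffled pass above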
Example #7
0
    def labels(self, new_labels):
        self._labels = join_data(self._labels, new_labels, np.hstack)
        self._sizes += [self.size] * len(new_labels)
Example #8
0
    def data(self, new_data):
        self._data = join_data(self._data, new_data, np.vstack)
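Examples #7 and #8 read like @property setters on some container that keeps appending incoming chunks; neither the decorators nor the surrounding class appear in this file. A plausible surrounding class, reusing the join_data sketch from after Example #1 (the size / _sizes bookkeeping is copied from Example #7, everything else is an assumption):

import numpy as np

class FeatureBuffer:
    # Hypothetical container around the two setters above; only the bodies of
    # labels / data come from this file, the rest is guessed context.
    # join_data: see the sketch after Example #1.
    def __init__(self):
        self._data = None
        self._labels = None
        self._sizes = []
        self.size = 0                # id of the chunk currently being appended

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, new_data):
        self._data = join_data(self._data, new_data, np.vstack)

    @property
    def labels(self):
        return self._labels

    @labels.setter
    def labels(self, new_labels):
        self._labels = join_data(self._labels, new_labels, np.hstack)
        self._sizes += [self.size] * len(new_labels)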