def _with_predictions(self):
    self._logger.debug('__init__')
    for video_idx, video in enumerate(self._videos):
        filename = re.match(r'[\.\/\w]*\/(\w+)\.\w+', video.path)
        if filename is None:
            # logging.ERROR is a level constant, not a callable; log properly
            # and skip the video so .group(1) is never called on None
            self._logger.error('Check video paths: the template to extract '
                               'the video name does not match')
            continue
        filename = filename.group(1)
        self._videoname2idx[filename] = video_idx
        self._idx2videoname[video_idx] = filename

        names = np.asarray([video_idx] * video.n_frames).reshape((-1, 1))
        idxs = np.arange(video.n_frames).reshape((-1, 1))
        if opt.gt_training:
            gt_file = np.asarray(video._gt).reshape((-1, 1))
        else:
            gt_file = np.asarray(video._z).reshape((-1, 1))
        if self._features is None:
            features = video.features()
        else:
            features = self._features[video.global_range]
        temp_feature_list = join_data(None,
                                      (names, idxs, gt_file, features),
                                      np.hstack)
        self._feature_list = join_data(self._feature_list,
                                       temp_feature_list,
                                       np.vstack)
    # raw features are no longer needed once the joint feature list is built
    self._features = None
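# `join_data` (defined elsewhere in the repo) is used above as a None-tolerant
# accumulator. `_join_data_sketch` below is a hypothetical restatement of the
# semantics inferred from the call sites, kept only as documentation of the
# expected behaviour, not the repo's actual helper:
def _join_data_sketch(data, new_data, fn):
    # `new_data` may be a tuple of arrays (stacked first with `fn`, e.g.
    # np.hstack / np.vstack) or a single array; if the running buffer `data`
    # is None, the new block becomes the initial buffer.
    new_data = fn(new_data) if isinstance(new_data, tuple) else new_data
    return new_data if data is None else fn((data, new_data))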
def __init__(self, videos, features):
    logger.debug('Relative time labels')
    super().__init__(videos, features)

    temp_features = None  # used only if opt.concat > 1
    for video in self._videos:
        time_label = np.asarray(video.temp).reshape((-1, 1))
        video_features = self._features[video.global_range]
        temp_features = join_data(temp_features, video_features, np.vstack)

        self._gt = join_data(self._gt, time_label, np.vstack)
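# `video.temp` is assumed to hold one relative timestamp per frame, i.e. the
# frame index normalized by the video length, which is the target that
# `regression_training` below fits. `make_relative_time` is a hypothetical
# helper illustrating that assumption, not a function from the repo:
def make_relative_time(n_frames):
    # t / T for each frame t of a video with T frames
    return np.arange(n_frames, dtype=np.float64) / n_frames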
def _init_videos(self):
    logger.debug('.')
    gt_stat = Counter()
    for root, dirs, files in os.walk(opt.data):
        if not files:
            continue
        for filename in files:
            # pick only videos with a certain complex action
            # (e.g. just those concerning coffee)
            if self._subaction in filename:
                if opt.test_set:
                    if opt.reduced:
                        opt.reduced = opt.reduced - 1
                        continue
                # if opt.dataset == 'fs':
                #     gt_name = filename[:-(len(opt.ext) + 1)] + '.txt'
                # else:
                match = re.match(r'(.*)\..*', filename)
                gt_name = match.group(1)
                # use features extracted from the embedding pretrained on gt
                if opt.load_embed_feat:
                    path = os.path.join(opt.data, 'embed', opt.subaction,
                                        opt.resume_str % opt.subaction) \
                           + '_%s' % gt_name
                else:
                    path = os.path.join(root, filename)
                start = 0 if self._features is None else self._features.shape[0]
                try:
                    video = Video(path, K=self._K,
                                  gt=self.gt_map.gt[gt_name],
                                  name=gt_name,
                                  start=start,
                                  with_bg=self._with_bg)
                except AssertionError:
                    logger.debug('Assertion Error: %s' % gt_name)
                    continue
                self._features = join_data(self._features,
                                           video.features(),
                                           np.vstack)
                video.reset()  # do not store loaded features a second time
                self._videos.append(video)
                # accumulate statistics for the inverse count vector of each video
                gt_stat.update(self.gt_map.gt[gt_name])
                if opt.reduced:
                    if len(self._videos) > opt.reduced:
                        break
                if opt.feature_dim > 100:
                    if len(self._videos) % 20 == 0:
                        logger.debug('loaded %d videos' % len(self._videos))

    # update the global range within the current collection for each video
    for video in self._videos:
        video.update_indexes(len(self._features))
    logger.debug('gt statistic: %d videos ' % len(self._videos) + str(gt_stat))
    self._update_fg_mask()
def gaussian_model(self):
    logger.debug('Fit Gaussian Mixture Model to the whole dataset at once')
    self._gaussians_fit()
    for video_idx in range(len(self._videos)):
        self._video_likelihood_grid(video_idx)

    if opt.bg:
        scores = None
        for video in self._videos:
            scores = join_data(scores, video.get_likelihood(), np.vstack)

        # per-action likelihood value at the opt.bg_trh percent position of
        # the sorted frame scores
        bg_trh_score = np.sort(scores, axis=0)[
            int((opt.bg_trh / 100) * scores.shape[0])]

        bg_trh_set = []
        for action_idx in range(self._K):
            new_bg_trh = self._gaussians[action_idx].mean_score \
                         - bg_trh_score[action_idx]
            self._gaussians[action_idx].update_trh(new_bg_trh=new_bg_trh)
            bg_trh_set.append(new_bg_trh)
        logger.debug('new bg_trh: %s' % str(bg_trh_set))

    trh_set = []
    for action_idx in range(self._K):
        trh_set.append(self._gaussians[action_idx].trh)
    for video in self._videos:
        video.valid_likelihood_update(trh_set)
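# The background threshold above is a percentile cut: sort all frame
# likelihoods per action, take the value at the opt.bg_trh percent position,
# and express it as a distance from each Gaussian's mean score. A
# self-contained numpy illustration of that indexing (the data and the 30%
# value below are made up, standing in for real scores and opt.bg_trh):
_scores = np.random.RandomState(0).randn(1000, 5)  # frames x K actions
_bg_trh = 30                                       # percent
_cut = np.sort(_scores, axis=0)[int((_bg_trh / 100) * _scores.shape[0])]
# _cut[k] is the likelihood below which the lowest 30% of frames fall for action k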
def regression_training(self):
    if opt.load_embed_feat:
        logger.debug('load precomputed features')
        self._embedded_feat = self._features
        return

    logger.debug('.')
    dataloader = load_reltime(videos=self._videos, features=self._features)

    model, loss, optimizer = mlp.create_model()
    if opt.load_model:
        model.load_state_dict(load_model())
        self._embedding = model
    else:
        self._embedding = training(dataloader, opt.epochs,
                                   save=opt.save_model,
                                   model=model,
                                   loss=loss,
                                   optimizer=optimizer,
                                   name=opt.model_name)

    self._embedding = self._embedding.cpu()

    unshuffled_dataloader = load_reltime(videos=self._videos,
                                         features=self._features,
                                         shuffle=False)

    gt_relative_time = None
    relative_time = None
    if opt.model_name == 'mlp':
        for batch_features, batch_gtreltime in unshuffled_dataloader:
            if self._embedded_feat is None:
                self._embedded_feat = batch_features
            else:
                self._embedded_feat = torch.cat(
                    (self._embedded_feat, batch_features), 0)

            batch_gtreltime = batch_gtreltime.numpy().reshape((-1, 1))
            gt_relative_time = join_data(gt_relative_time, batch_gtreltime,
                                         np.vstack)

        relative_time = self._embedding(
            self._embedded_feat.float()).detach().numpy().reshape((-1, 1))

        self._embedded_feat = self._embedding.embedded(
            self._embedded_feat.float()).detach().numpy()
        self._embedded_feat = np.squeeze(self._embedded_feat)

    if opt.save_embed_feat:
        self.save_embed_feat()

    mse = np.sum((gt_relative_time - relative_time) ** 2)
    mse = mse / len(relative_time)
    logger.debug('MLP training: MSE: %f' % mse)
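# `mlp.create_model()` is defined elsewhere; `regression_training` only relies
# on the returned model being callable (predicting one relative-time scalar
# per frame) and exposing an `embedded()` method returning the hidden-layer
# features that are later stored in `self._embedded_feat`. `MLPSketch` below
# is a minimal sketch of an interface satisfying both call sites; the class
# name, layer sizes, and activation are assumptions, not the repo's config:
import torch
import torch.nn as nn

class MLPSketch(nn.Module):
    def __init__(self, dim_in=64, dim_embed=20):
        super().__init__()
        self.fc1 = nn.Linear(dim_in, dim_embed)
        self.fc2 = nn.Linear(dim_embed, 1)  # scalar relative time per frame

    def embedded(self, x):
        # intermediate embedding, the part reused as a feature extractor
        return torch.relu(self.fc1(x))

    def forward(self, x):
        return self.fc2(self.embedded(x))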
def __init__(self, videos, features):
    logger.debug('Ground Truth labels')
    super().__init__(videos, features)
    for video in self._videos:
        gt_item = np.asarray(video.gt).reshape((-1, 1))
        # video_features = self._videos_features[video.global_range]
        # video_features = join_data(None, (gt_item, video_features),
        #                            np.hstack)
        self._gt = join_data(self._gt, gt_item, np.vstack)
def labels(self, new_labels):
    self._labels = join_data(self._labels, new_labels, np.hstack)
    self._sizes += [self.size] * len(new_labels)
def data(self, new_data):
    self._data = join_data(self._data, new_data, np.vstack)
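# The two methods above read like property setters on a buffer-like holder.
# A minimal usage sketch under that assumption; `FeatureBufferSketch` and the
# arrays fed to it are hypothetical, not part of the repo:
class FeatureBufferSketch:
    def __init__(self):
        self._data = None
        self._labels = None
        self._sizes = []

    @property
    def size(self):
        return 0 if self._data is None else self._data.shape[0]

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, new_data):
        self._data = join_data(self._data, new_data, np.vstack)

    @property
    def labels(self):
        return self._labels

    @labels.setter
    def labels(self, new_labels):
        self._labels = join_data(self._labels, new_labels, np.hstack)
        self._sizes += [self.size] * len(new_labels)

buf = FeatureBufferSketch()
buf.data = np.zeros((10, 64))  # rows stacked onto the feature buffer
buf.labels = np.zeros(10)      # labels appended; current size recorded per label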