def __init__(self, videos, features):
    """Dataset over precomputed per-frame features for a video collection."""
    logger.debug('Creating feature dataset')
    self._videos = videos
    self._features = features
    self._gt = None  # filled in by subclasses with per-frame targets
def gaussian_model(self):
    """Fit per-action Gaussians and score every frame of every video.

    After fitting, if background modelling is enabled (opt.bg), derive a
    per-action likelihood threshold from the opt.bg_trh percentile of all
    frame scores and push the thresholds down to each video.
    """
    logger.debug('Fit Gaussian Mixture Model to the whole dataset at once')
    self._gaussians_fit()
    # compute the likelihood grid (frames x actions) for each video
    for video_idx in range(len(self._videos)):
        self._video_likelihood_grid(video_idx)

    if opt.bg:
        # stack likelihoods of all videos into one (n_frames, K) array
        scores = None
        for video in self._videos:
            scores = join_data(scores, video.get_likelihood(), np.vstack)

        # per-action score at the opt.bg_trh percentile (column-wise sort)
        bg_trh_score = np.sort(scores, axis=0)[int(
            (opt.bg_trh / 100) * scores.shape[0])]

        # threshold = distance of the percentile score from the mean score
        bg_trh_set = []
        for action_idx in range(self._K):
            new_bg_trh = self._gaussians[
                action_idx].mean_score - bg_trh_score[action_idx]
            self._gaussians[action_idx].update_trh(new_bg_trh=new_bg_trh)
            bg_trh_set.append(new_bg_trh)

        logger.debug('new bg_trh: %s' % str(bg_trh_set))
        # propagate the (possibly clamped) thresholds to every video
        trh_set = []
        for action_idx in range(self._K):
            trh_set.append(self._gaussians[action_idx].trh)
        for video in self._videos:
            video.valid_likelihood_update(trh_set)
def load_data(root_dir, end, subaction, videos=None, names=None, features=None):
    """Create dataloader within given conditions

    Args:
        root_dir: path to root directory with features
        end: extension of files
        subaction: complex activity
        videos: collection of object of class Video
        names: empty list as input to have opportunity to return dictionary
            with correspondences between names and indices
        features: features for the whole video collection
    Returns:
        iterative dataloader
        number of subactions in current complex activity
    """
    logger.debug('create DataLoader')
    dataset = FeatureDataset(root_dir, end, subaction,
                             videos=videos,
                             features=features)
    if names is not None:
        # caller passed a mutable slot to receive the name->index mapping
        names[0] = dataset.index2name()
    # shuffling is disabled when embedded features are being dumped to disk,
    # so the saved order matches the video order
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=(not opt.save_embed_feat),
                                             num_workers=opt.num_workers)
    return dataloader, dataset.n_subact()
def wrap(*args, **kwargs):
    """Invoke the wrapped function and log its wall-clock duration."""
    started = time.time()
    result = f(*args, **kwargs)
    elapsed = time.time() - started
    logger.debug('%s took %0.3f ms ~ %0.3f min ~ %0.3f sec'
                 % (f, elapsed * 1000.0, elapsed / 60.0, elapsed))
    return result
def _create_voting_table(self):
    """Filling table with assignment scores.

    Create table which represents paired label assignments, i.e. each cell
    comprises score for corresponding label assignment.

    The table is square with side = max(#gt labels, #predicted labels);
    missing labels on either side are padded with synthetic label ids so
    that a one-to-one matching (e.g. Hungarian) can be run on the table.
    """
    size = max(len(np.unique(self._gt_labels_subset)),
               len(np.unique(self._predicted_labels)))
    self._voting_table = np.zeros((size, size))

    # map real gt labels to consecutive row indices
    for idx_gt, gt_label in enumerate(np.unique(self._gt_labels_subset)):
        self._gt_label2index[gt_label] = idx_gt
        self._gt_index2label[idx_gt] = gt_label

    # pad with synthetic gt labels (smallest unused ints) up to `size`
    if len(self._gt_label2index) < size:
        for idx_gt in range(len(np.unique(self._gt_labels_subset)), size):
            gt_label = idx_gt
            while gt_label in self._gt_label2index:
                gt_label += 1
            self._gt_label2index[gt_label] = idx_gt
            self._gt_index2label[idx_gt] = gt_label

    # map real predicted labels to consecutive column indices
    for idx_pr, pr_label in enumerate(np.unique(self._predicted_labels)):
        self._pr_label2index[pr_label] = idx_pr
        self._pr_index2label[idx_pr] = pr_label

    # pad with synthetic predicted labels up to `size`
    if len(self._pr_label2index) < size:
        for idx_pr in range(len(np.unique(self._predicted_labels)), size):
            pr_label = idx_pr
            while pr_label in self._pr_label2index:
                pr_label += 1
            self._pr_label2index[pr_label] = idx_pr
            self._pr_index2label[idx_pr] = pr_label

    # fill each cell with the co-occurrence count of (gt, predicted),
    # skipping the excluded (background) pair
    for idx_gt, gt_label in enumerate(np.unique(self._gt_labels_subset)):
        if gt_label in list(self.exclude.keys()):
            continue
        gt_mask = self._gt_labels_subset == gt_label
        for idx_pr, pr_label in enumerate(np.unique(
                self._predicted_labels)):
            if pr_label in list(self.exclude.values()):
                continue
            self._voting_table[idx_gt, idx_pr] = \
                np.sum(self._predicted_labels[gt_mask] == pr_label,
                       dtype=float)

    # pin the excluded pair to a dominating score so the matching is
    # forced to assign excluded-gt to excluded-predicted
    for key, val in self.exclude.items():
        # works only if one pair in exclude
        assert len(self.exclude) == 1
        try:
            self._voting_table[
                self._gt_label2index[key],
                self._pr_label2index[val[0]]] = size * np.max(
                    self._voting_table)
        except KeyError:
            # excluded predicted label never appeared: route it to the
            # last (padded) column instead
            logger.debug('No background!')
            self._voting_table[self._gt_label2index[key],
                               -1] = size * np.max(self._voting_table)
            self._pr_index2label[size - 1] = val[0]
            self._pr_label2index[val[0]] = size - 1
def _update_fg_mask(self):
    """Recompute the global foreground mask over all concatenated frames."""
    logger.debug('.')
    n_frames = len(self._features)
    if not self._with_bg:
        # no background modelling: every frame counts as foreground
        self._total_fg_mask = np.ones(n_frames, dtype=bool)
        return
    self._total_fg_mask = np.zeros(n_frames, dtype=bool)
    for video in self._videos:
        global_idxs = np.nonzero(video.global_range)[0]
        self._total_fg_mask[global_idxs[video.fg_mask]] = True
def resume_segmentation(iterations=10):
    """Re-evaluate precomputed segmentations for opt.action.

    Args:
        iterations: number of stored iterations to resume and score
    """
    logger.debug('Resume segmentation')
    corpus = Corpus(subaction=opt.action)

    for iteration in range(iterations):
        logger.debug('Iteration %d' % iteration)
        corpus.iter = iteration
        corpus.resume_segmentation()
        corpus.accuracy_corpus()
    # NOTE(review): accuracy_corpus() is called once more after the loop,
    # duplicating the last iteration's evaluation — looks intentional as a
    # final report, but confirm it is not an accidental repeat
    corpus.accuracy_corpus()
def load_ground_truth(videos, features, shuffle=True):
    """Build a DataLoader that pairs frame features with their GT labels."""
    logger.debug(
        'load data with ground truth labels for training some embedding')
    gt_dataset = GTDataset(videos, features)
    return torch.utils.data.DataLoader(gt_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=shuffle,
                                       num_workers=opt.num_workers)
def __init__(self, videos, features):
    """Stack per-video ground-truth labels into one (n_frames, 1) array."""
    logger.debug('Ground Truth labels')
    super().__init__(videos, features)
    for video in self._videos:
        labels = np.asarray(video.gt).reshape((-1, 1))
        self._gt = join_data(self._gt, labels, np.vstack)
def f1(self):
    """Run all sampling experiments and record precision/recall/F1 stats."""
    self._finish_init()
    for _ in range(self.n_experiments):
        self._sampling()
    f1_mean = np.mean(self.f1_scores)
    logger.debug('f1 score: %f' % f1_mean)
    # average the matched-segment count over all experiments
    self._n_true_seg_all /= self.n_experiments
    # each entry is a [numerator, denominator] pair consumed downstream
    self._return['precision'] = [
        self._n_true_seg_all, (self._K * self._n_videos)
    ]
    self._return['recall'] = [self._n_true_seg_all, len(self.bound_masks)]
    self._return['mean_f1'] = [f1_mean, 1]
def all_actions(actions):
    """Run the embedding pipeline per complex activity and report joint stats."""
    return_stat_all = None
    initial_lr = opt.lr
    for action in actions:
        opt.subaction = action
        if not opt.resume:
            # restart every activity from the same initial learning rate
            opt.lr = initial_lr
        update_opt_str()
        single_stat = temp_embed()
        return_stat_all = join_return_stat(return_stat_all, single_stat)
    logger.debug(return_stat_all)
    parse_return_stat(return_stat_all)
def __init__(self, videos, features):
    """Dataset whose targets are per-frame relative-time labels."""
    logger.debug('Relative time labels')
    super().__init__(videos, features)

    temp_features = None  # used only if opt.concat > 1
    for video in self._videos:
        # relative time of each frame within its video, as a column vector
        time_label = np.asarray(video.temp).reshape((-1, 1))
        video_features = self._features[video.global_range]
        # NOTE(review): temp_features is accumulated but never assigned to
        # self in this block — presumably consumed only when opt.concat > 1
        # elsewhere; verify it is not dead code
        temp_features = join_data(temp_features, video_features, np.vstack)
        self._gt = join_data(self._gt, time_label, np.vstack)
def __init__(self, videos, features, K, embedding=None):
    """Corpus built from an existing video collection and feature matrix.

    Args:
        videos: collection of Video objects (copied, not aliased)
        features: feature matrix for the whole collection (copied)
        K: number of subactions
        embedding: optional pretrained embedding model
    """
    # todo: define bunch of parameters for the corpus
    subaction = ''  # no complex-activity name for this corpus variant
    logger.debug('%s' % subaction)
    super().__init__(K=K, subaction=subaction)
    self._videos = list(np.array(videos).copy())
    self._features = features.copy()
    self._embedding = embedding
    self._update_fg_mask()
def _init_videos(self):
    """Walk opt.data, build Video objects for the current complex activity,
    and accumulate their features into one global matrix.

    Side effects: fills self._videos and self._features, updates each
    video's global index range, and refreshes the foreground mask.
    """
    logger.debug('.')
    gt_stat = Counter()
    for root, dirs, files in os.walk(opt.data):
        if not files:
            continue
        for filename in files:
            # pick only videos with certain complex action
            # (ex: just concerning coffee)
            if self._subaction in filename:
                if opt.test_set:
                    # skip the first opt.reduced videos when evaluating on
                    # the held-out test set
                    if opt.reduced:
                        opt.reduced = opt.reduced - 1
                        continue
                # strip the extension to recover the ground-truth key
                match = re.match(r'(.*)\..*', filename)
                gt_name = match.group(1)
                # use extracted features from pretrained on gt embedding
                if opt.load_embed_feat:
                    path = os.path.join(
                        opt.data, 'embed', opt.subaction,
                        opt.resume_str % opt.subaction) + '_%s' % gt_name
                else:
                    path = os.path.join(root, filename)
                # global row offset of this video inside the feature matrix
                start = 0 if self._features is None else self._features.shape[
                    0]
                try:
                    video = Video(path, K=self._K,
                                  gt=self.gt_map.gt[gt_name],
                                  name=gt_name,
                                  start=start,
                                  with_bg=self._with_bg)
                except AssertionError:
                    logger.debug('Assertion Error: %s' % gt_name)
                    continue
                self._features = join_data(self._features,
                                           video.features(),
                                           np.vstack)

                video.reset()  # to not store second time loaded features
                self._videos.append(video)
                # accumulate statistic for inverse counts vector for each video
                gt_stat.update(self.gt_map.gt[gt_name])
                # cap the number of loaded videos when a reduced set is used
                if opt.reduced:
                    if len(self._videos) > opt.reduced:
                        break
                # progress logging for heavy feature sets
                if opt.feature_dim > 100:
                    if len(self._videos) % 20 == 0:
                        logger.debug('loaded %d videos' % len(self._videos))

    # update global range within the current collection for each video
    for video in self._videos:
        video.update_indexes(len(self._features))
    logger.debug('gt statistic: %d videos ' % len(self._videos) + str(gt_stat))
    self._update_fg_mask()
def load_reltime(videos, features, shuffle=True):
    """Build a DataLoader whose targets are relative-time labels.

    Args:
        videos: collection of Video objects
        features: feature matrix for the whole collection
        shuffle: whether the loader shuffles batches
    Returns:
        torch DataLoader over the model-appropriate dataset
    Raises:
        ValueError: if opt.model_name is neither 'mlp' nor 'tcn'
            (previously this fell through and crashed with a NameError
            on the unbound `dataset` variable)
    """
    logger.debug('load data with temporal labels as ground truth')
    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)

    if opt.model_name == 'mlp':
        dataset = RelTimeDataset(videos, features)
    elif opt.model_name == 'tcn':
        dataset = TCNDataset(videos, features)
    else:
        raise ValueError('unknown opt.model_name: %r' % opt.model_name)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=shuffle,
                                             num_workers=opt.num_workers)
    return dataloader
def load_model():
    """Load a checkpointed state dict from opt.dataset_root/models."""
    if opt.loaded_model_name:
        resume_str = opt.loaded_model_name % opt.subaction
    else:
        resume_str = opt.log_str + '.pth.tar'
    model_path = join(opt.dataset_root, 'models', '%s' % resume_str)
    # map storages onto the CPU when no GPU is configured
    if opt.device == 'cpu':
        checkpoint = torch.load(model_path, map_location='cpu')
    else:
        checkpoint = torch.load(model_path)
    checkpoint = checkpoint['state_dict']
    logger.debug('loaded model: ' + '%s' % resume_str)
    return checkpoint
def parse_return_stat(stat):
    """Log final aggregated metrics.

    Args:
        stat: dict mapping metric name -> [numerator, denominator] pair
            ('precision' and 'recall' pairs feed the derived 'f1' value)
    """
    keys = ['mof', 'mof_bg', 'iou', 'iou_bg', 'f1', 'mean_f1']
    for key in keys:
        if key == 'f1':
            _eps = 1e-8
            n_tr_seg, n_seg = stat['precision']
            precision = n_tr_seg / n_seg
            # bug fix: recall was computed as n_tr_seg / n_tr_seg (always
            # 1.0) because the unpack grabbed only the denominator; divide
            # the recall numerator by the recall denominator instead
            n_tr_seg, n_seg = stat['recall']
            recall = n_tr_seg / n_seg
            val = 2 * (precision * recall) / (precision + recall + _eps)
        else:
            v1, v2 = stat[key]
            if key == 'iou_bg':
                v2 += 1  # bg class
            val = v1 / v2
        logger.debug('%s: %f' % (key, val))
def viterbi_decoding(self):
    """Run Viterbi decoding on every video and log newly seen orderings.

    Side effects: updates each video's segmentation (video.viterbi) and
    the corpus-wide subaction counter.
    """
    logger.debug('.')
    self._count_subact()
    pr_orders = []  # distinct subaction orderings observed so far
    for video_idx, video in enumerate(self._videos):
        # periodic progress logging with the current subaction statistics
        if video_idx % 20 == 0:
            logger.debug('%d / %d' % (video_idx, len(self._videos)))
            self._count_subact()
            logger.debug(str(self._subact_counter))
        if opt.bg:
            # refresh which frames count as foreground before decoding
            video.update_fg_mask()
        video.viterbi()
        cur_order = list(video._pi)
        if cur_order not in pr_orders:
            logger.debug(str(cur_order))
            pr_orders.append(cur_order)
    self._count_subact()
    logger.debug(str(self._subact_counter))
def load_gt(self):
    """Load cached ground-truth mappings, or build them from opt.gt files.

    Builds self.gt (filename -> per-frame label array, subsampled by
    self.frequency) and self.order (filename -> list of
    [label, start, end] segments), then caches both to disk.
    """
    self.gt = self.load_obj('gt%d%s' % (self.frequency, opt.gr_lev))
    self.order = self.load_obj('order%d%s' % (self.frequency, opt.gr_lev))
    if self.gt is None or self.order is None:
        logger.debug('cannot load -> create mapping')
        self.gt, self.order = {}, {}
        for root, dirs, files in os.walk(opt.gt):
            for filename in files:
                with open(os.path.join(root, filename), 'r') as f:
                    labels = []
                    local_order = []
                    curr_lab = -1          # label of the current segment
                    start, end = 0, 0      # segment boundaries (in kept frames)
                    for line_idx, line in enumerate(f):
                        # keep only every self.frequency-th frame
                        if line_idx % self.frequency:
                            continue
                        line = line.split()[0]
                        try:
                            labels.append(self.label2index[line])
                            # label changed -> close the previous segment
                            if curr_lab != labels[-1]:
                                if curr_lab != -1:
                                    local_order.append(
                                        [curr_lab, start, end])
                                curr_lab = labels[-1]
                                start = end
                            end += 1
                        except KeyError:
                            # unknown label: discard this whole file
                            break
                    else:
                        # executes every time the "for" wasn't interrupted by break
                        self.gt[filename] = np.array(labels)
                        # add last labels
                        local_order.append([curr_lab, start, end])
                        self.order[filename] = local_order
        self.save_obj(self.gt, 'gt%d%s' % (self.frequency, opt.gr_lev))
        self.save_obj(self.order,
                      'order%d%s' % (self.frequency, opt.gr_lev))
    else:
        logger.debug('successfully loaded')
def __init__(self, subaction='coffee', K=None):
    """
    Args:
        subaction: current name of complex activity
        K: number of subactions; if None it is derived from the ground
            truth for this activity
    """
    np.random.seed(opt.seed)
    self.gt_map = GroundTruth(frequency=opt.frame_frequency)
    self.gt_map.load_mapping()
    self._K = self.gt_map.define_K(subaction=subaction) if K is None else K
    logger.debug('%s subactions: %d' % (subaction, self._K))
    self.iter = 0
    self.return_stat = {}  # aggregated metrics for the final report

    self._acc_old = 0
    self._videos = []
    self._subaction = subaction
    # init with ones for consistency with first measurement of MoF
    self._subact_counter = np.ones(self._K)
    self._gaussians = {}
    self._inv_count_stat = np.zeros(self._K)
    self._embedding = None
    self._gt2label = None
    self._label2gt = {}

    self._with_bg = opt.bg
    self._total_fg_mask = None

    # multiprocessing for sampling activities for each video
    self._features = None
    self._embedded_feat = None
    self._init_videos()
    # logger.debug('min: %f  max: %f  avg: %f' %
    #              (np.min(self._features),
    #               np.max(self._features),
    #               np.mean(self._features)))

    # to save segmentation of the videos
    dir_check(os.path.join(opt.output_dir, 'segmentation'))
    dir_check(os.path.join(opt.output_dir, 'likelihood'))
    self.vis = None  # visualization tool
def iou_classes(self):
    """Log per-class IoU and store averages with and without excluded classes."""
    iou_kept = 0
    iou_excluded = 0
    for label, (true_frames, union) in self._classes_IoU.items():
        logger.debug('label %d: %f %d / %d' %
                     (label, true_frames / union, true_frames, union))
        if label in self.exclude:
            iou_excluded += true_frames / union
        else:
            iou_kept += true_frames / union

    n_kept = len(self._classes_IoU) - len(self.exclude)
    average_iou_without_exc = iou_kept / n_kept
    average_iou_with_exc = \
        (iou_kept + iou_excluded) / len(self._classes_IoU)

    logger.debug('average IoU: %f' % average_iou_without_exc)
    self._return['iou'] = [iou_kept, n_kept]
    # denominator deliberately excludes bg; parse_return_stat adds 1 for it
    self._return['iou_bg'] = [iou_kept + iou_excluded, n_kept]
    if self.exclude:
        logger.debug('average IoU with bg: %f' % average_iou_with_exc)
def update_opt_str():
    """Compose opt.log_str from option values and log every option."""
    # option name -> short tag used inside the log string
    logs_args_map = {'model_name': '',
                     'reduced': 'size',
                     'epochs': 'ep',
                     'embed_dim': 'dim',
                     'lr': 'lr',
                     'bg': 'bg',
                     'f_norm': 'nm'}
    if opt.bg:
        logs_args_map['bg_trh'] = 'bg'

    ordered_args = ['prefix', 'subaction'] + sorted(logs_args_map)
    logs_args_map['prefix'] = ''
    logs_args_map['subaction'] = ''

    pieces = []
    for arg_name in ordered_args:
        value = getattr(opt, arg_name)
        tag = logs_args_map[arg_name]
        if isinstance(value, bool):
            # boolean flags appear as the bare tag or a negated '!' tag
            token = tag if value else '!' + tag
        else:
            token = '%s%s' % (tag, str(value))
        pieces.append('%s_' % token)
    opt.log_str = ''.join(pieces)

    for arg_name in sorted(list(vars(opt))):
        logger.debug('%s: %s' % (arg_name, getattr(opt, arg_name)))
def pipeline(self, iterations=1, epochs=30, dim=20, lr=1e-3):
    """Full segmentation loop over a pre-embedded corpus.

    Args:
        iterations: number of clustering/decoding rounds
        epochs: training epochs, written into opt
        dim: embedding dimensionality, written into opt
        lr: learning rate, written into opt
    """
    opt.epochs = epochs
    opt.resume = False
    opt.embed_dim = dim
    opt.lr = lr
    assert self._embedding is not None
    # project raw features through the provided embedding once up front
    self._embedded_feat = torch.Tensor(self._features)
    self._embedded_feat = \
        self._embedding.embedded(self._embedded_feat).detach().numpy()

    # NOTE(review): clustering() runs here AND again at the top of the
    # first loop iteration — confirm the warm-up call is intentional
    self.clustering()
    for iteration in range(iterations):
        logger.debug('Iteration %d' % iteration)
        self.iter = iteration
        self.clustering()
        self.gaussian_model()
        self.accuracy_corpus()
        self.viterbi_decoding()
        self.accuracy_corpus('final')
def temp_embed():
    """End-to-end run for one complex activity: embed, cluster, decode, score."""
    # loads all videos, features, and gt
    corpus = Corpus(subaction=opt.subaction)
    logger.debug('Corpus with poses created')

    if opt.model_name in ['mlp']:
        # trains or loads a model used to extract temporal embeddings per video
        corpus.regression_training()
    if opt.model_name == 'nothing':
        corpus.without_temp_emed()

    corpus.clustering()
    corpus.gaussian_model()
    corpus.accuracy_corpus()

    if opt.resume_segmentation:
        corpus.resume_segmentation()
    else:
        corpus.viterbi_decoding()
    corpus.accuracy_corpus('final')

    return corpus.return_stat
def regression_training(self):
    """Train (or load) the relative-time regression embedding and apply it.

    Side effects: sets self._embedding and self._embedded_feat; optionally
    dumps embedded features to disk; logs the training MSE.
    """
    # shortcut: features were already embedded and saved in a previous run
    if opt.load_embed_feat:
        logger.debug('load precomputed features')
        self._embedded_feat = self._features
        return

    logger.debug('.')
    dataloader = load_reltime(videos=self._videos,
                              features=self._features)

    model, loss, optimizer = mlp.create_model()
    if opt.load_model:
        model.load_state_dict(load_model())
        self._embedding = model
    else:
        self._embedding = training(dataloader, opt.epochs,
                                   save=opt.save_model,
                                   model=model,
                                   loss=loss,
                                   optimizer=optimizer,
                                   name=opt.model_name)

    self._embedding = self._embedding.cpu()

    # second pass without shuffling so outputs align with video order
    unshuffled_dataloader = load_reltime(videos=self._videos,
                                         features=self._features,
                                         shuffle=False)

    gt_relative_time = None
    relative_time = None
    if opt.model_name == 'mlp':
        # accumulate all features and gt relative-time targets in order
        for batch_features, batch_gtreltime in unshuffled_dataloader:
            if self._embedded_feat is None:
                self._embedded_feat = batch_features
            else:
                self._embedded_feat = torch.cat(
                    (self._embedded_feat, batch_features), 0)
            batch_gtreltime = batch_gtreltime.numpy().reshape((-1, 1))
            gt_relative_time = join_data(gt_relative_time, batch_gtreltime,
                                         np.vstack)

        # predicted relative time for every frame (regression output)
        relative_time = self._embedding(
            self._embedded_feat.float()).detach().numpy().reshape((-1, 1))

        # hidden-layer representation becomes the new feature matrix
        self._embedded_feat = self._embedding.embedded(
            self._embedded_feat.float()).detach().numpy()
        self._embedded_feat = np.squeeze(self._embedded_feat)

    if opt.save_embed_feat:
        self.save_embed_feat()

    mse = np.sum((gt_relative_time - relative_time)**2)
    mse = mse / len(relative_time)
    logger.debug('MLP training: MSE: %f' % mse)
def create_model():
    """Instantiate the MLP together with its loss and optimizer (seeded)."""
    torch.manual_seed(opt.seed)
    model = MLP().to(opt.device)
    loss = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.weight_decay)
    for component in (model, loss, optimizer):
        logger.debug(str(component))
    return model, loss, optimizer
def mof_classes(self):
    """Log per-class MoF and store overall / background-aware frame totals."""
    mof_sum = 0
    total_true = 0
    total = 0
    for label, (true_frames, all_frames) in self._classes_MoF.items():
        logger.debug(
            'label %d: %f %d / %d' %
            (label, true_frames / all_frames, true_frames, all_frames))
        mof_sum += true_frames / all_frames
        total_true += true_frames
        total += all_frames
    average_class_mof = mof_sum / len(self._classes_MoF)
    logger.debug('average class mof: %f' % average_class_mof)
    # [numerator, denominator] pairs consumed by parse_return_stat
    self._return['mof'] = [self._frames_true_pr, self._frames_overall]
    self._return['mof_bg'] = [total_true, total]
    if opt.bg:
        logger.debug('mof with bg: %f' % (total_true / total))
def _init_videos(self):
    """No-op override: this corpus variant receives its videos externally."""
    logger.debug('nothing should happen')
def resume_segmentation(self):
    """Reload each video's stored segmentation for the current iteration."""
    logger.debug('resume precomputed segmentation')
    for vid in self._videos:
        vid.iter = self.iter
        vid.resume()
    self._count_subact()
def training(train_loader, epochs, save, **kwargs):
    """Training pipeline for embedding.

    Args:
        train_loader: iterator within dataset
        epochs: how much training epochs to perform
        save: whether to checkpoint the model to disk after training
        kwargs: must contain 'model', 'loss' and 'optimizer'
    Returns:
        trained pytorch model
    """
    logger.debug('create model')

    # make everything deterministic -> seed setup
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)
    torch.backends.cudnn.deterministic = True

    model = kwargs['model']
    loss = kwargs['loss']
    optimizer = kwargs['optimizer']

    cudnn.benchmark = True

    # running averages for timing and loss reporting
    batch_time = Averaging()
    data_time = Averaging()
    losses = Averaging()

    adjustable_lr = opt.lr

    logger.debug('epochs: %s', epochs)
    for epoch in range(epochs):
        # model.cuda()
        model.to(opt.device)
        model.train()

        logger.debug('Epoch # %d' % epoch)
        if opt.lr_adj:
            # decay the learning rate every 30 epochs (skipping epoch 0)
            # if epoch in [int(epochs * 0.3), int(epochs * 0.7)]:
            # if epoch in [int(epochs * 0.5)]:
            if epoch % 30 == 0 and epoch > 0:
                adjustable_lr = adjust_lr(optimizer, adjustable_lr)
                logger.debug('lr: %f' % adjustable_lr)
        end = time.time()
        for i, (features, labels) in enumerate(train_loader):
            data_time.update(time.time() - end)
            features = features.float()
            labels = labels.float().to(opt.device)
            if opt.device == 'cuda':
                features = features.cuda(non_blocking=True)
            output = model(features)
            loss_values = loss(output, labels)
            losses.update(loss_values.item(), features.size(0))

            optimizer.zero_grad()
            loss_values.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            # periodic progress report (skip the very first batch)
            if i % 100 == 0 and i:
                logger.debug(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        epoch, i, len(train_loader),
                        batch_time=batch_time, data_time=data_time,
                        loss=losses))
        logger.debug('loss: %f' % losses.avg)
        losses.reset()

    # checkpoint path is recorded on opt even when saving is disabled
    opt.resume_str = join(opt.dataset_root, 'models',
                          '%s.pth.tar' % opt.log_str)
    if save:
        save_dict = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        dir_check(join(opt.dataset_root, 'models'))
        torch.save(save_dict, opt.resume_str)
    return model