Example #1
def grouped_pca(grouped_features, n_components: int, pca_models_by_group=None):
    # grouped_features: Dict[group_name: str, Dict[vid_name: str, np array]]
    if pca_models_by_group is not None:
        assert set(grouped_features.keys()) == set(pca_models_by_group.keys())
    else:
        pca_models_by_group = {}
        for group_name, vid_dict in grouped_features.items():
            # rows are data points, so all videos within a group must have the same number of columns
            assert all_equal(v.shape[1] for v in vid_dict.values())
            X_l = []
            for vid, features in vid_dict.items():
                X_l.append(features)
            X = np.vstack(X_l)
            pca = PCA(n_components=min(n_components, X.shape[1]))
            pca.fit(X)
            logger.debug("group {}: {} instances".format(group_name, len(X_l)))
            logger.debug("group {}: pca explained {} of the variance".format(
                group_name, pca.explained_variance_ratio_.sum()))
            pca_models_by_group[group_name] = pca
    transformed = {
        group_name: {
            vid_name: pca_models_by_group[group_name].transform(x)
            for vid_name, x in vid_dict.items()
        }
        for group_name, vid_dict in grouped_features.items()
    }
    return transformed, pca_models_by_group
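
A minimal usage sketch for grouped_pca with made-up data. The snippet above assumes module-level np (numpy), PCA (scikit-learn), logger, and an all_equal helper; a plausible version of that setup:

import logging
import numpy as np
from sklearn.decomposition import PCA

logger = logging.getLogger(__name__)

def all_equal(iterable):
    # helper assumed by these snippets: True when every element compares equal
    items = list(iterable)
    return all(x == items[0] for x in items[1:])

# one feature matrix per video, grouped by feature type; rows are frames
grouped_features = {
    "rgb": {"vid_a": np.random.rand(100, 64), "vid_b": np.random.rand(80, 64)},
    "flow": {"vid_a": np.random.rand(100, 32), "vid_b": np.random.rand(80, 32)},
}
transformed, pca_models = grouped_pca(grouped_features, n_components=16)
assert transformed["rgb"]["vid_a"].shape == (100, 16)
# pass pca_models back as pca_models_by_group to apply the fitted models to held-out videos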
Example #2
def merge_grouped(grouped_features):
    # grouped_features: Dict[group_name: str, Dict[vid_name: str, np array]]
    merged = {}
    # every group must contain the same set of vid_names
    assert all_equal(group_dict.keys()
                     for group_dict in grouped_features.values())
    for vid_name in next(iter(grouped_features.values())):
        values = [
            t[1][vid_name]
            for t in sorted(grouped_features.items(), key=lambda t: t[0])
        ]
        merged[vid_name] = np.hstack(values)
    return merged
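
Continuing the sketch from Example #1: merge_grouped concatenates each video's per-group features column-wise, with groups taken in sorted name order ("flow" before "rgb" here):

merged = merge_grouped(transformed)
assert merged["vid_a"].shape == (100, 32)  # 16 flow + 16 rgb components
assert merged["vid_b"].shape == (80, 32)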
Example #3
    def forward(self, features, lengths, valid_classes_per_instance=None):
        # batch_size x max_len x seq_hidden_size
        encoded = self.encoder(features, lengths, output_padding_value=0)
        # batch_size x max_len x num_classes
        logits = self.proj(encoded)
        if valid_classes_per_instance is not None:
            assert all_equal(set(vc.detach().cpu().numpy()) for vc in valid_classes_per_instance), \
                "must have same valid_classes for all instances in the batch"
            valid_classes = valid_classes_per_instance[0]
            mask = torch.full_like(logits, -float("inf"))
            mask[:, :, valid_classes] = 0
            logits = logits + mask
        return logits
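
The additive -inf mask restricts the argmax to the valid classes while leaving their logits untouched. A standalone sketch of the trick with made-up shapes:

import torch

logits = torch.randn(2, 5, 10)                 # batch x time x classes
valid_classes = torch.tensor([1, 4, 7])
mask = torch.full_like(logits, -float("inf"))
mask[:, :, valid_classes] = 0                  # valid classes keep their scores
masked = logits + mask                         # all other classes become -inf
assert set(masked.argmax(dim=-1).flatten().tolist()) <= {1, 4, 7}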
Example #4
    def predict(self, test_data: Datasplit):
        self.model.eval()
        predictions = {}
        loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)
        for batch in loader:
            features = batch['features']
            lengths = batch['lengths']
            task_indices = batch['task_indices']
            if self.args.cuda:
                features = features.cuda()
                lengths = lengths.cuda()
                task_indices = [indx.cuda() for indx in task_indices]
            videos = batch['video_name']
            assert all_equal(videos)
            video = next(iter(videos))
            # batch_size x length x num_classes
            with torch.no_grad():
                logits = self.model(features, lengths, valid_classes_per_instance=task_indices)
                preds = logits.max(dim=-1)[1]
                preds = preds.squeeze(0)
                assert preds.ndim == 1
                predictions[video] = preds.detach().cpu().numpy()
        return predictions
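
Note that logits.max(dim=-1)[1] is simply the argmax over the class dimension, and squeeze(0) drops the singleton batch dimension that batch_size=1 produces:

import torch

logits = torch.randn(1, 7, 10)             # batch_size=1 x length x num_classes
preds = logits.max(dim=-1)[1].squeeze(0)   # same as logits.argmax(dim=-1).squeeze(0)
assert preds.shape == (7,)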
Example #5
    def viterbi(self, features, lengths, valid_classes_per_instance, add_eos=True, use_mean_z=False,
                additional_allowed_ends_per_instance=None, constraints=None, predict_single=False, return_elp=False):
        if valid_classes_per_instance is not None:
            assert all_equal(set(vc.detach().cpu().numpy()) for vc in
                             valid_classes_per_instance), "must have same valid_classes for all instances in the batch"
            valid_classes = valid_classes_per_instance[0]
            C = len(valid_classes)
        else:
            valid_classes = None
            C = self.n_classes
        scores, log_det, elp = self.score_features(features, lengths, valid_classes, add_eos=add_eos, use_mean_z=use_mean_z,
                                     additional_allowed_ends_per_instance=additional_allowed_ends_per_instance,
                                     constraints=constraints, return_elp=True)
        if add_eos:
            eos_lengths = lengths + 1
        else:
            eos_lengths = lengths
        dist = SemiMarkovCRF(scores, lengths=eos_lengths)

        pred_spans, extra = dist.struct.from_parts(dist.argmax)
        # convert to class labels
        # pred_spans_trim = self.model.trim(pred_spans, lengths, check_eos=add_eos)

        pred_spans_unmap = pred_spans.detach().cpu()
        if valid_classes is not None:
            mapping = {index: cls.item() for index, cls in enumerate(valid_classes)}
            assert len(mapping.values()) == len(mapping), "valid_classes must be unique"
            assert -1 not in mapping.values()
            mapping[-1] = -1
            mapping[C] = self.n_classes  # map EOS
            # unmap
            pred_spans_unmap.apply_(lambda x: mapping[x])

        if return_elp:
            return pred_spans_unmap, elp
        else:
            return pred_spans_unmap
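
The unmapping step relies on Tensor.apply_, which applies a Python callable elementwise and only works on CPU tensors (hence the detach().cpu() above). A small sketch with made-up class ids:

import torch

valid_classes = torch.tensor([5, 9, 11])       # global ids of the C=3 local classes
n_classes = 20                                 # hypothetical total class count
pred_spans = torch.tensor([0, -1, 2, -1, 3])   # local labels; -1 maps to itself, 3 == C is EOS
mapping = {index: cls.item() for index, cls in enumerate(valid_classes)}
mapping[-1] = -1
mapping[len(valid_classes)] = n_classes        # map EOS
pred_spans.apply_(lambda x: mapping[x])
assert pred_spans.tolist() == [5, -1, 11, -1, 20]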
Example #6
class BreakfastCorpus(Corpus):
    BACKGROUND_LABELS = ["SIL"]

    TASKS = [
        'coffee', 'cereals', 'tea', 'milk', 'juice', 'sandwich',
        'scrambledegg', 'friedegg', 'salat', 'pancake'
    ]

    DATASPLITS = {
        's1': ["P{:02d}".format(d) for d in range(3, 16)],
        's2': ["P{:02d}".format(d) for d in range(16, 29)],
        's3': ["P{:02d}".format(d) for d in range(29, 42)],
        's4': ["P{:02d}".format(d) for d in range(42, 55)],
    }
    assert all_equal(len(v) for v in DATASPLITS.values())

    def __init__(self,
                 mapping_file,
                 feature_root,
                 label_root,
                 task_specific_steps=False):
        self._mapping_file = mapping_file
        self._feature_root = feature_root
        self._label_root = label_root
        self._task_specific_steps = task_specific_steps
        assert not task_specific_steps
        self.annotate_background_with_previous = False

        super(BreakfastCorpus,
              self).__init__(background_labels=self.BACKGROUND_LABELS)

    def _get_components_for_label(self, label):
        return label.split('_')

    def _load_mapping(self):
        with open(self._mapping_file, 'r') as f:
            for line in f:
                index, label = line.strip().split()
                index = int(index)
                _index = self._index(label)
                if label in self._background_labels:
                    assert index in self._background_indices
                if index in self._background_indices:
                    assert label in self._background_labels
                assert _index == index

    def get_datasplit(self,
                      remove_background,
                      task_filter=None,
                      splits=None,
                      full=True,
                      subsample=1,
                      feature_downscale=1.0,
                      feature_permutation_seed=None):
        return BreakfastDatasplit(
            self,
            remove_background,
            task_filter=task_filter,
            splits=splits,
            full=full,
            subsample=subsample,
            feature_downscale=feature_downscale,
            feature_permutation_seed=feature_permutation_seed)
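
The class-level assert (via the all_equal helper these snippets rely on) checks that the four splits are the same size. A standalone sketch of the participant layout DATASPLITS encodes:

DATASPLITS = {
    "s{}".format(i + 1): ["P{:02d}".format(p) for p in range(3 + 13 * i, 16 + 13 * i)]
    for i in range(4)
}
# four disjoint groups of 13 participants covering P03..P54
assert all(len(v) == 13 for v in DATASPLITS.values())
assert sorted(p for v in DATASPLITS.values() for p in v) == ["P{:02d}".format(p) for p in range(3, 55)]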
Example #7
    def predict(self, test_data):
        self.model.eval()
        self.model.flatten_parameters()
        predictions = {}
        loader = make_data_loader(self.args, test_data, shuffle=False, batch_by_task=True, batch_size=self.args.batch_size)
        # print('{} videos in prediction data'.format(len(loader.dataset)))
        # for batch in tqdm.tqdm(loader, ncols=80):
        for batch in loader:
            features = batch['features']
            task_indices = batch['task_indices']
            lengths = batch['lengths']

            # add a batch dimension
            # lengths = torch.LongTensor([features.size(0)]).unsqueeze(0)
            # features = features.unsqueeze(0)
            # task_indices = task_indices.unsqueeze(0)

            videos = batch['video_name']
            tasks = batch['task_name']
            assert len(set(tasks)) == 1
            task = next(iter(tasks))

            if 'test' in self.args.sm_constrain_with_narration:
                assert all_equal(tasks)
                constraints_expanded = self.expand_constraints(
                    test_data, task, task_indices[0], 1 - batch['constraints']
                )
                constraints_expanded *= self.args.sm_constrain_narration_weight
            else:
                constraints_expanded = None

            if self.args.cuda:
                features = features.cuda()
                task_indices = [ti.cuda() for ti in task_indices]
                lengths = lengths.cuda()
                if constraints_expanded is not None:
                    constraints_expanded = constraints_expanded.cuda()

            addl_allowed_ends = self.make_additional_allowed_ends(tasks, lengths)

            def predict(constraints):
                # TODO: figure out under which eval conditions use_mean_z should be False
                pred_spans, elp = self.model.viterbi(features, lengths, task_indices, add_eos=True, use_mean_z=True,
                                                additional_allowed_ends_per_instance=addl_allowed_ends,
                                                constraints=constraints, return_elp=True)
                pred_labels = semimarkov_utils.spans_to_labels(pred_spans)
                # if self.args.sm_predict_single:
                #     # pred_spans: batch_size x T
                #     pred_labels_single = torch.zeros_like(pred_labels)
                #     for i in pred_labels.size(0):
                #         for lab in torch.unique(pred_labels[i,:lengths[i]]):
                #             #emission_scores: b x N x C
                #             pred_labels
                #             pass

                # if self.args.sm_constrain_transitions:
                #     all_pred_span_indices = [
                #         [ix for ix, count in this_rle_spans]
                #         for this_rle_spans in semimarkov_utils.rle_spans(pred_spans, lengths)
                #     ]
                #     for i, indices in enumerate(all_pred_span_indices):
                #         remove_cons_dups = [ix for ix, group in itertools.groupby(indices)
                #                             if not ix in test_data.corpus._background_indices]
                #         non_bg_indices = [
                #             ix for ix in test_data.corpus.indices_by_task(task)
                #             if ix not in test_data.corpus._background_indices
                #         ]
                #         if len(remove_cons_dups) != len(non_bg_indices) and lengths[i].item() != len(remove_cons_dups):
                #             print("deduped: {}, indices: {}, length {}".format(
                #                 remove_cons_dups, non_bg_indices, lengths[i].item()
                #             ))
                #             # assert lengths[i].item() < len(non_bg_indices)

                pred_labels_trim_s = self.model.trim(pred_labels, lengths, check_eos=True)
                return pred_labels_trim_s

            pred_labels_trim_s = predict(constraints_expanded)

            # assert len(pred_labels_trim_s) == 1, "batch size should be 1"
            for ix, (video, pred_labels_trim) in enumerate(zip(videos, pred_labels_trim_s)):
                preds = pred_labels_trim.numpy()
                predictions[video] = preds
                # if constraints_expanded is not None:
                #     this_cons = batch['constraints'][ix]
                #     if this_cons.sum() > 0:
                #         step_indices = test_data.get_ordered_indices_no_background()[task]
                #         for t, label in enumerate(preds):
                #             if label in step_indices:
                #                 label_ix = step_indices.index(label)
                #                 assert batch['constraints'][ix,t,label_ix] == 1
                assert self.model.n_classes not in predictions[video], "predictions should not contain EOS: {}".format(
                    predictions[video])
        return predictions
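
model.trim is not shown in these examples; based on how it is called (per-video lengths, check_eos=True after decoding with add_eos=True), here is a hypothetical sketch of what such a trim step plausibly does, not the source implementation:

import torch

def trim_sketch(pred_labels, lengths, n_classes, check_eos=True):
    # cut each padded row back to its true length; with add_eos=True the frame
    # at index n should hold the EOS label (class id n_classes)
    out = []
    for row, n in zip(pred_labels, lengths):
        n = int(n)
        if check_eos:
            assert int(row[n]) == n_classes, "expected EOS right after the sequence"
        out.append(row[:n].detach().cpu())
    return out

# two videos of lengths 3 and 2, padded to 4 frames, with EOS class id 9:
batch = torch.tensor([[1, 1, 2, 9], [5, 5, 9, 0]])
trimmed = trim_sketch(batch, torch.tensor([3, 2]), n_classes=9)
assert [t.tolist() for t in trimmed] == [[1, 1, 2], [5, 5]]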
Example #8
    def fit(self, train_data: Datasplit, use_labels: bool, callback_fn=None):
        self.model.train()
        self.model.flatten_parameters()
        if use_labels:
            assert not self.args.sm_constrain_transitions
        initialize = True
        if use_labels and self.args.sm_supervised_method in ['closed-form', 'closed-then-gradient']:
            self.fit_supervised(train_data)
            if self.args.sm_supervised_method == 'closed-then-gradient':
                initialize = False
                callback_fn(-1, {})
            else:
                return
        if self.args.sm_init_non_projection_parameters_from:
            initialize = False
            if callback_fn:
                callback_fn(-1, {})
        optimizer, scheduler = make_optimizer(self.args, self.model.parameters())
        big_loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=True, batch_size=100)
        samp = next(iter(big_loader))
        big_features = samp['features']
        big_lengths = samp['lengths']
        if self.args.cuda:
            big_features = big_features.cuda()
            big_lengths = big_lengths.cuda()

        if initialize:
            self.model.initialize_gaussian(big_features, big_lengths)

        loader = make_data_loader(self.args, train_data, batch_by_task=True, shuffle=True, batch_size=self.args.batch_size)

        # print('{} videos in training data'.format(len(loader.dataset)))

        # all_features = [sample['features'] for batch in loader for sample in batch]
        # if self.args.cuda:
        #     all_features = [feats.cuda() for feats in all_features]

        C = self.n_classes
        K = self.args.sm_max_span_length

        for epoch in range(self.args.epochs):
            start_time = time.time()
            # call here since we may set eval in callback_fn
            self.model.train()
            losses = []
            multi_batch_losses = []
            nlls = []
            kls = []
            log_dets = []
            num_frames = 0
            num_videos = 0
            train_nll = 0
            train_kl = 0
            train_log_det = 0
            # for batch_ix, batch in enumerate(tqdm.tqdm(loader, ncols=80)):
            for batch_ix, batch in enumerate(loader):
                if self.args.train_limit and batch_ix >= self.args.train_limit:
                    break
                # if self.args.cuda:
                #     features = features.cuda()
                #     task_indices = task_indices.cuda()
                #     gt_single = gt_single.cuda()
                tasks = batch['task_name']
                videos = batch['video_name']
                features = batch['features']
                task_indices = batch['task_indices']
                lengths = batch['lengths']

                if 'train' in self.args.sm_constrain_with_narration:
                    assert all_equal(tasks)
                    constraints_expanded = self.expand_constraints(
                        train_data, tasks[0], task_indices[0], 1 - batch['constraints']
                    )
                    constraints_expanded *= self.args.sm_constrain_narration_weight
                else:
                    constraints_expanded = None

                num_frames += lengths.sum().item()
                num_videos += len(lengths)

                # assert len( task_indices) == self.n_classes, "remove_background and multi-task fit() not implemented"

                if self.args.cuda:
                    features = features.cuda()
                    lengths = lengths.cuda()
                    if constraints_expanded is not None:
                        constraints_expanded = constraints_expanded.cuda()

                if use_labels:
                    labels = batch['gt_single']
                    if self.args.cuda:
                        labels = labels.cuda()
                    spans = semimarkov_utils.labels_to_spans(labels, max_k=K)
                    use_mean_z = True
                else:
                    spans = None
                    use_mean_z = False

                addl_allowed_ends = self.make_additional_allowed_ends(tasks, lengths)

                ll, log_det = self.model.log_likelihood(features,
                                                 lengths,
                                                 valid_classes_per_instance=task_indices,
                                                 spans=spans,
                                                 add_eos=True,
                                                 use_mean_z=use_mean_z,
                                                 additional_allowed_ends_per_instance=addl_allowed_ends,
                                                 constraints=constraints_expanded)
                nll = -ll
                kl = self.model.kl.mean()
                if use_labels:
                    this_loss = nll - log_det
                else:
                    this_loss = nll - log_det + kl
                multi_batch_losses.append(this_loss)
                nlls.append(nll.item())
                kls.append(kl.item())
                log_dets.append(log_det.item())

                train_nll += (nll.item() * len(videos))
                train_kl += (kl.item() * len(videos))
                train_log_det += (log_det.item() * len(videos))

                losses.append(this_loss.item())

                if len(multi_batch_losses) >= self.args.batch_accumulation:
                    loss = sum(multi_batch_losses) / len(multi_batch_losses)
                    loss.backward()
                    multi_batch_losses = []

                    if self.args.print_every and (batch_ix % self.args.print_every == 0):
                        param_norm = sum([p.norm()**2 for p in self.model.parameters()
                                          if p.requires_grad]).item()**0.5
                        gparam_norm = sum([p.grad.norm()**2 for p in self.model.parameters()
                                           if p.requires_grad and p.grad is not None]).item()**0.5
                        log_str = 'Epoch: %02d, Batch: %03d/%03d, |Param|: %.6f, |GParam|: %.2f, lr: %.2E, ' + \
                                  'loss: %.4f, recon: %.4f, kl: %.4f, log_det: %.4f, recon_bound: %.2f, Throughput: %.2f vid / sec'
                        print(log_str %
                              (epoch, batch_ix, len(loader), param_norm, gparam_norm,
                               optimizer.param_groups[0]["lr"],
                               (train_nll + train_kl + train_log_det) / num_videos,  # loss
                               train_nll / num_frames,  # recon
                               train_kl / num_frames,  # kl
                               train_log_det / num_videos,  # log_det
                               (train_nll + train_kl) / num_frames,  # recon_bound
                               num_videos / (time.time() - start_time)))  # Throughput
                    if self.args.max_grad_norm is not None:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                    optimizer.step()
                    self.model.zero_grad()
            train_loss = np.mean(losses)
            if scheduler is not None:
                scheduler.step(train_loss)
            callback_fn(epoch, {'train_loss': train_loss,
                                'train_nll_frame_avg': train_nll / num_frames,
                                'train_kl_vid_avg': train_kl / num_videos,
                                'train_recon_bound': (train_nll + train_kl) / num_frames})
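
The multi_batch_losses bookkeeping above is gradient accumulation: losses are averaged over args.batch_accumulation batches before a single backward pass and optimizer step. The pattern in isolation, with a hypothetical model and data:

import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulation = 4
multi_batch_losses = []
for step in range(16):
    x = torch.randn(8, 4)
    loss = model(x).pow(2).mean()
    multi_batch_losses.append(loss)
    if len(multi_batch_losses) >= accumulation:
        # average the held losses, then take one optimizer step
        (sum(multi_batch_losses) / len(multi_batch_losses)).backward()
        multi_batch_losses = []
        optimizer.step()
        model.zero_grad()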

    def log_likelihood(self, features, lengths, valid_classes_per_instance, spans=None, add_eos=True, use_mean_z=False,
                       additional_allowed_ends_per_instance=None, constraints=None):
        if valid_classes_per_instance is not None:
            assert all_equal(set(vc.detach().cpu().numpy()) for vc in
                             valid_classes_per_instance), "must have same valid_classes for all instances in the batch"
            valid_classes = valid_classes_per_instance[0]
            C = len(valid_classes)
        else:
            valid_classes = None
            C = self.n_classes

        scores, log_det = self.score_features(features, lengths, valid_classes, add_eos=add_eos, use_mean_z=use_mean_z,
                                     additional_allowed_ends_per_instance=additional_allowed_ends_per_instance,
                                     constraints=constraints)

        K = scores.size(2)
        assert K <= self.max_k or (self.max_k == 1 and K == 2)

        if add_eos:
            eos_lengths = lengths + 1
            eos_spans = self.add_eos(spans, lengths) if spans is not None else spans
            eos_C = C + 1
        else:
            eos_lengths = lengths
            eos_spans = spans
            eos_C = C

        dist = SemiMarkovCRF(scores, lengths=eos_lengths)

        if eos_spans is not None:
            eos_spans_mapped = eos_spans.detach().cpu().clone()
            if valid_classes is not None:
                # unmap
                mapping = {cls.item(): index for index, cls in enumerate(valid_classes)}
                assert len(mapping) == len(valid_classes), "valid_classes must be unique"
                assert -1 not in mapping
                mapping[-1] = -1
                mapping[self.n_classes] = C  # map EOS
                if 0 not in mapping:
                    # TODO: hack, 0 sometimes will signify padding
                    mapping[0] = 0
                eos_spans_mapped.apply_(lambda x: mapping[x])
            # features = features[:,:this_N,:]
            # spans = spans[:,:this_N]
            parts = SemiMarkovCRF.struct.to_parts(eos_spans_mapped, (eos_C, K),
                                                  lengths=eos_lengths).type_as(scores)

            if self.args.sm_train_discriminatively:
                # this maximizes p(y | x)
                log_likelihood = dist.log_prob(parts).mean()
            else:
                # this maximizes p(x, y)
                d = parts.dim()
                batch_dims = range(d - len(dist.event_shape))
                log_likelihood = dist.struct().score(
                    dist.log_potentials,
                    parts.type_as(dist.log_potentials),
                    batch_dims=batch_dims,
                ).mean()
        else:
            log_likelihood = dist.partition.mean()
        return log_likelihood, log_det.mean()
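
The -1 handling in the mappings above suggests the span encoding used by labels_to_spans/spans_to_labels: a segment's class is written at its first frame and -1 marks continuation frames. That is an inference from this code, not a confirmed spec; a sketch of the inverse mapping under that assumption:

import torch

labels = torch.tensor([[3, 3, 3, 7, 7, 2]])
spans = torch.tensor([[3, -1, -1, 7, -1, 2]])

def spans_to_labels_sketch(spans):
    # forward-fill: each -1 inherits the label of the frame before it
    out = spans.clone()
    for t in range(1, out.size(1)):
        cont = out[:, t] == -1
        out[cont, t] = out[cont, t - 1]
    return out

assert torch.equal(spans_to_labels_sketch(spans), labels)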