def load_features(self):
        """Populate the expert features, raw captions and text features.

        For every expert named in ``self.ordered_experts`` the feature files
        listed in ``self.paths`` are loaded into ``self.features``; an expert
        with several files has them merged through ``concat_features``.  In
        challenge mode the text data is delegated to
        ``load_challenge_text_features``; otherwise ``self.raw_captions`` and
        ``self.text_features`` are loaded from the configured paths.
        """
        feature_root = Path(self.root_feat)

        # expert name -> relative paths of that expert's feature files
        expert_files = {}
        for name in self.paths["feature_names"]:
            expert_files[name] = self.visual_feat_paths(name)
        expert_files.update(self.paths["custom_paths"])

        loaded = {}
        for expert, rel_names in expert_files.items():
            if expert not in self.ordered_experts:
                continue

            locations = tuple(feature_root / rel_name for rel_name in rel_names)
            if len(locations) == 1:
                loaded[expert] = memcache(locations[0])
                continue

            # Several forms of the feature exist (e.g. max and avg pooling);
            # direct concatenation is the only supported way to merge them.
            print(f"Concatenating aggregates for {expert}....")
            aggregation = self.feat_aggregation[expert]
            assert aggregation["aggregate"] == "concat", (
                f"{expert}: Only direct concatenation of muliple feats is possible"
            )
            concat_axis = aggregation["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(locations, axis=concat_axis)
            memory_summary()

            # Each split gets its own copy so that it can be filtered in place
            loaded[expert] = copy.deepcopy(merged)

        self.features = loaded

        if self.challenge_mode:
            self.load_challenge_text_features()
            return

        self.raw_captions = memcache(feature_root / self.paths["raw_captions_path"])
        text_feat_rel = self.paths["text_feat_paths"][self.text_feat]
        self.text_features = memcache(feature_root / text_feat_rel)
    def load_features(self):
        """Load distillation, expert and text features for the current split.

        Sets ``self.distil_features`` (only when ``self.distil_params`` is not
        None) and ``self.features``.  In challenge mode the text data is
        delegated to ``load_challenge_text_features``; otherwise
        ``self.text_features`` and ``self.raw_captions`` are loaded from the
        paths configured in ``self.paths``.

        Raises:
            AssertionError: if an expert has several feature files but its
                aggregation mode is not "concat".
            TypeError: if the configured text feature location is neither a
                dict nor a str/Path.
        """
        # Normalise once so that every `/` join below works regardless of
        # whether `self.root_feat` was configured as a str or as a Path.
        root_feat = Path(self.root_feat)

        if self.distil_params is not None:
            # `base_path` is prepended as a plain string prefix to each teacher
            # name before being joined onto the feature root.
            d_base_path = self.distil_params['base_path']
            teachers = [
                root_feat / Path(d_base_path + teacher)
                for teacher in self.distil_params['teachers']
            ]
            self.distil_features = {
                i: memcache(f_name) for i, f_name in enumerate(teachers)
            }

        feat_names = {
            key: self.visual_feat_paths(key)
            for key in self.paths["feature_names"]
        }
        feat_names.update(self.paths["custom_paths"])
        features = {}
        for expert, rel_names in feat_names.items():
            if expert not in self.ordered_experts:
                continue
            feat_paths = tuple(root_feat / rel_name for rel_name in rel_names)
            if len(feat_paths) == 1:
                features[expert] = memcache(feat_paths[0])
            else:
                # support multiple forms of feature (e.g. max and avg pooling). For
                # now, we only support direct concatenation
                msg = f"{expert}: Only direct concatenation of muliple feats is possible"
                print(f"Concatenating aggregates for {expert}....")
                assert self.feat_aggregation[expert][
                    "aggregate"] == "concat", msg
                axis = self.feat_aggregation[expert]["aggregate-axis"]
                x = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
                print(f"concat cache info: {x}")
                features_ = concat_features(feat_paths, axis=axis)
                memory_summary()

                # Make separate feature copies for each split to allow in-place filtering
                features[expert] = copy.deepcopy(features_)

        self.features = features
        if self.challenge_mode:
            self.load_challenge_text_features()
        else:
            text_feat_paths = self.paths["text_feat_paths"][self.text_feat]
            if isinstance(text_feat_paths, dict):
                # Per-split files: start from the train features and add the
                # entries of the requested split on top.
                # NOTE(review): `update` mutates the object returned by
                # memcache in place; if memcache hands back a shared cached
                # object, the cached train entry is modified too - confirm.
                text_features = memcache(root_feat / text_feat_paths["train"])
                text_features.update(
                    memcache(root_feat / text_feat_paths[self.split_name]))
            elif isinstance(text_feat_paths, (Path, str)):
                text_features = memcache(root_feat / text_feat_paths)
            else:
                raise TypeError(f"Unexpected type {type(text_feat_paths)}")
            self.text_features = text_features
            self.raw_captions = memcache(root_feat /
                                         self.paths["raw_captions_path"])
Пример #3
0
    def load_features(self):
        """Load expert features, captions and text features, then summarise.

        Fills ``self.features`` for the experts in ``self.ordered_experts``,
        loads ``self.raw_captions`` and ``self.text_features`` and, for the
        "jsfusion" split, ``self.restrict_test_captions``.  When
        ``self.restrict_train_captions`` is truthy, the text features of every
        train video are cut down to that many leading entries.  Finishes by
        calling ``self.summary_stats()``.
        """
        feature_root = Path(self.root_feat)

        # expert name -> relative paths of that expert's feature files
        expert_files = {}
        for name in self.paths["feature_names"]:
            expert_files[name] = self.visual_feat_paths(name)
        expert_files.update(self.paths["custom_paths"])

        loaded = {}
        for expert, rel_names in expert_files.items():
            if expert not in self.ordered_experts:
                continue

            locations = tuple(feature_root / rel_name for rel_name in rel_names)
            if len(locations) == 1:
                loaded[expert] = memcache(locations[0])
                continue

            # Several forms of the feature exist (e.g. max and avg pooling);
            # direct concatenation is the only supported way to merge them.
            concat_msg = f"{expert}: Only direct concatenation of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            uses_concat = self.feat_aggregation[expert]["aggregate"] == "concat"
            self.log_assert(uses_concat, msg=concat_msg)
            concat_axis = self.feat_aggregation[expert]["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(locations, axis=concat_axis)
            memory_summary()

            # Each split gets its own copy so that it can be filtered in place
            loaded[expert] = copy.deepcopy(merged)

        self.features = loaded

        if self.split_name == "jsfusion":
            cap_idx_path = feature_root / self.paths["js_test_cap_idx_path"]
            self.restrict_test_captions = memcache(cap_idx_path)

        self.raw_captions = memcache(feature_root / self.paths["raw_captions_path"])
        self.text_features = memcache(feature_root / self.paths["text_feat_path"])

        if self.restrict_train_captions:
            # a set gives constant-time membership tests on the video names
            train_videos = set(self.partition_lists["train"])
            for video, captions in self.text_features.items():
                if video not in train_videos:
                    continue

                # The length check is skipped for the full-test split, because
                # the text features in the cached dataset will already have
                # been cropped to the specified `restrict_train_captions`
                if self.split_name != "full-test":
                    length_msg = "expected train text features to be lists with length 19 or 20"
                    looks_valid = isinstance(captions, list) and len(captions) in {19, 20}
                    self.log_assert(looks_valid, msg=length_msg)

                # keep only the first N captions (deterministic)
                self.text_features[video] = captions[:self.restrict_train_captions]

        self.summary_stats()
Пример #4
0
    def load_features(self):
        """Load expert and text features and set the retrieval video paths.

        Sets ``self.features`` for the experts in ``self.ordered_experts``.
        In challenge mode the text data is delegated to
        ``load_challenge_text_features``; otherwise ``self.raw_captions`` and
        ``self.text_features`` are loaded from the paths configured in
        ``self.paths``.  Finally ``self.video_path_retrieval`` is rebuilt from
        the "val" partition list.

        Raises:
            AssertionError: if an expert has several feature files but its
                aggregation mode is not "concat".
        """
        # Normalise once so that every `/` join below works regardless of
        # whether `self.root_feat` was configured as a str or as a Path.
        root_feat = Path(self.root_feat)
        feat_names = {
            key: self.visual_feat_paths(key)
            for key in self.paths["feature_names"]
        }
        feat_names.update(self.paths["custom_paths"])
        features = {}
        for expert, rel_names in feat_names.items():
            if expert not in self.ordered_experts:
                continue
            feat_paths = tuple(root_feat / rel_name for rel_name in rel_names)
            if len(feat_paths) == 1:
                features[expert] = memcache(feat_paths[0])
            else:
                # support multiple forms of feature (e.g. max and avg pooling). For
                # now, we only support direct concatenation
                msg = f"{expert}: Only direct concatenation of muliple feats is possible"
                print(f"Concatenating aggregates for {expert}....")
                assert self.feat_aggregation[expert][
                    "aggregate"] == "concat", msg
                axis = self.feat_aggregation[expert]["aggregate-axis"]
                x = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
                print(f"concat cache info: {x}")
                features_ = concat_features(feat_paths, axis=axis)
                memory_summary()

                # Make separate feature copies for each split to allow in-place filtering
                features[expert] = copy.deepcopy(features_)

        self.features = features
        if self.challenge_mode:
            self.load_challenge_text_features()
        else:
            self.raw_captions = memcache(root_feat /
                                         self.paths["raw_captions_path"])
            text_feat_path = root_feat / self.paths["text_feat_paths"][
                self.text_feat]
            self.text_features = memcache(text_feat_path)

        # overload video paths, which are structured differently for YouCook2
        self.video_path_retrieval = [
            f"videos/{x}.mp4" for x in self.partition_lists["val"]
        ]
Пример #5
0
    def load_features(self):
        """Load the expert features, raw captions and text features.

        Fills ``self.features`` for the experts in ``self.ordered_experts``
        (several feature files per expert are merged via ``concat_features``)
        and then loads ``self.raw_captions`` and ``self.text_features`` from
        the locations configured in ``self.paths``.
        """
        feature_root = Path(self.root_feat)

        # expert name -> relative paths of that expert's feature files
        expert_files = {}
        for name in self.paths["feature_names"]:
            expert_files[name] = self.visual_feat_paths(name)
        expert_files.update(self.paths["custom_paths"])

        loaded = {}
        for expert, rel_names in expert_files.items():
            if expert not in self.ordered_experts:
                continue

            locations = tuple(feature_root / rel_name for rel_name in rel_names)
            if len(locations) == 1:
                loaded[expert] = memcache(locations[0])
                continue

            # Several forms of the feature exist (e.g. max and avg pooling);
            # direct concatenation is the only supported way to merge them.
            print(f"Concatenating aggregates for {expert}....")
            aggregation = self.feat_aggregation[expert]
            assert aggregation["aggregate"] == "concat", (
                f"{expert}: Only direct concatenation of muliple feats is possible"
            )
            concat_axis = aggregation["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(locations, axis=concat_axis)
            memory_summary()

            # Each split gets its own copy so that it can be filtered in place
            loaded[expert] = copy.deepcopy(merged)

        self.features = loaded

        captions_path = feature_root / self.paths["raw_captions_path"]
        self.raw_captions = memcache(captions_path)
        text_feat_path = feature_root / self.paths["text_feat_path"]
        self.text_features = memcache(text_feat_path)
Пример #6
0
    def _train_epoch(self, epoch):
        """
        Training logic for an epoch

        :param epoch: Current training epoch.
        :return: A tuple ``(log, cached_preds)``. ``log`` is a dict holding the
            mean training loss under 'loss', merged with the validation log when
            validation ran this epoch. ``cached_preds`` is the second value
            returned by ``_valid_epoch``, or None when validation was skipped.
        :raises ValueError: if a classification loss has to be computed for a
            minibatch that carries no 'labels' entry.

        Note:
            If you have additional information to record, for example:
                > additional_log = {"x": x, "y": y}
            merge it with log before return. i.e.
                > log = {**log, **additional_log}
                > return log

            The metrics in log must have the key 'metrics'.
        """
        total_loss = 0
        self.model.train()
        memory_summary()

        for batch_idx, minibatch in enumerate(self.data_loaders["train"]):
            for key, val in minibatch["experts"].items():
                minibatch["experts"][key] = val.to(self.device)

            for key in {"text", "text_token_mask"}:
                if key in minibatch:
                    minibatch[key] = minibatch[key].to(self.device)

            # Rebind on every batch so that labels from a previous minibatch
            # can never leak into the loss of the current one.
            labels = None
            if "labels" in minibatch:
                # popped so that the labels are not forwarded to the model
                labels = minibatch.pop("labels").to(self.device)

            self.optimizer.zero_grad()
            output = self.model(**minibatch)

            if "retrieval" in self.data_loaders.dataloaders:
                loss = self.loss(output["cross_view_conf_matrix"])
            elif labels is None:
                raise ValueError(
                    "minibatch has no 'labels' entry, which is required to "
                    "compute the classification loss")
            else:
                loss = self.loss(x=output["class_preds"], target=labels)

            loss.backward()
            self.optimizer.step()

            # read the scalar loss once and reuse it for logging/accumulation
            loss_value = loss.item()

            # the batch size is read from the leading dim of any expert tensor
            sample_key = list(minibatch["experts"].keys())[0]
            batch_size = minibatch["experts"][sample_key].shape[0]
            self.seen["train"] += batch_size

            if not self.skip_tboard:
                self.writer.set_step(self.seen["train"], mode="train")
                self.writer.add_scalar('loss', loss_value)
            total_loss += loss_value

            if batch_idx % self.log_step == 0:
                prog = self._progress(batch_idx)
                self.logger.info(f"Train Epoch: {epoch} {prog} Loss: {loss_value:.6f}")

            if batch_idx == self.len_epoch or (self.mini_train and batch_idx > 3):
                break

        log = {'loss': total_loss / self.len_epoch}
        if epoch % self.val_freq == 0:
            nested_log, cached_preds = self._valid_epoch(epoch)
            log.update(nested_log)
        else:
            cached_preds = None
            self.logger.info(f"skipping val for epoch: {epoch}")

        # Only touch the scheduler when one is configured; logging the LR
        # unconditionally would fail with AttributeError on None.
        if self.lr_scheduler is not None:
            self.lr_scheduler.step()
            self.logger.info(f"LR {self.lr_scheduler.get_lr()}")

        return log, cached_preds
Пример #7
0
    def load_features(self):
        """Load expert features and text data, then post-process some experts.

        Fills ``self.features`` for the experts in ``self.ordered_experts``.
        The text features of the train split and of the split selected by
        ``self.split_name`` are merged and re-keyed through the inverse of the
        mapping stored at ``dict_youtube_mapping_path``.  When present among
        the ordered experts, the "detection" and "openpose" features are
        flattened into one array per key.
        """
        feature_root = Path(self.root_feat)

        # expert name -> relative paths of that expert's feature files
        expert_files = {}
        for name in self.paths["feature_names"]:
            expert_files[name] = self.visual_feat_paths(name)
        expert_files.update(self.paths["custom_paths"])

        # restrict to required experts
        loaded = {}
        for expert, rel_names in expert_files.items():
            if expert not in self.ordered_experts:
                continue

            locations = tuple(feature_root / rel_name for rel_name in rel_names)
            if len(locations) == 1:
                loaded[expert] = memcache(locations[0])
                continue

            # Several forms of the feature exist (e.g. max and avg pooling);
            # direct concatenation is the only supported way to merge them.
            print(f"Concatenating aggregates for {expert}....")
            aggregation = self.feat_aggregation[expert]
            assert aggregation["aggregate"] == "concat", (
                f"{expert}: Only direct concat of muliple feats is possible"
            )
            concat_axis = aggregation["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(locations, axis=concat_axis)
            memory_summary()

            if expert == "speech":
                # missing speech keys fall back to a zero array of shape (1, 300)
                with_defaults = defaultdict(lambda: np.zeros((1, 300)))
                with_defaults.update(merged)
                merged = with_defaults

            # Each split gets its own copy so that it can be filtered in place
            loaded[expert] = copy.deepcopy(merged)

        self.features = loaded

        text_feat_paths = self.paths["text_feat_paths"]
        merged_text = memcache(feature_root / text_feat_paths["train"])
        # the split names used by this class differ from the keys of the paths
        split_key = {"dev": "val", "official": "test"}[self.split_name]
        merged_text.update(memcache(feature_root / text_feat_paths[split_key]))

        key_map = memcache(pjoin(feature_root, self.paths["dict_youtube_mapping_path"]))
        inverse_map = {value: key for key, value in key_map.items()}
        self.text_features = {
            inverse_map[key]: val for key, val in merged_text.items()
        }
        self.raw_captions = memcache(feature_root / self.paths["raw_captions_path"])

        if "detection" in self.ordered_experts:
            # Example processing: boxes, scores (as a column) and raw features
            # are joined side by side
            flattened = {}
            for key, entry in self.features["detection"].items():
                boxes = entry["detection_boxes"]
                scores = entry["detection_scores"]
                raw_feats = entry["raw_feats_avg"]
                flattened[key] = np.concatenate(
                    (boxes, scores.reshape(-1, 1), raw_feats), axis=1)
            self.features["detection"] = flattened

        if "openpose" in self.ordered_experts:
            # Example processing
            flattened = {}
            for key, entry in self.features["openpose"].items():
                stacked = np.concatenate(entry["matrix"], axis=1)
                flattened[key] = stacked.transpose(1, 0, 2).reshape(-1, 3 * 18)
            self.features["openpose"] = flattened