def load_features(self):
    """Populate ``self.features`` and the text/caption attributes.

    For every expert listed in ``self.ordered_experts`` the feature files are
    resolved relative to ``self.root_feat``. A single file is loaded with
    ``memcache``; several files are merged with ``concat_features`` along the
    axis configured in ``self.feat_aggregation``.

    Afterwards, in challenge mode the text features are delegated to
    ``self.load_challenge_text_features()``; otherwise ``self.raw_captions``
    and ``self.text_features`` are loaded from the configured paths.

    Raises:
        AssertionError: if an expert with several feature files is not
            configured with the ``"concat"`` aggregation.
    """
    base_dir = Path(self.root_feat)

    # Relative feature paths per expert; custom paths are applied last, so
    # they replace any generated entry with the same expert name.
    rel_paths_by_expert = {}
    for name in self.paths["feature_names"]:
        rel_paths_by_expert[name] = self.visual_feat_paths(name)
    rel_paths_by_expert.update(self.paths["custom_paths"])

    loaded = {}
    for expert, rel_names in rel_paths_by_expert.items():
        if expert not in self.ordered_experts:
            continue
        abs_paths = tuple(base_dir / rel_name for rel_name in rel_names)
        if len(abs_paths) != 1:
            # Several forms of the same feature (e.g. max and avg pooling)
            # are present. Only direct concatenation is supported for now.
            msg = f"{expert}: Only direct concatenation of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            assert self.feat_aggregation[expert]["aggregate"] == "concat", msg
            concat_axis = self.feat_aggregation[expert]["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(abs_paths, axis=concat_axis)
            memory_summary()
            # Each split gets its own copy so it can be filtered in place.
            loaded[expert] = copy.deepcopy(merged)
        else:
            loaded[expert] = memcache(abs_paths[0])
    self.features = loaded

    if self.challenge_mode:
        self.load_challenge_text_features()
        return

    self.raw_captions = memcache(base_dir / self.paths["raw_captions_path"])
    chosen_text_path = base_dir / self.paths["text_feat_paths"][self.text_feat]
    self.text_features = memcache(chosen_text_path)
def load_features(self):
    """Load distillation, visual-expert and text features onto the instance.

    Steps, in order:

    1. If ``self.distil_params`` is not ``None``, ``self.distil_features`` is
       filled with one ``memcache``-loaded entry per teacher, keyed by the
       teacher's position in ``self.distil_params['teachers']``.
    2. For every expert in ``self.ordered_experts`` the feature files are
       resolved relative to ``self.root_feat``. One file is loaded with
       ``memcache``; several files are merged with ``concat_features``.
       The result is stored in ``self.features``.
    3. In challenge mode ``self.load_challenge_text_features()`` is called.
       Otherwise ``self.text_features`` and ``self.raw_captions`` are loaded.
       The configured text path may be a single path, or a dict holding a
       ``"train"`` entry plus one entry per split name.

    Raises:
        AssertionError: if an expert with several feature files is not
            configured with the ``"concat"`` aggregation.
        TypeError: if the configured text feature path is neither a dict,
            a ``Path`` nor a ``str``.
    """
    # Normalise once: the original only wrapped the root for visual features,
    # so a plain-string root failed with ``str / str`` on the text paths.
    root_feat = Path(self.root_feat)

    if self.distil_params is not None:
        self.distil_features = {}
        d_base_path = self.distil_params['base_path']
        # ``base_path`` is joined by string concatenation, not as a directory.
        teachers = [
            root_feat / Path(d_base_path + teacher)
            for teacher in self.distil_params['teachers']
        ]
        for i, f_name in enumerate(teachers):
            self.distil_features[i] = memcache(f_name)

    feat_names = {
        key: self.visual_feat_paths(key)
        for key in self.paths["feature_names"]
    }
    # Custom paths are applied last and so override generated entries.
    feat_names.update(self.paths["custom_paths"])

    features = {}
    for expert, rel_names in feat_names.items():
        if expert not in self.ordered_experts:
            continue
        feat_paths = tuple(root_feat / rel_name for rel_name in rel_names)
        if len(feat_paths) == 1:
            features[expert] = memcache(feat_paths[0])
        else:
            # support multiple forms of feature (e.g. max and avg pooling). For
            # now, we only support direct concatenation
            msg = f"{expert}: Only direct concatenation of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            assert self.feat_aggregation[expert]["aggregate"] == "concat", msg
            axis = self.feat_aggregation[expert]["aggregate-axis"]
            x = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {x}")
            features_ = concat_features(feat_paths, axis=axis)
            memory_summary()
            # Make separate feature copies for each split to allow in-place filtering
            features[expert] = copy.deepcopy(features_)
    self.features = features

    if self.challenge_mode:
        self.load_challenge_text_features()
    else:
        text_feat_paths = self.paths["text_feat_paths"][self.text_feat]
        if isinstance(text_feat_paths, dict):
            text_features = memcache(root_feat / text_feat_paths["train"])
            # NOTE(review): this updates the object returned by memcache in
            # place; if memcache hands out a shared cached object, confirm
            # that mutating it is intended.
            text_features.update(
                memcache(root_feat / text_feat_paths[self.split_name]))
        elif isinstance(text_feat_paths, (Path, str)):
            text_features = memcache(root_feat / text_feat_paths)
        else:
            raise TypeError(f"Unexpected type {type(text_feat_paths)}")
        self.text_features = text_features
        self.raw_captions = memcache(root_feat / self.paths["raw_captions_path"])
def load_features(self):
    """Load visual expert features, captions and text features.

    Visual features are gathered for every expert in ``self.ordered_experts``
    (one file via ``memcache``, several files via ``concat_features``) and
    stored in ``self.features``. For the ``"jsfusion"`` split the test caption
    indices are additionally loaded into ``self.restrict_test_captions``.

    When ``self.restrict_train_captions`` is truthy, the text features of
    every training video are truncated to their first
    ``self.restrict_train_captions`` entries. ``self.summary_stats()`` is
    called at the end.
    """
    base_dir = Path(self.root_feat)

    # Relative feature paths per expert; custom paths are applied last.
    rel_paths_by_expert = {}
    for name in self.paths["feature_names"]:
        rel_paths_by_expert[name] = self.visual_feat_paths(name)
    rel_paths_by_expert.update(self.paths["custom_paths"])

    loaded = {}
    for expert, rel_names in rel_paths_by_expert.items():
        if expert not in self.ordered_experts:
            continue
        abs_paths = tuple(base_dir / rel_name for rel_name in rel_names)
        if len(abs_paths) != 1:
            # Several forms of the same feature (e.g. max and avg pooling)
            # are present. Only direct concatenation is supported for now.
            msg = f"{expert}: Only direct concatenation of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            is_concat = self.feat_aggregation[expert]["aggregate"] == "concat"
            self.log_assert(is_concat, msg=msg)
            concat_axis = self.feat_aggregation[expert]["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(abs_paths, axis=concat_axis)
            memory_summary()
            # Each split gets its own copy so it can be filtered in place.
            loaded[expert] = copy.deepcopy(merged)
        else:
            loaded[expert] = memcache(abs_paths[0])
    self.features = loaded

    if self.split_name == "jsfusion":
        js_idx_path = base_dir / self.paths["js_test_cap_idx_path"]
        self.restrict_test_captions = memcache(js_idx_path)

    self.raw_captions = memcache(base_dir / self.paths["raw_captions_path"])
    self.text_features = memcache(base_dir / self.paths["text_feat_path"])

    if self.restrict_train_captions:
        # A set gives constant-time membership tests on the video names.
        train_videos = set(self.partition_lists["train"])
        # The length check is skipped for the full-test split: per the
        # original note, the cached text features there have already been
        # cropped to `restrict_train_captions`.
        verify_lengths = self.split_name != "full-test"
        for key, val in self.text_features.items():
            if key not in train_videos:
                continue
            if verify_lengths:
                msg = "expected train text features to be lists with length 19 or 20"
                has_expected_feats = isinstance(val, list) and len(val) in {19, 20}
                self.log_assert(has_expected_feats, msg=msg)
            # Keep the first N captions so the selection is deterministic.
            self.text_features[key] = val[:self.restrict_train_captions]

    self.summary_stats()
def load_features(self):
    """Load visual expert features, text features and retrieval video paths.

    Visual features are gathered for every expert in ``self.ordered_experts``
    (one file via ``memcache``, several files via ``concat_features``) and
    stored in ``self.features``. In challenge mode the text side is delegated
    to ``self.load_challenge_text_features()``; otherwise
    ``self.raw_captions`` and ``self.text_features`` are loaded from the
    configured paths. Finally ``self.video_path_retrieval`` is rebuilt from
    the ``"val"`` partition list.

    Raises:
        AssertionError: if an expert with several feature files is not
            configured with the ``"concat"`` aggregation.
    """
    # Normalise once: the original only wrapped the root for visual features,
    # so a plain-string root failed with ``str / str`` on the caption and
    # text feature paths.
    root_feat = Path(self.root_feat)

    feat_names = {
        key: self.visual_feat_paths(key)
        for key in self.paths["feature_names"]
    }
    # Custom paths are applied last and so override generated entries.
    feat_names.update(self.paths["custom_paths"])

    features = {}
    for expert, rel_names in feat_names.items():
        if expert not in self.ordered_experts:
            continue
        feat_paths = tuple(root_feat / rel_name for rel_name in rel_names)
        if len(feat_paths) == 1:
            features[expert] = memcache(feat_paths[0])
        else:
            # support multiple forms of feature (e.g. max and avg pooling). For
            # now, we only support direct concatenation
            msg = f"{expert}: Only direct concatenation of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            assert self.feat_aggregation[expert]["aggregate"] == "concat", msg
            axis = self.feat_aggregation[expert]["aggregate-axis"]
            x = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {x}")
            features_ = concat_features(feat_paths, axis=axis)
            memory_summary()
            # Make separate feature copies for each split to allow in-place filtering
            features[expert] = copy.deepcopy(features_)
    self.features = features

    if self.challenge_mode:
        self.load_challenge_text_features()
    else:
        self.raw_captions = memcache(root_feat / self.paths["raw_captions_path"])
        text_feat_path = root_feat / self.paths["text_feat_paths"][self.text_feat]
        self.text_features = memcache(text_feat_path)

    # overload video paths, which are structured differently for YouCook2
    self.video_path_retrieval = [
        f"videos/{x}.mp4" for x in self.partition_lists["val"]
    ]
def load_features(self):
    """Load visual expert features, raw captions and text features.

    Visual features are gathered for every expert in ``self.ordered_experts``
    (one file via ``memcache``, several files via ``concat_features``) and
    stored in ``self.features``. Captions and text features are each read
    from a single configured path (``"raw_captions_path"`` and
    ``"text_feat_path"``); no selection by text-feature type happens here.

    Raises:
        AssertionError: if an expert with several feature files is not
            configured with the ``"concat"`` aggregation.
    """
    base_dir = Path(self.root_feat)

    # Relative feature paths per expert; custom paths are applied last.
    rel_paths_by_expert = {}
    for name in self.paths["feature_names"]:
        rel_paths_by_expert[name] = self.visual_feat_paths(name)
    rel_paths_by_expert.update(self.paths["custom_paths"])

    loaded = {}
    for expert, rel_names in rel_paths_by_expert.items():
        if expert not in self.ordered_experts:
            continue
        abs_paths = tuple(base_dir / rel_name for rel_name in rel_names)
        if len(abs_paths) != 1:
            # Several forms of the same feature (e.g. max and avg pooling)
            # are present. Only direct concatenation is supported for now.
            msg = f"{expert}: Only direct concatenation of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            assert self.feat_aggregation[expert]["aggregate"] == "concat", msg
            concat_axis = self.feat_aggregation[expert]["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(abs_paths, axis=concat_axis)
            memory_summary()
            # Each split gets its own copy so it can be filtered in place.
            loaded[expert] = copy.deepcopy(merged)
        else:
            loaded[expert] = memcache(abs_paths[0])
    self.features = loaded

    captions_path = base_dir / self.paths["raw_captions_path"]
    self.raw_captions = memcache(captions_path)
    text_path = base_dir / self.paths["text_feat_path"]
    self.text_features = memcache(text_path)
def _train_epoch(self, epoch):
    """
    Run the training loop for one epoch, then validate if scheduled.

    :param epoch: Current training epoch.
    :return: A tuple ``(log, cached_preds)``. ``log`` is a dict whose
        ``'loss'`` entry is the sum of the batch losses divided by
        ``self.len_epoch``; on epochs where validation runs it is merged
        with the log returned by ``self._valid_epoch``. ``cached_preds`` is
        the second value returned by ``self._valid_epoch``, or ``None`` when
        validation is skipped.

    Note:
        If you have additional information to record, for example:
            > additional_log = {"x": x, "y": y}
        merge it with log before return. i.e.
            > log = {**log, **additional_log}
            > return log

        The metrics in log must have the key 'metrics'.
    """
    running_loss = 0
    self.model.train()
    memory_summary()

    for batch_idx, minibatch in enumerate(self.data_loaders["train"]):
        # Move every tensor the model consumes onto the training device.
        experts = minibatch["experts"]
        for name in experts:
            experts[name] = experts[name].to(self.device)
        for field in ("text", "text_token_mask"):
            if field in minibatch:
                minibatch[field] = minibatch[field].to(self.device)
        # Labels are removed from the minibatch so that they are not passed
        # to the model as a keyword argument.
        if "labels" in minibatch:
            labels = minibatch.pop("labels").to(self.device)

        self.optimizer.zero_grad()
        output = self.model(**minibatch)
        if "retrieval" not in self.data_loaders.dataloaders:
            loss = self.loss(x=output["class_preds"], target=labels)
        else:
            loss = self.loss(output["cross_view_conf_matrix"])
        loss.backward()
        self.optimizer.step()

        # The batch size is read off the leading dimension of the first expert.
        first_expert = list(minibatch["experts"])[0]
        self.seen["train"] += minibatch["experts"][first_expert].shape[0]

        loss_value = loss.item()
        if not self.skip_tboard:
            # The writer step is the number of training samples seen so far.
            self.writer.set_step(self.seen["train"], mode="train")
            self.writer.add_scalar('loss', loss_value)
        running_loss += loss_value

        if batch_idx % self.log_step == 0:
            prog = self._progress(batch_idx)
            self.logger.info(f"Train Epoch: {epoch} {prog} Loss: {loss_value:.6f}")

        reached_epoch_end = batch_idx == self.len_epoch
        if reached_epoch_end or (self.mini_train and batch_idx > 3):
            break

    log = {'loss': running_loss / self.len_epoch}
    if epoch % self.val_freq == 0:
        val_log, cached_preds = self._valid_epoch(epoch)
        log.update(val_log)
    else:
        cached_preds = None
        self.logger.info(f"skipping val for epoch: {epoch}")

    if self.lr_scheduler is not None:
        self.lr_scheduler.step()
        self.logger.info(f"LR {self.lr_scheduler.get_lr()}")
    return log, cached_preds
def load_features(self):
    """Load visual expert features, text features and raw captions.

    Visual features are gathered for every expert in ``self.ordered_experts``
    (one file via ``memcache``, several files via ``concat_features``) and
    stored in ``self.features``. Concatenated ``"speech"`` features are
    wrapped in a ``defaultdict`` that yields a ``(1, 300)`` zero array for
    missing keys.

    Text features are the ``"train"`` entries updated with the entries of the
    current split (``"dev"`` maps to ``"val"``, ``"official"`` to ``"test"``),
    re-keyed through the inverse of the mapping loaded from
    ``"dict_youtube_mapping_path"``. When present among the ordered experts,
    the ``"detection"`` and ``"openpose"`` features are flattened into one
    array per key.

    Raises:
        AssertionError: if an expert with several feature files is not
            configured with the ``"concat"`` aggregation.
    """
    base_dir = Path(self.root_feat)

    # Relative feature paths per expert; custom paths are applied last.
    rel_paths_by_expert = {}
    for name in self.paths["feature_names"]:
        rel_paths_by_expert[name] = self.visual_feat_paths(name)
    rel_paths_by_expert.update(self.paths["custom_paths"])

    # Only the experts that were requested are loaded.
    loaded = {}
    for expert, rel_names in rel_paths_by_expert.items():
        if expert not in self.ordered_experts:
            continue
        abs_paths = tuple(base_dir / rel_name for rel_name in rel_names)
        if len(abs_paths) != 1:
            # Several forms of the same feature (e.g. max and avg pooling)
            # are present. Only direct concatenation is supported for now.
            msg = f"{expert}: Only direct concat of muliple feats is possible"
            print(f"Concatenating aggregates for {expert}....")
            assert self.feat_aggregation[expert]["aggregate"] == "concat", msg
            concat_axis = self.feat_aggregation[expert]["aggregate-axis"]
            cache_stats = concat_features.cache_info()  # pylint: disable=no-value-for-parameter
            print(f"concat cache info: {cache_stats}")
            merged = concat_features(abs_paths, axis=concat_axis)
            memory_summary()

            if expert == "speech":
                # Missing keys fall back to an all-zero (1, 300) array.
                with_defaults = defaultdict(lambda: np.zeros((1, 300)))
                with_defaults.update(merged)
                merged = with_defaults

            # Each split gets its own copy so it can be filtered in place.
            loaded[expert] = copy.deepcopy(merged)
        else:
            loaded[expert] = memcache(abs_paths[0])
    self.features = loaded

    text_paths = self.paths["text_feat_paths"]
    text_feats = memcache(base_dir / text_paths["train"])
    split_alias = {"dev": "val", "official": "test"}
    split_text_path = base_dir / text_paths[split_alias[self.split_name]]
    text_feats.update(memcache(split_text_path))

    # Text features are re-keyed with the inverse of the loaded mapping.
    key_map = memcache(pjoin(base_dir, self.paths["dict_youtube_mapping_path"]))
    inverse_map = {value: key for key, value in key_map.items()}
    self.text_features = {
        inverse_map[key]: val for key, val in text_feats.items()
    }
    self.raw_captions = memcache(base_dir / self.paths["raw_captions_path"])

    if "detection" in self.ordered_experts:
        # Example processing: boxes, scores (as one column) and averaged raw
        # features are joined along axis 1.
        flattened = {}
        for key, subdict in self.features["detection"].items():
            box = subdict["detection_boxes"]
            conf = subdict["detection_scores"]
            raw = subdict["raw_feats_avg"]
            flattened[key] = np.concatenate(
                (box, conf.reshape(-1, 1), raw), axis=1)
        self.features["detection"] = flattened

    if "openpose" in self.ordered_experts:
        # Example processing: the matrices are joined along axis 1 and then
        # reshaped to rows of 3 * 18 values.
        flattened = {}
        for key, subdict in self.features["openpose"].items():
            stacked = np.concatenate(subdict["matrix"], axis=1)
            flattened[key] = stacked.transpose(1, 0, 2).reshape(-1, 3 * 18)
        self.features["openpose"] = flattened