class Metadata(types.SimpleNamespace):
    """A mutable namespace holding dataset metadata (category names, id maps, ...).

    Metadata is normally write-once, so overwriting an existing attribute
    with a *different* value is logged as a warning.
    """

    # Shared class-level logger for all Metadata instances.
    _logger = setup_logger(__name__)

    def __setattr__(self, key, val):
        if hasattr(self, key):
            oldval = getattr(self, key)
            if oldval != val:
                # BUG FIX: the original used the bare name `_logger`, which is
                # a class attribute and therefore raises NameError inside a
                # method; it must be reached via `self`. Also `Logger.warn` is
                # deprecated in favor of `Logger.warning`.
                self._logger.warning(f"Metadata '{key}' was updated!")
        super().__setattr__(key, val)

    def __hasattr__(self, key):
        # NOTE(review): this is not a real Python protocol method (Python never
        # calls `__hasattr__` implicitly); kept only for callers that invoke it
        # explicitly.
        return hasattr(self, key)

    def as_dict(self):
        """Return a shallow copy of all metadata entries as a plain dict."""
        return copy.copy(self.__dict__)

    def set(self, **kwargs):
        """Set multiple metadata entries at once; returns self for chaining."""
        for k, v in kwargs.items():
            setattr(self, k, v)
        return self

    def get(self, key, default=None):
        """Return metadata `key`, or `default` (with a warning) when absent."""
        try:
            return getattr(self, key)
        except AttributeError:
            # BUG FIX: the original string was missing the f-prefix, so the
            # literal text "{key}" was logged instead of the key name; it also
            # had the same bare-`_logger` NameError as __setattr__.
            self._logger.warning(f"Metadata '{key}' was not found!")
            return default
def convert_basic_c2_names(original_keys):
    """
    Apply some basic name conversion to names in C2 weights.
    It only deals with typical backbone models.

    Args:
        original_keys (list[str]):
    Returns:
        list[str]: The same number of strings matching those in original_keys.

    Note:
        The substitutions below are order-sensitive; do not reorder them.
    """
    # FIX: removed an unused local (`logger = setup_logger(__name__)`) that was
    # created but never written to.
    layer_keys = copy.deepcopy(original_keys)
    layer_keys = [
        {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys
    ]  # some hard-coded mappings

    layer_keys = [k.replace("_", ".") for k in layer_keys]
    layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys]
    layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys]
    # Uniform both bn and gn names to "norm"
    layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys]
    layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys]
    layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys]
    layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys]

    # stem
    layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys]
    # to avoid mis-matching with "conv1" in other components (e.g. detection head)
    layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys]

    # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5)
    # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys]
    # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys]
    # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys]
    # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys]

    # blocks
    layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys]
    layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys]
    layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys]
    layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys]

    # fc
    layer_keys = [k.replace("fc1000.", "linear.") for k in layer_keys]

    # DensePose substitutions
    layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys]
    layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys]
    layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys]
    layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys]
    layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys]
    return layer_keys
def __init__(self, cfg, is_train=True):
    """Configure the mapper: image format / EXIF handling from cfg and the
    list of transform generators for training or inference."""
    self.is_train = is_train
    self.img_format = cfg.INPUT.FORMAT
    self.exif_transpose = cfg.INPUT.EXIF
    self.tfm_gens = build_transform_gen(cfg, is_train)

    log = setup_logger(__name__)
    log.info(f"TransformGens(Training={is_train}) : {str(self.tfm_gens)}")
def load_tiny_annotations(data_root: str, anno_dir: str, dataset_name: str, is_split: bool):
    """Load Tiny-ImageNet records as a list of dataset dicts.

    Args:
        data_root: root directory of the dataset.
        anno_dir: relative path of a TSV annotation file ("<file>\\t<class>...");
            when empty, records are built by scanning class sub-directories.
        dataset_name: name registered in MetadataCatalog (provides category_names).
        is_split: when scanning directories, whether images are split into
            per-class folders (label from folder name) or unlabeled (-1).

    Returns:
        list[dict]: records with "file_name", "image_id" and "annotations"
        (an integer class index, or -1 for unlabeled images).
    """
    _logger = setup_logger(__name__, all_rank=True)
    meta = MetadataCatalog.get(dataset_name)
    class_names = meta.category_names

    dataset_dicts = []
    if len(anno_dir):
        annotation_dirname = os.path.join(data_root, anno_dir)
        img_root = os.path.join(data_root, 'images')
        # BUG FIX: the original opened the file without ever closing it
        # (manual readline loop, no f.close()). A context manager guarantees
        # the handle is released; iterating the file is equivalent to the
        # old readline-until-empty loop.
        with open(annotation_dirname, 'r') as f:
            for line in f:
                fields = line.split('\t')
                file_name, cls_name = fields[:2]
                # e.g. "val_1234.JPEG" -> image id 1234
                img_id = int(file_name.split('.')[0].split('_')[1])
                record = {
                    "file_name": os.path.join(img_root, file_name),
                    "image_id": img_id,
                    "annotations": class_names.index(cls_name),
                }
                dataset_dicts.append(record)
    else:
        # No annotation file: walk per-class folders (or a single flat
        # "images" folder when the data is not split into classes).
        annos = class_names if is_split else ['']
        for anno in annos:
            img_root = os.path.join(data_root, anno, 'images')
            img_list = os.listdir(img_root)
            for file_name in img_list:
                img_id = int(file_name.split('.')[0].split('_')[1])
                record = {
                    "file_name": os.path.join(img_root, file_name),
                    "image_id": img_id,
                    # -1 marks an unlabeled image (no class folder).
                    "annotations": class_names.index(anno) if len(anno) else -1,
                }
                dataset_dicts.append(record)
    _logger.info(
        f"Loaded {len(dataset_dicts)} images in Tiny ImageNet from {dataset_name}"
    )
    return dataset_dicts
def serialize_to_tensor(data, group):
    """Pickle `data` into a 1-D uint8 tensor placed on the device implied by
    the process group's backend (CPU for gloo, CUDA for nccl)."""
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    payload = pickle.dumps(data)
    # All-gathering more than 1 GB is usually a mistake; warn loudly.
    if len(payload) > 1024**3:
        logger = setup_logger(__name__)
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(payload) / (1024**3), device
            )
        )
    byte_storage = torch.ByteStorage.from_buffer(payload)
    return torch.ByteTensor(byte_storage).to(device=device)
def check_image_size(self, dataset_dict, image):
    """Validate that `image` matches the (width, height) recorded in
    `dataset_dict`, logging a critical message on mismatch, and backfill
    missing width/height entries from the image itself."""
    if "width" in dataset_dict or "height" in dataset_dict:
        # NOTE(review): if only one of width/height is present this raises
        # KeyError, exactly as before — presumably both are always set
        # together upstream.
        actual_wh = (image.shape[1], image.shape[0])
        recorded_wh = (dataset_dict["width"], dataset_dict["height"])
        if actual_wh != recorded_wh:
            name = dataset_dict["file_name"] if 'file_name' in dataset_dict else ''
            _logger = setup_logger(__name__)
            _logger.critical(
                f"Mismatched (W,H){name}, got {actual_wh}, expect {recorded_wh}"
            )
    # Fill in any missing size fields from the decoded image.
    for key, axis in (("width", 1), ("height", 0)):
        if key not in dataset_dict:
            dataset_dict[key] = image.shape[axis]
def filter_images_with_only_crowd_annotations(dataset_dicts):
    """Drop records whose instances are all crowd regions (iscrowd != 0);
    returns the filtered list and logs how many images were removed."""
    _logger = setup_logger(__name__, all_rank=True)
    num_before = len(dataset_dicts)

    def has_usable(instances):
        # An image is kept if at least one annotation is non-crowd.
        return any(ann.get("iscrowd", 0) == 0 for ann in instances)

    kept = [record for record in dataset_dicts if has_usable(record["instances"])]
    num_after = len(kept)
    _logger.info(
        f"Removed {num_before - num_after} images with no usable annotations. {num_after} images left."
    )
    return kept
def filter_images_with_difficult(dataset_dicts):
    """Remove annotations marked "difficult" from every record, in place;
    images themselves are never dropped."""
    _logger = setup_logger(__name__, all_rank=True)
    total_before = 0
    total_after = 0
    for record in dataset_dicts:
        anns = record['instances']
        total_before += len(anns)
        easy = [ann for ann in anns if ann.get("difficult", 0) == 0]
        record['instances'] = easy
        total_after += len(easy)
    # All images still have an entry after filtering; only annotations shrink.
    _logger.info(
        f"Removed {total_before - total_after} annotations with difficult. {total_after} annotations left."
    )
    return dataset_dicts
def __init__(
    self,
    model: nn.Module,
    save_dir: str = "",
    *,
    save_to_disk: bool = True,
    **checkpointables: object,
) -> None:
    """Hold a model plus any extra checkpointable objects for saving/loading.

    The bare module is stored when `model` is wrapped in (Distributed)DataParallel,
    so checkpoints are independent of the parallel wrapper.
    """
    is_wrapped = isinstance(model, (DistributedDataParallel, DataParallel))
    self.model = model.module if is_wrapped else model
    self.save_dir = save_dir
    self.save_to_disk = save_to_disk
    self.checkpointables = copy.copy(checkpointables)

    self._logger = setup_logger(__name__)
    if not self.save_to_disk:
        self._logger.warning('No saving checkpoint mode')
    if not self.save_dir:
        # Without a target directory, saving is forcibly disabled.
        self._logger.error('Not clarify saving directory')
        self.save_to_disk = False
def load_imagenet_annotations(data_root: str, dataset_name: str):
    """Build dataset dicts for an ImageNet-style layout
    (data_root/<class_name>/<file>); labels are indices into
    the catalog's category_names."""
    _logger = setup_logger(__name__, all_rank=True)
    class_names = MetadataCatalog.get(dataset_name).category_names

    dataset_dicts = []
    for cls_name in class_names:
        label = class_names.index(cls_name)
        cls_dir = os.path.join(data_root, cls_name)
        for file_name in os.listdir(cls_dir):
            # e.g. "n01440764_1234.JPEG" -> image id 1234
            image_id = int(file_name.split('.')[0].split('_')[-1])
            dataset_dicts.append({
                "file_name": os.path.join(cls_dir, file_name),
                "image_id": image_id,
                "annotations": label,
            })
    _logger.info(f"Loaded {len(dataset_dicts)} images in ImageNet from {dataset_name}")
    return dataset_dicts
def convert_c2_detectron_names(weights):
    """
    Map Caffe2 Detectron weight names to Detectron2 names.

    Args:
        weights (dict): name -> tensor
    Returns:
        dict: detectron2 names -> tensor
        dict: detectron2 names -> C2 names

    Note:
        The renaming steps below are order-sensitive; do not reorder them.
    """
    logger = setup_logger(__name__)
    logger.debug("Remapping C2 weights ......")
    original_keys = sorted(weights.keys())
    layer_keys = copy.deepcopy(original_keys)

    # Generic backbone renames first (bn/gn -> norm, stem, residual blocks, ...).
    layer_keys = convert_basic_c2_names(layer_keys)

    # --------------------------------------------------------------------------
    # RPN hidden representation conv
    # --------------------------------------------------------------------------
    # FPN case
    # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then
    # shared for all other levels, hence the appearance of "fpn2"
    layer_keys = [
        k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys
    ]
    # Non-FPN case
    layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys]

    # --------------------------------------------------------------------------
    # RPN box transformation conv
    # --------------------------------------------------------------------------
    # FPN case (see note above about "fpn2")
    layer_keys = [
        k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas")
        for k in layer_keys
    ]
    layer_keys = [
        k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits")
        for k in layer_keys
    ]
    # Non-FPN case
    layer_keys = [
        k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas")
        for k in layer_keys
    ]
    layer_keys = [
        k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits")
        for k in layer_keys
    ]

    # --------------------------------------------------------------------------
    # Fast R-CNN box head
    # --------------------------------------------------------------------------
    layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys]
    layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys]
    layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys]
    layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys]
    # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s
    layer_keys = [re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # FPN lateral and output convolutions
    # --------------------------------------------------------------------------
    def fpn_map(name):
        """
        Look for keys with the following patterns:
        1) Starts with "fpn.inner."
           Example: "fpn.inner.res2.2.sum.lateral.weight"
           Meaning: These are lateral pathway convolutions
        2) Starts with "fpn.res"
           Example: "fpn.res2.2.sum.weight"
           Meaning: These are FPN output convolutions
        """
        splits = name.split(".")
        norm = ".norm" if "norm" in splits else ""
        if name.startswith("fpn.inner."):
            # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight']
            stage = int(splits[2][len("res") :])
            return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1])
        elif name.startswith("fpn.res"):
            # splits example: ['fpn', 'res2', '2', 'sum', 'weight']
            stage = int(splits[1][len("res") :])
            return "fpn_output{}{}.{}".format(stage, norm, splits[-1])
        return name

    layer_keys = [fpn_map(k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # Mask R-CNN mask head
    # --------------------------------------------------------------------------
    # roi_heads.StandardROIHeads case
    layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys]
    layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys]
    layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys]
    # roi_heads.Res5ROIHeads case
    layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys]

    # --------------------------------------------------------------------------
    # Keypoint R-CNN head
    # --------------------------------------------------------------------------
    # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX"
    layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys]
    layer_keys = [
        k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres")
        for k in layer_keys
    ]
    layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys]

    # --------------------------------------------------------------------------
    # Done with replacements
    # --------------------------------------------------------------------------
    # Renaming must be a bijection from original to new keys.
    assert len(set(layer_keys)) == len(layer_keys)
    assert len(original_keys) == len(layer_keys)

    new_weights = {}
    new_keys_to_original_keys = {}
    for orig, renamed in zip(original_keys, layer_keys):
        new_keys_to_original_keys[renamed] = orig
        if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
            # remove the meaningless prediction weight for background class
            new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
            new_weights[renamed] = weights[orig][new_start_idx:]
            logger.debug(
                "Remove prediction weight for background class in {}. The shape changes from "
                "{} to {}.".format(
                    renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
                )
            )
        elif renamed.startswith("cls_score."):
            # move weights of bg class from original index 0 to last index
            logger.debug(
                "Move classification weights for background class in {} from index 0 to "
                "index {}.".format(renamed, weights[orig].shape[0] - 1)
            )
            new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
        else:
            new_weights[renamed] = weights[orig]
    return new_weights, new_keys_to_original_keys
def load_voc_instances(dirname: str, split: str, dataset_name, filter=False):
    """Load PASCAL VOC detection annotations for one split.

    Args:
        dirname: VOC year directory containing "Annotations", "ImageSets", "JPEGImages".
        split: one of "train", "test", "val", "trainval".
        dataset_name: name registered in MetadataCatalog (provides category_names).
        filter: when True, drop annotations marked "difficult".
            (NOTE: parameter shadows the builtin `filter`; name kept for
            backward compatibility with existing callers.)

    Returns:
        list[dict]: one record per image with "file_name", "image_id",
        "height", "width" and "instances".
    """
    _logger = setup_logger(__name__, all_rank=True)
    meta = MetadataCatalog.get(dataset_name)
    class_names = meta.category_names

    with open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
        # BUG FIX: `np.str` was a deprecated alias of the builtin `str` and was
        # removed in NumPy 1.24; use the builtin directly.
        fileids = np.loadtxt(f, dtype=str)

    annotation_dirname = os.path.join(dirname, "Annotations/")
    dataset_dicts = []
    for fileid in fileids:
        anno_file = os.path.join(annotation_dirname, fileid + ".xml")
        jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
        with open(anno_file) as f:
            tree = ET.parse(f)

        record = {
            "file_name": jpeg_file,
            "image_id": fileid,
            "height": int(tree.findall("./size/height")[0].text),
            "width": int(tree.findall("./size/width")[0].text),
        }
        objs = []
        for obj in tree.findall("object"):
            cls = obj.find("name").text
            difficult = int(obj.find("difficult").text)
            bbox = obj.find("bndbox")
            bbox = [
                float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]
            ]
            # Original annotations are integers in the range [1, W or H]
            # Assuming they mean 1-based pixel indices (inclusive),
            # a box with annotation (xmin=1, xmax=W) covers the whole image.
            # In coordinate space this is represented by (xmin=0, xmax=W)
            bbox[0] -= 1.0
            bbox[1] -= 1.0
            objs.append({
                "category_id": class_names.index(cls),
                "bbox": bbox,
                "bbox_mode": BoxMode.XYXY_ABS,
                'difficult': difficult
            })
        record["instances"] = objs
        dataset_dicts.append(record)
    _logger.info(
        f"Loaded {len(dataset_dicts)} images in PASCAL VOC from {dirname}_{split}"
    )
    if filter:
        dataset_dicts = filter_images_with_difficult(dataset_dicts)
    return dataset_dicts
def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion):
    """Match checkpoint keys to model keys and copy matching tensors.

    `model_state_dict` is updated IN PLACE for every key whose name and shape
    both match; the function returns None. Matching is suffix-based: a
    checkpoint key matches a model key when they are equal or the model key
    ends with "." + checkpoint key (longest match wins).

    Args:
        model_state_dict (dict): the model's state dict (mutated in place).
        ckpt_state_dict (dict): the checkpoint's state dict.
        c2_conversion (str): 'Caffe2' / 'lukemelas' to first rename the
            checkpoint keys with the corresponding converter; any other value
            uses the keys as-is.

    Raises:
        ValueError: if one checkpoint key matches more than one model key.
    """
    logger = setup_logger(__name__)
    model_keys = sorted(list(model_state_dict.keys()))
    if c2_conversion == 'Caffe2':
        ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict)
        # original_keys: the name in the original dict (before renaming)
    elif c2_conversion == 'lukemelas':
        ckpt_state_dict, original_keys = convert_efficientnet_names(ckpt_state_dict)
    else:
        # No conversion: identity mapping so logging below still works.
        original_keys = {x: x for x in ckpt_state_dict.keys()}
    ckpt_keys = sorted(list(ckpt_state_dict.keys()))

    def match(a, b):
        # Matched ckpt_key should be a complete (starts with '.') suffix.
        # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1,
        # but matches whatever_conv1 or mesh_head.whatever_conv1.
        return a == b or a.endswith("." + b)

    # get a matrix of string matches, where each (i, j) entry correspond to the size of the
    # ckpt_key string, if it matches
    match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys]
    match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys))
    # use the matched one with longest size in case of multiple matches
    max_match_size, idxs = match_matrix.max(1)
    # remove indices that correspond to no-match
    idxs[max_match_size == 0] = -1

    # used for logging (column widths for the aligned log template)
    max_len_model = max(len(key) for key in model_keys) if model_keys else 1
    max_len_ckpt = max(len(key) for key in ckpt_keys) if ckpt_keys else 1
    log_str_template = "{: <{}} loaded from {: <{}} of shape {}"
    # matched_pairs (matched checkpoint key --> matched model key)
    matched_keys = {}
    msg = ''
    for idx_model, idx_ckpt in enumerate(idxs.tolist()):
        if idx_ckpt == -1:
            continue
        key_model = model_keys[idx_model]
        key_ckpt = ckpt_keys[idx_ckpt]
        value_ckpt = ckpt_state_dict[key_ckpt]
        shape_in_model = model_state_dict[key_model].shape
        # Shape mismatch: warn and skip this pair entirely.
        if shape_in_model != value_ckpt.shape:
            logger.warning(
                "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format(
                    key_ckpt, value_ckpt.shape, key_model, shape_in_model
                )
            )
            logger.warning(
                "{} will not be loaded. Please double check and see if this is desired.".format(
                    key_ckpt
                )
            )
            continue
        # Copy the checkpoint tensor into the model's state dict.
        model_state_dict[key_model] = value_ckpt.clone()
        if key_ckpt in matched_keys:  # already added to matched_keys
            logger.error(
                "Ambiguity found for {} in checkpoint!"
                "It matches at least two keys in the model ({} and {}).".format(
                    key_ckpt, key_model, matched_keys[key_ckpt]
                )
            )
            raise ValueError("Cannot match one checkpoint key to multiple keys in the model.")

        matched_keys[key_ckpt] = key_model
        log_str = log_str_template.format(
            key_model,
            max_len_model,
            original_keys[key_ckpt],
            max_len_ckpt,
            tuple(shape_in_model),
        )
        msg += f'\n{log_str}'
    logger.debug(f'align and update state dicts{msg}')

    matched_model_keys = matched_keys.values()
    matched_ckpt_keys = matched_keys.keys()
    # print warnings about unmatched keys on both side
    unmatched_model_keys = [k for k in model_keys if k not in matched_model_keys]
    if len(unmatched_model_keys):
        logger.debug(get_missing_parameters_message(unmatched_model_keys))

    unmatched_ckpt_keys = [k for k in ckpt_keys if k not in matched_ckpt_keys]
    if len(unmatched_ckpt_keys):
        logger.debug(
            get_unexpected_parameters_message(original_keys[x] for x in unmatched_ckpt_keys)
        )
def convert_efficientnet_names(weights: Dict) -> Tuple[Dict[str, torch.Tensor], Dict[str, str]]:
    """Rename lukemelas-style EfficientNet checkpoint keys to this project's names.

    Args:
        weights: checkpoint name -> tensor.
    Returns:
        dict: new names -> tensor
        dict: new names -> original checkpoint names

    Note:
        The renaming steps are order-sensitive; do not reorder them.
    """
    logger = setup_logger(__name__)
    logger.debug("Remapping EfficientNet weights ......")
    original_keys = sorted(weights.keys())
    layer_keys = copy.deepcopy(original_keys)

    # Strip the leading underscore of lukemelas names, then map stem/head.
    layer_keys = [re.sub('^_', '', k) for k in layer_keys]
    layer_keys = [re.sub('^conv_stem', 'stem.conv1', k) for k in layer_keys]
    layer_keys = [re.sub('^bn0', 'stem.conv1.norm', k) for k in layer_keys]
    layer_keys = [re.sub('^conv_head', 'head.conv1', k) for k in layer_keys]
    layer_keys = [re.sub('^bn1', 'head.conv1.norm', k) for k in layer_keys]
    layer_keys = [re.sub('^blocks\\.', 'block', k) for k in layer_keys]

    # Expand Convolution
    layer_keys = [k.replace('_expand_conv', 'expand_conv') for k in layer_keys]
    layer_keys = [k.replace('_bn0', 'expand_conv.norm') for k in layer_keys]

    # Depthwise Convolution
    layer_keys = [k.replace('_depthwise_conv', 'depthwise_conv') for k in layer_keys]
    layer_keys = [k.replace('_bn1', 'depthwise_conv.norm') for k in layer_keys]

    # Project Convolution
    layer_keys = [k.replace('_project_conv', 'project_conv') for k in layer_keys]
    layer_keys = [k.replace('_bn2', 'project_conv.norm') for k in layer_keys]

    # Squeeze and Excitation
    layer_keys = [re.sub('_se_reduce', 'SEblock.reduce', k) for k in layer_keys]
    layer_keys = [re.sub('_se_expand', 'SEblock.expand', k) for k in layer_keys]

    layer_keys = [re.sub('^fc', 'linear', k) for k in layer_keys]

    # --------------------------------------------------------------------------
    # Done with replacements
    # --------------------------------------------------------------------------
    # Renaming must be a bijection from original to new keys.
    assert len(set(layer_keys)) == len(layer_keys)
    assert len(original_keys) == len(layer_keys)

    new_weights = {}
    new_keys_to_original_keys = {}
    for orig, renamed in zip(original_keys, layer_keys):
        new_keys_to_original_keys[renamed] = orig
        # NOTE(review): the two special-case branches below appear copied from
        # the Caffe2 detection converter; EfficientNet classifier keys
        # presumably never start with "bbox_pred."/"mask_head.predictor."/
        # "cls_score.", so they look like dead code here — confirm before
        # removing.
        if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."):
            # remove the meaningless prediction weight for background class
            new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1
            new_weights[renamed] = weights[orig][new_start_idx:]
            logger.debug(
                "Remove prediction weight for background class in {}. The shape changes from "
                "{} to {}.".format(
                    renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape)
                )
            )
        elif renamed.startswith("cls_score."):
            # move weights of bg class from original index 0 to last index
            logger.debug(
                "Move classification weights for background class in {} from index 0 to "
                "index {}.".format(renamed, weights[orig].shape[0] - 1)
            )
            new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]])
        else:
            new_weights[renamed] = weights[orig]
    return new_weights, new_keys_to_original_keys
def load_coco_json(json_file, image_root, dataset_name, filter=True, extra_annotation_keys=None):
    """Load a COCO-format JSON annotation file into a list of dataset dicts.

    Also populates MetadataCatalog entry `dataset_name` with `category_names`
    and `dataset_id_to_contiguous_id` as a side effect.

    Args:
        json_file (str): path to the COCO annotation json.
        image_root (str): directory that image file names are relative to.
        dataset_name (str): name registered in MetadataCatalog.
        filter (bool): drop images whose annotations are all crowd regions.
            (NOTE: shadows the builtin `filter`; name kept for compatibility.)
        extra_annotation_keys (list[str] | None): extra per-annotation keys to
            copy into each instance dict in addition to iscrowd/bbox/category_id.

    Returns:
        list[dict]: one record per image with "file_name", "height", "width",
        "image_id" and "instances" (bboxes in XYWH_ABS mode, contiguous
        category ids).
    """
    _logger = setup_logger(__name__, all_rank=True)

    start_time = time.time()
    # COCO() prints to stdout; silence it.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    end_time = time.time()
    if end_time - start_time > 1:
        _logger.info(
            f"Loading {json_file} takes {end_time - start_time:.2f} seconds.")

    meta = MetadataCatalog.get(dataset_name)
    cat_ids = sorted(coco_api.getCatIds())
    cats = coco_api.loadCats(cat_ids)
    category_names = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
    meta.category_names = category_names

    # COCO category ids are not contiguous; build id -> [0, #classes) map.
    id_map = {v: i for i, v in enumerate(cat_ids)}
    meta.dataset_id_to_contiguous_id = id_map

    img_ids = sorted(list(coco_api.imgs.keys()))
    # list : 'license', 'url', 'file_name', 'height', 'width', 'id', 'date_captured'
    imgs = coco_api.loadImgs(img_ids)
    # list : 'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id'
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    imgs_anns = list(zip(imgs, anns))
    _logger.info(
        f"Loaded {len(imgs_anns)} images in COCO format from {json_file}")

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "category_id"] + (extra_annotation_keys or [])
    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            # Sanity checks: annotation belongs to this image and is not ignored.
            assert anno["image_id"] == image_id
            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}
            obj["bbox_mode"] = BoxMode.XYWH_ABS
            obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["instances"] = objs
        dataset_dicts.append(record)

    if filter:
        dataset_dicts = filter_images_with_only_crowd_annotations(
            dataset_dicts)
    return dataset_dicts
import copy import numpy as np import random import pickle from typing import List, Dict from vistem.utils.logger import setup_logger from vistem.utils.serialize import PicklableWrapper __all__ = [ 'ListDataset', 'DictionaryDataset', 'MapDataset', 'AspectRatioGroupedDataset' ] _logger = setup_logger(__name__) class ListDataset(Dataset): def __init__(self, cfg, data: List[Dict], copy: bool = True, serialize: bool = True): self._data = data self._copy = copy self._serialize = serialize def _serialize(data): buffer = pickle.dumps(data, protocol=-1)