def collate_fn(batch, ignore_lists=True, pad_before_stack=True, cat_tensors=False):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    # taken from pytorch source code - pytorch data collater, but does not do
    # anything with lists (avoids zip behavior)
    f = partial(collate_fn, ignore_lists=ignore_lists,
                pad_before_stack=pad_before_stack, cat_tensors=cat_tensors)
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        if cat_tensors:
            return torch.cat(batch, 0, out=out)
        else:
            if pad_before_stack:
                return pad_sequence(batch, batch_first=True)
            else:
                return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return f([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: f([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(f(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        if ignore_lists:
            return batch
        else:
            transposed = zip(*batch)
            return [default_collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
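# Shared-context sketch (not part of any single original snippet): the collate
# variants in this file are not self-contained and assume roughly the following
# module-level imports. `int_classes`, `string_classes`, and `container_abcs`
# lived in `torch._six` on older PyTorch releases; on newer releases they map to
# plain `int`, `str`, and `collections.abc`. `TensorDict` / `TensorList` used by
# `cat_default_collate` come from an external tracking library and are assumed
# to be importable in that project.
from collections.abc import Mapping
from dataclasses import fields, is_dataclass
from functools import partial
from typing import Any, Sequence

import torch
from torch import Tensor
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from torch.utils.data._utils.collate import (default_collate_err_msg_format,
                                             np_str_obj_array_pattern)

try:
    from torch._six import container_abcs, int_classes, string_classes
except ImportError:
    import collections.abc as container_abcs
    int_classes, string_classes = int, str

# Minimal usage sketch for `collate_fn` above; `my_dataset` is a placeholder for
# any dataset yielding variable-length 1-D tensors:
#
#     loader = DataLoader(my_dataset, batch_size=8,
#                         collate_fn=partial(collate_fn, pad_before_stack=True))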
def default_collate1(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return default_collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: default_collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(default_collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        if elem_type.__name__ == 'tuple':
            if isinstance(elem[1], torch.Tensor):
                # (image, tensor target) pairs: fall through to the default
                # transposed handling below
                pass
            elif type(batch[1]).__name__ == 'tuple':
                return tuple(zip(*batch))
            else:
                # detection-style samples: stack the images and keep the
                # variable-length targets as a list of tensors
                targets = []
                imgs = []
                for sample in batch:
                    imgs.append(sample[0])
                    targets.append(torch.FloatTensor(sample[1]))
                return torch.stack(imgs, 0), targets
        transposed = zip(*batch)
        return [default_collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def named_collate(batch):
    r"""Puts each data field into a tensor with outer dimension batch size.
    If named tensor adds batch to the first dimension."""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        # TODO: Named tensor: once stack supports named tensors, drop the rename.
        names = elem.names
        elem = elem.rename(None)
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.rename(None).numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        out_batch = torch.stack([_.rename(None) for _ in batch], 0, out=out)
        if any(names):
            new_names = tuple(["batch"] + list(names))
            out_batch = out_batch.refine_names(*new_names)
        return out_batch
    elif (
        elem_type.__module__ == "numpy"
        and elem_type.__name__ != "str_"
        and elem_type.__name__ != "string_"
    ):
        elem = batch[0]
        if elem_type.__name__ == "ndarray":
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            return named_collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: named_collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, "_fields"):  # namedtuple
        return elem_type(*(named_collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        transposed = zip(*batch)
        return [named_collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
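# Minimal sketch (hypothetical example, not from the original source) of what the
# named-tensor branch in `named_collate` produces: per-sample tensors named
# ("C", "H", "W") come back stacked with a leading "batch" dimension.
def _named_collate_example():
    sample = torch.zeros(3, 4, 4, names=("C", "H", "W"))
    out = named_collate([sample, sample.clone()])
    # stacking adds a new leading dim and refine_names restores the sample names
    assert out.names == ("batch", "C", "H", "W")
    assert out.shape == (2, 3, 4, 4)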
def collate(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        try:
            out = None
            if torch.utils.data.get_worker_info() is not None:
                # If we're in a background process, concatenate directly into a
                # shared memory tensor to avoid an extra copy
                numel = sum([x.numel() for x in batch])
                storage = elem.storage()._new_shared(numel)
                out = elem.new(storage)
            return torch.stack(batch, 0, out=out)
        except Exception:
            # ragged tensors cannot be stacked; pad them to a common length instead
            return torch.nn.utils.rnn.pad_sequence(batch, batch_first=True)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # stack each sample's list of tensors, then zero-pad the results into a
        # single batch tensor; this assumes each stacked sample is 2-D
        batch = [torch.stack(it) for it in batch]
        elem_sizes = [it.shape for it in batch]
        max_sizes = (max(sizes) for sizes in zip(*elem_sizes))
        batched = torch.zeros(len(batch), *max_sizes, dtype=batch[0].dtype)
        for idx, (elem, elem_size) in enumerate(zip(batch, elem_sizes)):
            size_1, size_2 = elem_size
            batched[idx, :size_1, :size_2] = elem
        return batched
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def cat_default_collate(batch):
    # copy of pytorch's default collate except assumes batch elements are subbatches
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.cat(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return default_collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, (TensorDict, TensorList)):
        return type(elem).merge(batch)
    elif isinstance(elem, container_abcs.Mapping):
        return {key: default_collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(default_collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError(
                'each element in list of batch should be of equal size')
        transposed = zip(*batch)
        return [default_collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def collate(batch, padding_value=-100):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        try:
            out = None
            if torch.utils.data.get_worker_info() is not None:
                # If we're in a background process, concatenate directly into a
                # shared memory tensor to avoid an extra copy
                numel = sum([x.numel() for x in batch])
                storage = elem.storage()._new_shared(numel)
                out = elem.new(storage)
            return torch.stack(batch, 0, out=out)
        except Exception:
            return torch.nn.utils.rnn.pad_sequence(batch, batch_first=True,
                                                   padding_value=padding_value)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            return collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        padding_mapping = {"input_ids": 1, "attention_mask": 0,
                           "token_type_ids": 0, "labels": -100}
        return {
            key: collate([d[key] for d in batch],
                         padding_value=padding_mapping.get(key, padding_value))
            for key in elem
        }
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError('each element in list of batch should be of equal size')
        transposed = zip(*batch)
        return [collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
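# Hypothetical sketch (names and values are illustrative only) of the per-key
# padding above on tokenizer-style dicts with ragged sequence lengths: stacking
# fails, so each key falls back to pad_sequence with its own padding value.
def _padded_collate_example():
    batch = [
        {"input_ids": torch.tensor([5, 6, 7]), "attention_mask": torch.tensor([1, 1, 1])},
        {"input_ids": torch.tensor([8, 9]), "attention_mask": torch.tensor([1, 1])},
    ]
    out = collate(batch)
    # input_ids are padded with 1, attention_mask with 0 (see padding_mapping)
    assert out["input_ids"].tolist() == [[5, 6, 7], [8, 9, 1]]
    assert out["attention_mask"].tolist() == [[1, 1, 1], [1, 1, 0]]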
def dict_collate_fn(batch):
    # To keep each image's label as separate dictionaries, default pytorch behaviour will stack each key
    # Only modified one line of the pytorch 1.6.0 default collate function
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return dict_collate_fn([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return batch  # !Only modified this line
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(dict_collate_fn(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError(
                'each element in list of batch should be of equal size')
        transposed = zip(*batch)
        return [dict_collate_fn(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def collate_any(batch, key=None):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return collate_any([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {
            key: collate_any([d[key] for d in batch], key=key)
            for key in elem
        }
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate_any(samples) for samples in zip(*batch)))
    elif key is not None:
        # we are not at top level, so NO TRANSPOSITION!!!
        # unchanged batches for example for keypoints, object_infos
        return batch
    elif isinstance(elem, container_abcs.Sequence):
        assert key is None, "You are transposing something at non-top level, which is WRONG"
        transposed = zip(*batch)
        return [collate_any(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
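# Illustrative sketch (hypothetical data) of the key-aware behavior of
# `collate_any`: tensor fields are stacked as usual, while list-valued fields
# reached through a dict key (e.g. per-sample keypoints of varying length) are
# returned as-is instead of being transposed.
def _collate_any_example():
    batch = [
        {"image": torch.zeros(3, 4, 4), "keypoints": [[0, 1], [2, 3]]},
        {"image": torch.ones(3, 4, 4), "keypoints": [[4, 5]]},
    ]
    out = collate_any(batch)
    assert out["image"].shape == (2, 3, 4, 4)  # stacked along a new batch dim
    assert out["keypoints"] == [[[0, 1], [2, 3]], [[4, 5]]]  # one entry per sample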
def _collate(self, batch: Sequence[Any]) -> Any:
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:  # type: ignore
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum(x.numel() for x in batch)
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage).resize_(len(batch), *list(elem.size()))
        ndims = elem.dim()
        if (ndims > 0) and ((ndims % 2) == 0):
            return torch.cat(batch, dim=0, out=out)  # type: ignore
        return torch.stack(batch, dim=0, out=out)  # type: ignore
    elif (
        elem_type.__module__ == "numpy"
        and elem_type.__name__ != "str_"
        and elem_type.__name__ != "string_"
    ):
        elem = batch[0]
        if elem_type.__name__ == "ndarray":
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            return self._collate([torch.as_tensor(b) for b in batch])
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, Mapping):
        return {key: self._collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, "_fields"):  # namedtuple
        return elem_type(*(self._collate(samples) for samples in zip(*batch)))
    elif is_dataclass(elem):  # dataclass
        return elem_type(
            **{
                field.name: self._collate([getattr(d, field.name) for d in batch])
                for field in fields(elem)
            }
        )
    elif isinstance(elem, (tuple, list)):
        transposed = zip(*batch)
        return [self._collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def custom_collate_fn(batch):
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        return torch.cat(batch, dim=0)
    elif isinstance(elem, tuple):
        return elem_type(custom_collate_fn(samples) for samples in zip(*batch))
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def collate_function(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        # TODO: support pytorch < 1.3
        # if torch.utils.data.get_worker_info() is not None:
        #     # If we're in a background process, concatenate directly into a
        #     # shared memory tensor to avoid an extra copy
        #     numel = sum([x.numel() for x in batch])
        #     storage = elem.storage()._new_shared(numel)
        #     out = elem.new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        elem = batch[0]
        if elem_type.__name__ == 'ndarray':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))
            # return collate_function([torch.as_tensor(b) for b in batch])
            return batch
        elif elem.shape == ():  # scalars
            # return torch.as_tensor(batch)
            return batch
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: collate_function([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(collate_function(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        transposed = zip(*batch)
        return [collate_function(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
def adaptive_collate(batch: Any) -> Any:
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        ndims = elem.dim()
        if ndims > 0 and ndims % 2 == 0:
            return torch.cat(batch, dim=0, out=out)
        else:
            return torch.stack(batch, dim=0, out=out)
    elif (elem_type.__module__ == "numpy" and elem_type.__name__ != "str_"
          and elem_type.__name__ != "string_"):
        elem = batch[0]
        if elem_type.__name__ == "ndarray":
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(
                    default_collate_err_msg_format.format(elem.dtype))
            return adaptive_collate([torch.as_tensor(b) for b in batch])
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int):
        return torch.tensor(batch)
    elif isinstance(elem, tuple) and hasattr(elem, "_fields"):  # namedtuple
        return elem_type(*(adaptive_collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, (tuple, list)):
        transposed = zip(*batch)
        return [adaptive_collate(samples) for samples in transposed]
    raise TypeError(default_collate_err_msg_format.format(elem_type))
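# Hypothetical sketch of the even/odd-rank heuristic in `adaptive_collate`:
# odd-rank elements (e.g. single CHW images) are stacked into a new batch
# dimension, while even-rank elements (e.g. NCHW sub-batches) are treated as
# already batched and concatenated along dim 0 instead.
def _adaptive_collate_example():
    images = [torch.zeros(3, 8, 8), torch.zeros(3, 8, 8)]             # ndim 3, odd
    sub_batches = [torch.zeros(4, 3, 8, 8), torch.zeros(2, 3, 8, 8)]  # ndim 4, even
    assert adaptive_collate(images).shape == (2, 3, 8, 8)       # stacked
    assert adaptive_collate(sub_batches).shape == (6, 3, 8, 8)  # concatenated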