import logging
from typing import Optional

import torch

# `comm` is assumed to be detectron2-style distributed utilities
# (detectron2.utils.comm), which provide shared_random_seed(),
# get_rank() and get_world_size().
from detectron2.utils import comm

logger = logging.getLogger(__name__)


def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None):
    """
    Args:
        size (int): the total number of data of the underlying dataset to sample from
        shuffle (bool): whether to shuffle the indices or not
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    if not isinstance(size, int):
        raise TypeError(f"TrainingSampler(size=) expects an int. Got type {type(size)}.")
    if size <= 0:
        raise ValueError(f"TrainingSampler(size=) expects a positive int. Got {size}.")
    self._size = size
    self._shuffle = shuffle
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()
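
# A minimal sketch (not part of the snippet above) of how a sampler initialized
# this way typically yields an infinite, shard-aware index stream, mirroring the
# pattern in detectron2's TrainingSampler: every worker shuffles with the same
# shared seed, then takes every `world_size`-th index starting at its own rank.
# The function name is illustrative, not the author's code.
import itertools


def _example_infinite_indices(size, seed, rank, world_size, shuffle=True):
    g = torch.Generator()
    g.manual_seed(seed)  # same seed on every worker -> identical permutations

    def stream():
        while True:
            if shuffle:
                yield from torch.randperm(size, generator=g).tolist()
            else:
                yield from range(size)

    # Each rank keeps a disjoint, interleaved slice of the shared stream.
    return itertools.islice(stream(), rank, None, world_size)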

def __init__(self, cfg, sizes, dataset_dicts, seed: Optional[int] = None):
    """
    Args:
        cfg: config parameters
        sizes (list[int]): the number of data in each underlying dataset to sample from
        dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    self.sizes = sizes
    dataset_ratio = cfg.DATALOADER.DATASET_RATIO
    self._batch_size = cfg.SOLVER.IMS_PER_BATCH
    assert len(dataset_ratio) == len(sizes), \
        "length of dataset ratio {} should be equal to number of datasets {}".format(
            len(dataset_ratio), len(sizes)
        )
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()
    self._ims_per_gpu = self._batch_size // self._world_size
    self.dataset_ids = torch.tensor(
        [d["dataset_source"] for d in dataset_dicts], dtype=torch.long
    )
    # Weight each sample so that datasets are drawn according to DATASET_RATIO,
    # normalized by each dataset's size.
    dataset_weight = [
        torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio)
        for r, s in zip(dataset_ratio, sizes)
    ]
    dataset_weight = torch.cat(dataset_weight)
    self.weights = dataset_weight
    self.sample_epoch_size = len(self.weights)
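
# A small worked example (hypothetical sizes and ratios) of the weighting above:
# with sizes [100, 400] and DATASET_RATIO [1, 1], each sample in the smaller
# dataset gets weight 400/100 * 1/2 = 2.0 and each sample in the larger one
# 400/400 * 1/2 = 0.5, so both datasets are drawn equally often in expectation.
def _example_dataset_weights():
    sizes, dataset_ratio = [100, 400], [1.0, 1.0]
    weights = torch.cat([
        torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio)
        for r, s in zip(dataset_ratio, sizes)
    ])
    # weights[:100] == 2.0, weights[100:] == 0.5; usable with torch.multinomial.
    return weights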

def __init__(self, dataset_dicts, seed: Optional[int] = None):
    """
    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    self._size = len(dataset_dicts)
    assert self._size > 0
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()
    self.weights = self._get_class_balance_factor(dataset_dicts)

def __init__(self, dataset_dicts, seed: Optional[int] = None):
    """
    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    self._size = len(dataset_dicts)
    assert self._size > 0
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()
    self.weights = self._get_class_balance_factor(dataset_dicts)
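
# `_get_class_balance_factor` is not shown in these snippets. A plausible
# sketch, assuming one common definition (weight each image by the summed
# inverse frequency of the categories it contains), could look like this;
# the function name and the exponent `l` are illustrative assumptions.
from collections import defaultdict


def _example_class_balance_factor(dataset_dicts, l=1.0):
    category_freq = defaultdict(int)
    for d in dataset_dicts:
        for cat_id in {ann["category_id"] for ann in d["annotations"]}:
            category_freq[cat_id] += 1
    weights = []
    for d in dataset_dicts:
        cat_ids = {ann["category_id"] for ann in d["annotations"]}
        # Images containing rare categories receive larger sampling weights.
        weights.append(sum((1.0 / category_freq[c]) ** l for c in cat_ids))
    return torch.tensor(weights, dtype=torch.float32)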

def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None):
    """
    Args:
        size (int): the total number of data of the underlying dataset to sample from
        shuffle (bool): whether to shuffle the indices or not
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    assert size > 0
    self._size = size
    self._shuffle = shuffle
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()

def __init__(self, repeat_factors, *, shuffle=True, seed=None):
    """
    Args:
        repeat_factors (Tensor): a float vector, the repeat factor for each index.
            When it is full of ones, it is equivalent to
            ``TrainingSampler(len(repeat_factors), ...)``.
        shuffle (bool): whether to shuffle the indices or not
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    self._shuffle = shuffle
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()
    # Split into whole number (_int_part) and fractional (_frac_part) parts.
    self._int_part = torch.trunc(repeat_factors)
    self._frac_part = repeat_factors - self._int_part
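
# How the split above is typically consumed (a sketch mirroring the pattern in
# detectron2's RepeatFactorTrainingSampler; the function name is illustrative):
# each epoch, the fractional part is rounded stochastically so that, on
# average, index i appears repeat_factors[i] times.
def _example_epoch_indices(int_part, frac_part, generator):
    rands = torch.rand(len(frac_part), generator=generator)
    rep_factors = int_part + (rands < frac_part).float()
    indices = []
    for i, rep in enumerate(rep_factors):
        indices.extend([i] * int(rep.item()))
    return torch.tensor(indices, dtype=torch.int64)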

def __init__(self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None):
    """
    Args:
        cfg: config parameters
        dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
        batch_size (int): size of a mini-batch.
        shuffle (bool): whether to shuffle the indices or not
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE
    self._shuffle = shuffle
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    # only sample the previous frame during eval
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()

    self._total_size = len(dataset_dicts)
    # Truncate to a multiple of the global batch size so every worker
    # draws the same number of samples.
    total_batch_size = batch_size * self._world_size
    self._size = (len(dataset_dicts) // total_batch_size) * total_batch_size
    self._batch_size = batch_size
    self.num_per_worker = self._size // self._world_size

    self._dataset_dicts = dataset_dicts
    # Index frames by video_id and frame index for pair lookups.
    self._data_by_video = {}
    for i, data in enumerate(dataset_dicts):
        data["total_idx"] = i
        if data["video_id"] in self._data_by_video:
            self._data_by_video[data["video_id"]][data["index"]] = data
        else:
            self._data_by_video[data["video_id"]] = {data["index"]: data}
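
# A hypothetical illustration (not from the source) of how `_data_by_video`
# and `_offset` might be used to pick a paired previous frame: given a frame,
# look up one whose index is up to `offset_range` earlier in the same video.
import random


def _example_sample_pair(data_by_video, frame, offset_range, rng=random):
    frames = data_by_video[frame["video_id"]]
    offset = rng.randint(0, offset_range)
    # Fall back to the frame itself if the offset frame does not exist.
    return frames.get(frame["index"] - offset, frame)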

def __init__(
    self,
    size: int,
    subset_ratio: float,
    shuffle: bool = True,
    seed_shuffle: Optional[int] = None,
    seed_subset: Optional[int] = None,
):
    """
    Args:
        size (int): the total number of data of the underlying dataset to sample from
        subset_ratio (float): the ratio of subset data to sample from the underlying dataset
        shuffle (bool): whether to shuffle the indices or not
        seed_shuffle (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
        seed_subset (int): the seed to randomize the subset to be sampled.
            Must be the same across all workers. If None, will use a random seed shared
            among workers (require synchronization among all workers).
    """
    super().__init__(size=size, shuffle=shuffle, seed=seed_shuffle)

    assert 0.0 < subset_ratio <= 1.0
    self._size_subset = int(size * subset_ratio)
    assert self._size_subset > 0
    if seed_subset is None:
        seed_subset = comm.shared_random_seed()
    self._seed_subset = int(seed_subset)

    # Randomly generate the subset indexes to be sampled from.
    g = torch.Generator()
    g.manual_seed(self._seed_subset)
    indexes_randperm = torch.randperm(self._size, generator=g)
    self._indexes_subset = indexes_randperm[: self._size_subset]

    logger.info("Using RandomSubsetTrainingSampler......")
    logger.info(f"Randomly sample {self._size_subset} data from the original {self._size} data")
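
# The subset selection above is deterministic given `seed_subset`: rebuilding
# the generator with the same seed reproduces the same subset on every worker.
# A quick check with illustrative values:
def _example_subset_is_deterministic(size=10, size_subset=4, seed_subset=42):
    def subset():
        g = torch.Generator()
        g.manual_seed(seed_subset)
        return torch.randperm(size, generator=g)[:size_subset]

    return torch.equal(subset(), subset())  # True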

def __init__(self, dataset_dicts, repeat_thresh, shuffle=True, seed=None):
    """
    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
        repeat_thresh (float): frequency threshold below which data is repeated.
        shuffle (bool): whether to shuffle the indices or not
        seed (int): the initial seed of the shuffle. Must be the same across all workers.
            If None, will use a random seed shared among workers
            (require synchronization among all workers).
    """
    self._shuffle = shuffle
    if seed is None:
        seed = comm.shared_random_seed()
    self._seed = int(seed)
    self._rank = comm.get_rank()
    self._world_size = comm.get_world_size()

    # Get fractional repeat factors and split into whole number (_int_part)
    # and fractional (_frac_part) parts.
    rep_factors = self._get_repeat_factors(dataset_dicts, repeat_thresh)
    self._int_part = torch.trunc(rep_factors)
    self._frac_part = rep_factors - self._int_part
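
# `_get_repeat_factors` is not shown here. In detectron2's
# RepeatFactorTrainingSampler the per-category factor follows the LVIS paper:
# r(c) = max(1, sqrt(repeat_thresh / f(c))), where f(c) is the fraction of
# images containing category c, and an image's factor is the max over its
# categories. A sketch under that assumption (function name is illustrative):
import math


def _example_repeat_factors(dataset_dicts, repeat_thresh):
    category_freq = defaultdict(int)
    for d in dataset_dicts:
        for cat_id in {ann["category_id"] for ann in d["annotations"]}:
            category_freq[cat_id] += 1
    num_images = len(dataset_dicts)
    category_rep = {
        c: max(1.0, math.sqrt(repeat_thresh / (f / num_images)))
        for c, f in category_freq.items()
    }
    rep_factors = [
        max(
            (category_rep[c] for c in {ann["category_id"] for ann in d["annotations"]}),
            default=1.0,
        )
        for d in dataset_dicts
    ]
    return torch.tensor(rep_factors, dtype=torch.float32)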

def shared_random_seed():
    """Return a random seed that is the same across all workers (requires synchronization)."""
    return comm.shared_random_seed()