class ClassedDataset(object):
    """Dataset wrapper that keeps only samples whose label is whitelisted.

    Retained labels are remapped, either through the caller-supplied
    *label_mapping* or, by default, onto the contiguous range
    ``0..len(labels)-1`` in the order the labels are given.

    :param dataset: Dataset to filter (anything iterable yielding
        ``(data, label)`` pairs and indexable by position).
    :param labels: Whitelisted labels whose data points are retained
        post-filter. Must be non-empty and sized.
    :param label_mapping: Optional mapping from original labels to the new
        filtered labels.
    """

    def __init__(self, dataset: Dataset, labels: Iterable[int], label_mapping: Dict[int, int] = None):
        assert (labels is not None and len(labels) > 0), \
            'labels must not be NoneType or empty'

        # Default mapping: enumerate the whitelisted labels from 0 in order.
        if label_mapping is None:
            label_mapping = {label: new for new, label in enumerate(labels)}
        self.label_mapping = label_mapping

        # A set gives O(1) membership tests during the single full scan of
        # the dataset (a list would make this scan O(n * len(labels))).
        wanted = set(labels)
        indices = [idx for idx, (_, label) in enumerate(dataset) if label in wanted]
        self.dataset = Subset(dataset, indices)

    def __len__(self):
        """Return the number of elements in the filtered dataset."""
        return len(self.dataset)

    def __getitem__(self, index: int):
        """Return the ``(data, remapped_label)`` pair at *index*.

        :param index: Index of the element to retrieve within the
            filtered (subset) dataset.
        """
        data, label = self.dataset[index]
        return data, self.label_mapping[label]
def __getitem__(self, index):
    """Look up the element at *index* by delegating to Subset's item access."""
    item = Subset.__getitem__(self, index)
    return item
def __getitem__(self, index: int) -> tuple:
    """Return the sample at *index*, delegating the lookup to Subset."""
    result = Subset.__getitem__(self, index)
    return result
class MNISTDataset2(SupervisedDataset):
    """MNIST dataset wrapper exposing training / validation / testing splits.

    Training and validation are carved out of the torchvision MNIST training
    set via a seeded shuffle; testing uses the official MNIST test set.
    """

    @property
    def response_shape(self) -> Tuple[int, ...]:
        """Shape of the label space: ``(number_of_classes,)``."""
        return (len(self.categories),)

    @property
    def predictor_shape(self) -> Tuple[int, ...]:
        """Shape of one input sample: ``(channels, height, width)``."""
        return self._resize_shape

    def __init__(
        self,
        dataset_path: Path,
        split: Split = Split.Training,
        validation: float = 0.3,
        resize_s: int = 28,
        seed: int = 42,
        download=True,
    ):
        """
        :param dataset_path: dataset directory
        :param split: train, valid, test
        :param validation: fraction of the training data reserved for validation
        :param resize_s: target side length of each image
        :param seed: RNG seed used for the train/validation shuffle
        :param download: fetch MNIST if it is not already at *dataset_path*
        """
        super().__init__()

        if not download:
            assert dataset_path.exists(), f"root: {dataset_path} not found."

        self._resize_shape = (1, resize_s, resize_s)

        # Augmenting pipeline for training; deterministic resize otherwise.
        augmented = transforms.Compose([
            transforms.RandomResizedCrop(resize_s),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
        plain = transforms.Compose([
            transforms.Resize(resize_s),
            # transforms.CenterCrop(resize_s),
            transforms.ToTensor(),
        ])

        if split == Split.Training:
            base = MNIST(str(dataset_path), train=True, download=download, transform=augmented)
        elif split == Split.Validation:
            base = MNIST(str(dataset_path), train=True, download=download, transform=plain)
        else:
            base = MNIST(str(dataset_path), train=False, download=download, transform=plain)

        if split == Split.Testing:
            # The official test set is used as-is, no carving needed.
            self.mnist_data_split = base
        else:
            # Seed before shuffling so the train/validation partition is
            # reproducible (and identical across the two split objects).
            torch.manual_seed(seed)
            train_ind, val_ind, _ = SplitByPercentage(
                len(base), validation=validation, testing=0.0
            ).shuffled_indices()
            chosen = val_ind if split == Split.Validation else train_ind
            self.mnist_data_split = Subset(base, chosen)

        self.categories = base.classes

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.mnist_data_split)

    def __getitem__(self, index):
        """Return the ``(image_tensor, label)`` pair at *index* in the split."""
        return self.mnist_data_split[index]