Exemplo n.º 1
0
class ClassedDataset(object):
    """
    Description:
      View of a dataset restricted to a whitelist of labels, with the
      retained labels translated through a label mapping on access.

    Parameters:
      dataset: Dataset to filter. It is iterated once at construction and
               every element is unpacked as a (data, label) pair.
      labels: Whitelisted labels whose data points are retained
              post-filter. Any iterable is accepted; it is materialized
              once, so generators are fine.
      label_mapping: Optional label mapping from original labels to the new
                     filtered labels. When omitted, each whitelisted label
                     is mapped to its position in `labels`.

    Raises:
      AssertionError: If `labels` is None or empty.
    """

    def __init__(self, dataset: Dataset, labels: Iterable[int],
                 label_mapping: Dict[int, int] = None):
        # Materialize the labels once: a one-shot iterable would otherwise be
        # exhausted by the first pass and has no len(). Array-like inputs
        # (anything exposing tolist()) are converted to plain Python values so
        # that hashing-based lookups below compare by value.
        if labels is not None:
            if hasattr(labels, 'tolist'):
                labels = labels.tolist()
            else:
                labels = list(labels)
        assert labels, 'labels must not be NoneType or empty'

        # If not provided, create a mapping from original label range
        # to new label range (position of the label in `labels`; for a
        # repeated label the last position wins).
        if label_mapping is None:
            label_mapping = {
                label: new_label for new_label, label in enumerate(labels)
            }

        self.label_mapping = label_mapping

        # Build the lookup set once so each membership test is O(1) instead
        # of a linear scan over `labels` per dataset element.
        wanted = set(labels)
        indices = [
            idx for idx, (_, label) in enumerate(dataset) if label in wanted
        ]

        self.dataset = Subset(dataset, indices)

    def __len__(self):
        """
        Description:
          Returns the number of elements in the filtered dataset
        """
        return len(self.dataset)

    def __getitem__(self, index: int):
        """
        Description:
          Returns the element in the filtered dataset at the provided index,
          with its label translated through the label mapping.

        Parameters:
          index: Index of element to retrieve

        Raises:
          KeyError: If the element's original label is missing from the
                    label mapping.
        """
        data, label = self.dataset[index]
        return data, self.label_mapping[label]
Exemplo n.º 2
0
 def __getitem__(self, index):
     """Return the item at `index` by delegating to `Subset.__getitem__`."""
     item = Subset.__getitem__(self, index)
     return item
Exemplo n.º 3
0
 def __getitem__(self, index: int) -> tuple:
     """Return the item at `index` by delegating to `Subset.__getitem__`."""
     item = Subset.__getitem__(self, index)
     return item
Exemplo n.º 4
0
class MNISTDataset2(SupervisedDataset):
    """
    MNIST exposed as a supervised dataset for a single split.

    The training and validation splits are both taken from the MNIST
    ``train=True`` data and separated by shuffled indices; the testing split
    is the MNIST ``train=False`` data used as-is.
    """

    def __init__(
        self,
        dataset_path: Path,
        split: Split = Split.Training,
        validation: float = 0.3,
        resize_s: int = 28,
        seed: int = 42,
        download=True,
    ):
        """
        :param dataset_path: dataset directory
        :param split: train, valid, test
        :param validation: forwarded to ``SplitByPercentage`` as its
            ``validation`` argument
        :param resize_s: side length handed to the resize / crop transforms
        :param seed: passed to ``torch.manual_seed`` before the indices are
            shuffled (not used for the testing split)
        :param download: forwarded to ``MNIST``; when false, ``dataset_path``
            must already exist
        """
        super().__init__()

        if not download:
            assert dataset_path.exists(), f"root: {dataset_path} not found."

        self._resize_shape = (1, resize_s, resize_s)

        # Augmenting pipeline for the training split, plain resize otherwise.
        augmenting = transforms.Compose(
            [
                transforms.RandomResizedCrop(resize_s),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
            ]
        )
        plain = transforms.Compose(
            [
                transforms.Resize(resize_s),
                transforms.ToTensor(),
            ]
        )

        # Only the training split is augmented; training and validation both
        # read the MNIST train data, every other split reads the test data.
        chosen_transform = augmenting if split == Split.Training else plain
        from_train_data = split in (Split.Training, Split.Validation)
        mnist_data = MNIST(
            str(dataset_path),
            train=from_train_data,
            download=download,
            transform=chosen_transform,
        )

        if split == Split.Testing:
            self.mnist_data_split = mnist_data
        else:
            # Seed first so the shuffled indices are reproducible per seed.
            torch.manual_seed(seed)
            splitter = SplitByPercentage(
                len(mnist_data), validation=validation, testing=0.0
            )
            train_ind, val_ind, _ = splitter.shuffled_indices()
            picked = val_ind if split == Split.Validation else train_ind
            self.mnist_data_split = Subset(mnist_data, picked)

        self.categories = mnist_data.classes

    @property
    def response_shape(self) -> Tuple[int, ...]:
        """
        :return: one-element tuple holding the number of categories
        :rtype: Tuple[int, ...]
        """
        n_categories = len(self.categories)
        return (n_categories,)

    @property
    def predictor_shape(self) -> Tuple[int, ...]:
        """
        :return: ``(1, resize_s, resize_s)`` as set in the constructor
        :rtype: Tuple[int, ...]
        """
        return self._resize_shape

    def __len__(self):
        """Return the number of items in the selected split."""
        return len(self.mnist_data_split)

    def __getitem__(self, index):
        """Return the item at ``index`` from the selected split."""
        return self.mnist_data_split[index]