示例#1
0
    def __getitem__(self, slice_):
        """| Gets a slice or slices from DatasetView
        | Usage:

        >>> ds_view = ds[5:15]
        >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of the sample at dataset index 12

        Parameters
        ----------
        slice_: str, int, slice or an iterable of them
            Split by ``slice_split`` into a subpath and a list of index
            expressions. The first index expression is relative to this view.

        Returns
        -------
        A ``DatasetView`` when no subpath is given, a ``TensorView`` when the
        subpath is a key of ``self.dataset._tensors``, otherwise the result of
        ``self._get_dictionary``.

        Raises
        ------
        ValueError
            If more than one index expression is given without a subpath, or
            if a subpath that is not a tensor key is indexed more than once.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]

        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        # A squeezed view already fixed its sample, so index 0 of the view is
        # implied as the first index expression.
        if self.squeeze_dim:
            slice_list = [0] + slice_list

        if not subpath:
            if len(slice_list) > 1:
                raise ValueError(
                    "Can't slice a dataset with multiple slices without subpath"
                )
            first = slice_list[0]
            num, ofs = slice_extract_info(first, self.num_samples)
            return DatasetView(
                dataset=self.dataset,
                num_samples=num,
                offset=ofs + self.offset,
                squeeze_dim=isinstance(first, int),
            )

        squeeze_dims = [True] if self.squeeze_dim else []

        if not slice_list:
            # No index given: address every sample covered by this view.
            slice_ = slice(self.offset, self.offset + self.num_samples)
            if subpath in self.dataset._tensors.keys():
                return TensorView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=slice_,
                    squeeze_dims=squeeze_dims,
                )
            # NOTE(review): this call passes the dataset and a `slice=` keyword,
            # unlike the two-argument call at the end of this method -- confirm
            # against the signature of _get_dictionary.
            return self._get_dictionary(self.dataset, subpath, slice=slice_)

        first = slice_list[0]
        num, ofs = slice_extract_info(first, self.num_samples)
        start = ofs + self.offset
        # Translate the view-relative first index into the underlying dataset.
        # Only a genuine int index collapses the sample axis; a one-element
        # slice such as 3:4 must stay a slice so the axis is preserved.
        slice_list[0] = (
            start if isinstance(first, int) else slice(start, start + num)
        )
        if subpath in self.dataset._tensors.keys():
            return TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_list,
                squeeze_dims=squeeze_dims,
            )
        if len(slice_list) > 1:
            raise ValueError("You can't slice a dictionary of Tensors")
        return self._get_dictionary(subpath, slice_list[0])
示例#2
0
 def __getitem__(self, slice_):
     """| Gets a slice or slices from dataset
     | Usage:
     >>> return ds["image", 5, 0:1920, 0:1080, 0:3].compute() # returns numpy array
     >>> images = ds["image"]
     >>> return images[5].compute() # returns numpy array
     >>> images = ds["image"]
     >>> image = images[5]
     >>> return image[0:1920, 0:1080, 0:3].compute()

     Without a subpath a DatasetView is returned. With a subpath that is a
     key of ``self._tensors`` a TensorView is built and returned as is when
     ``self.lazy`` is truthy, computed otherwise. Any other subpath is
     resolved through ``self._get_dictionary``.
     """
     if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
         slice_ = [slice_]
     subpath, indices = slice_split(list(slice_))

     if not subpath:
         if len(indices) > 1:
             raise ValueError(
                 "Can't slice a dataset with multiple slices without subpath"
             )
         first = indices[0]
         num, ofs = slice_extract_info(first, self.shape[0])
         return DatasetView(
             dataset=self,
             num_samples=num,
             offset=ofs,
             squeeze_dim=isinstance(first, int),
             lazy=self.lazy,
         )

     if indices:
         # Result is unused; the call is kept so the first index still goes
         # through slice_extract_info before anything else happens.
         slice_extract_info(indices[0], self.shape[0])

     if subpath in self._tensors.keys():
         # With no explicit index, the view spans the whole first dimension.
         selection = indices if indices else slice(0, self.shape[0])
         tensorview = TensorView(
             dataset=self,
             subpath=subpath,
             slice_=selection,
             lazy=self.lazy,
         )
         return tensorview if self.lazy else tensorview.compute()

     if len(indices) > 1:
         raise ValueError("You can't slice a dictionary of Tensors")
     if indices:
         return self._get_dictionary(subpath, indices[0])
     return self._get_dictionary(subpath)
示例#3
0
    def __getitem__(self, slice_):
        """| Get an item to be computed without iterating on the whole dataset.
        | Creates a dataset view, then a temporary dataset to apply the transform.

        Parameters
        ----------
        slice_: str, int, slice or an iterable of them
            Gets a slice or slices from dataset. When no int/slice item is
            given, all samples are selected.

        Returns
        -------
        The result of indexing the temporary dataset produced by ``self.store``
        with the same subpath and trailing indices; the sample index is rebased
        because the temporary dataset only holds the selected samples.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]

        slice_ = list(slice_)
        # Only the index expressions are needed here; string items stay in
        # `slice_` and are forwarded unchanged to the temporary dataset.
        _, slice_list = slice_split(slice_)

        if len(slice_list) == 0:
            slice_list = [slice(None, None, None)]

        first = slice_list[0]
        num, ofs = slice_extract_info(first, self.shape[0])

        ds_view = DatasetView(
            dataset=self._ds,
            num_samples=num,
            offset=ofs,
            squeeze_dim=isinstance(first, int),
        )

        path = posixpath.expanduser("~/.activeloop/tmparray")
        new_ds = self.store(path, length=num, ds=ds_view, progressbar=False)

        # The temporary dataset was already cut down to the requested samples,
        # so the sample index becomes "everything" (or 0 for a single sample).
        rebased = 0 if isinstance(first, int) else slice(None, None, None)

        # Replace the first int/slice item wherever it sits. Picking the
        # position from the list length alone overwrote the subpath for
        # ds["image"] and rewrote the wrong item for ds[5, "image"].
        for position, item in enumerate(slice_):
            if not isinstance(item, str):
                slice_[position] = rebased
                break
        else:
            # Only string items were given: keep them and select all samples.
            slice_.append(rebased)
        return new_ds[slice_]
示例#4
0
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:

        >>> ds_view = ds[5:15]
        >>> ds_view["image", 3, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets the 8th image

        Parameters
        ----------
        slice_: str, int, slice or an iterable of them
            Must contain a subpath; the first int/slice item is relative to
            this view.
        value:
            Value to assign; it is passed through ``str_to_int`` with the
            dataset tokenizer before being written.

        Raises
        ------
        ValueError
            If no subpath is given.
        """
        # handling strings and bytes
        assign_value = str_to_int(value, self.dataset.tokenizer)

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)
        # A squeezed view already fixed its sample, so index 0 is implied.
        if self.squeeze_dim:
            slice_list = [0] + slice_list

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")

        if not slice_list:
            # No index given: target every sample covered by this view.
            target = (
                self.offset
                if self.num_samples == 1
                else slice(self.offset, self.offset + self.num_samples)
            )
            self.dataset._tensors[subpath][target] = assign_value  # Add path check
            return

        # Int indices go through slice_extract_info as well, exactly like in
        # __getitem__, so they are validated against the size of this view.
        # Adding a raw int to self.offset let negative or too-large indices
        # address samples outside the view.
        num, ofs = slice_extract_info(slice_list[0], self.num_samples)
        start = ofs + self.offset
        slice_list[0] = slice(start, start + num) if num > 1 else start
        self.dataset._tensors[subpath][slice_list] = assign_value
示例#5
0
def test_dataset_utils():
    """Check the error cases and two accepted calls of the slicing helpers."""
    with pytest.raises(TypeError):
        slice_split([5.3])

    # (expected exception, index expression, number of samples)
    rejected = (
        (IndexError, 5, 3),
        (ValueError, slice(2, 10, -2), 3),
        (IndexError, slice(20, 100), 3),
        (IndexError, slice(1, 20), 3),
        (IndexError, slice(4, 1), 10),
    )
    for expected_error, index, length in rejected:
        with pytest.raises(expected_error):
            slice_extract_info(index, length)

    # Open-ended slices must be accepted without raising.
    accepted = (
        (slice(None, 10), 20),
        (slice(20, None), 50),
    )
    for index, length in accepted:
        slice_extract_info(index, length)
示例#6
0
    def __getitem__(self, slice_):
        """| Gets a slice from an objectview

        String items of ``slice_`` extend the subpath of this view. The first
        int/slice item selects along the first dimension of the dataset,
        unless the dataset is an already squeezed DatasetView. The remaining
        items are applied, in order, to the dimensions that are not squeezed.

        Returns a new ObjectView, or its computed value when ``self.lazy`` is
        falsy. Raises IndexError when more indices are given than there are
        unsqueezed dimensions.
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        subpath, pending = slice_split(list(slice_))

        dataset = self.dataset
        # Work on copies so this view is left untouched.
        nums = self.nums.copy()
        offsets = self.offsets.copy()
        squeeze_dims = self.squeeze_dims.copy()
        inner_schema_obj = self.inner_schema_obj

        if subpath:
            inner_schema_obj, nums, offsets, squeeze_dims = self.process_path(
                subpath, inner_schema_obj, nums, offsets, squeeze_dims)
        subpath = self.subpath + subpath

        if pending:
            # Slice first dim
            head = pending[0]
            if not isinstance(self.dataset, DatasetView):
                num, ofs = slice_extract_info(head, self.dataset.shape[0])
                dataset = DatasetView(self.dataset, num, ofs,
                                      isinstance(head, int))
                pending = pending[1:]
            elif not self.dataset.squeeze_dim:
                dataset = self.dataset[head]
                pending = pending[1:]
            # A squeezed DatasetView consumes no index here; every item is
            # left for the remaining dimensions below.

        if pending:
            # Line the remaining indices up with the unsqueezed dimensions;
            # None marks a dimension that receives no index.
            expanded = []
            for squeezed in squeeze_dims:
                take = not squeezed and len(pending) > 0
                expanded.append(pending.pop(0) if take else None)
            if pending:
                # slice list longer than max
                raise IndexError("Too many indices")
            for axis, item in enumerate(expanded):
                if item is None:
                    continue
                num, ofs = slice_extract_info(item, nums[axis])
                nums[axis] = num
                offsets[axis] += ofs
                squeeze_dims[axis] = num == 1

        objectview = ObjectView(
            dataset=dataset,
            subpath=subpath,
            slice_list=None,
            nums=nums,
            offsets=offsets,
            squeeze_dims=squeeze_dims,
            inner_schema_obj=inner_schema_obj,
            lazy=self.lazy,
            new=False,
        )
        if self.lazy:
            return objectview
        return objectview.compute()
示例#7
0
    def __init__(
        self,
        dataset,
        subpath=None,
        slice_list=None,
        nums=None,
        offsets=None,
        squeeze_dims=None,
        inner_schema_obj=None,
        lazy=True,
        new=True,
    ):
        """Creates an ObjectView object for dataset from a Dataset, DatasetView or TensorView
        object, or creates a different ObjectView from an existing one

        Parameters
        ----------
        These parameters are used to create a new ObjectView.
        dataset: hub.api.dataset.Dataset object
            The dataset whose ObjectView is being created, or its DatasetView
        subpath: str (optional)
            A potentially incomplete path to any element in the Dataset
        slice_list: optional
            The `slice_` of this Tensor that needs to be accessed
        lazy: bool, optional
            Setting this to False will stop lazy computation and will allow items to be accessed without .compute()

        These parameters are also needed to create an ObjectView from an existing one.
        nums: List[int], optional
            Number of elements in each dimension of the ObjectView to be created.
            Defaults to an empty list; the given list is copied, not stored.
        offsets: List[int], optional
            Starting element in each dimension of the ObjectView to be created.
            Defaults to an empty list; the given list is copied, not stored.
        squeeze_dims: List[bool], optional
            Whether each dimension can be squeezed or not.
            Defaults to an empty list; the given list is copied, not stored.
        inner_schema_obj: Child of hub.schema.Tensor or hub.schema.SchemaDict
            The deepest element in the schema up to which the previous ObjectView had been processed

        new: bool
            Whether to create a new ObjectView object from a Dataset, DatasetView or TensorView
            or create a different ObjectView from an existing one

        Raises
        ------
        IndexError
            If `slice_list` holds more trailing indices than there are dimensions in `nums`.
        """
        self.dataset = dataset
        self.schema = (dataset.dataset.schema.dict_
                       if isinstance(dataset, DatasetView) else
                       dataset.schema.dict_)
        self.subpath = subpath

        # Fresh lists per instance: these are modified in place further down,
        # so neither a shared default nor the caller's own list may be aliased.
        self.nums = [] if nums is None else list(nums)
        self.offsets = [] if offsets is None else list(offsets)
        self.squeeze_dims = [] if squeeze_dims is None else list(squeeze_dims)

        self.inner_schema_obj = inner_schema_obj
        self.lazy = lazy

        if new:
            # Creating new obj
            if self.subpath:
                (
                    self.inner_schema_obj,
                    self.nums,
                    self.offsets,
                    self.squeeze_dims,
                ) = self.process_path(
                    self.subpath,
                    self.inner_schema_obj,
                    self.nums.copy(),
                    self.offsets.copy(),
                    self.squeeze_dims.copy(),
                )
            if slice_list:
                # The first index selects along the dataset's first dimension,
                # which is expressed as a DatasetView.
                first = slice_list[0]
                num, ofs = slice_extract_info(first, dataset.shape[0])
                self.dataset = DatasetView(dataset, num, ofs,
                                           isinstance(first, int))

                # Remaining indices narrow the leading inner dimensions.
                rest = slice_list[1:]
                if len(rest) > len(self.nums):
                    raise IndexError("Too many indices")
                for i, it in enumerate(rest):
                    num, ofs = slice_extract_info(it, self.nums[i])
                    self.nums[i] = num
                    self.offsets[i] += ofs
                    self.squeeze_dims[i] = num == 1
示例#8
0
    def __getitem__(self, slice_):
        """| Gets a slice or slices from DatasetView
        | Usage:

        >>> ds_view = ds[5:15]
        >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image

        Without a subpath a narrower DatasetView is returned. With a subpath a
        TensorView or an ObjectView is built and, when ``self.lazy`` is falsy,
        computed before being returned. A subpath that is neither a tensor key
        nor prefixed by one is resolved through ``self._get_dictionary``.
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]

        subpath, indices = slice_split(list(slice_))

        # A squeezed view already fixed its sample, so index 0 is implied.
        if self.squeeze_dim:
            indices = [0] + indices

        if not subpath:
            if len(indices) > 1:
                raise ValueError(
                    "Can't slice a dataset with multiple slices without subpath"
                )
            first = indices[0]
            num, ofs = slice_extract_info(first, self.num_samples)
            return DatasetView(
                dataset=self.dataset,
                num_samples=num,
                offset=ofs + self.offset,
                squeeze_dim=isinstance(first, int),
                lazy=self.lazy,
            )

        if not indices:
            # No index given: address everything this view covers.
            if self.squeeze_dim:
                whole = self.offset
            else:
                whole = slice(self.offset, self.offset + self.num_samples)
            tensor_keys = self.dataset._tensors.keys()
            if subpath in tensor_keys:
                tensorview = TensorView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=whole,
                    lazy=self.lazy,
                )
                return tensorview if self.lazy else tensorview.compute()
            if any(subpath.startswith(key) for key in tensor_keys):
                objectview = ObjectView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_list=[whole],
                    lazy=self.lazy,
                )
                return objectview if self.lazy else objectview.compute()
            # NOTE(review): this call passes the dataset and a `slice=` keyword,
            # unlike the two-argument call at the end of this method -- confirm
            # against the signature of _get_dictionary.
            return self._get_dictionary(self.dataset, subpath, slice=whole)

        first = indices[0]
        num, ofs = slice_extract_info(first, self.num_samples)
        start = ofs + self.offset
        # Translate the view-relative first index into the underlying dataset.
        indices[0] = start if isinstance(first, int) else slice(start, start + num)
        schema_obj = self.dataset.schema.dict_[subpath.split("/")[1]]

        tensor_keys = self.dataset._tensors.keys()
        # A Sequence indexed beyond its first dimension is not served by a
        # TensorView here; it falls through to the ObjectView lookup below.
        if subpath in tensor_keys and not (
            isinstance(schema_obj, Sequence) and len(indices) > 1
        ):
            tensorview = TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=indices,
                lazy=self.lazy,
            )
            return tensorview if self.lazy else tensorview.compute()
        if any(subpath.startswith(key) for key in tensor_keys):
            objectview = ObjectView(
                dataset=self.dataset,
                subpath=subpath,
                slice_list=indices,
                lazy=self.lazy,
            )
            return objectview if self.lazy else objectview.compute()
        if len(indices) > 1:
            raise ValueError("You can't slice a dictionary of Tensors")
        return self._get_dictionary(subpath, indices[0])
示例#9
0
    def __getitem__(self, slice_):
        """| Gets a slice from an objectview

        String items of ``slice_`` extend the subpath of this view. The first
        int/slice item narrows ``self.indexes`` when that is a list or a slice;
        the remaining items are applied, in order, to the dimensions that are
        not squeezed.

        Returns
        -------
        A new ObjectView, or its computed value when ``self.lazy`` is falsy.

        Raises
        ------
        IndexError
            If more indices are given than there are unsqueezed dimensions.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        # Work on copies so this view is left untouched.
        nums, offsets, squeeze_dims, inner_schema_obj = (
            self.nums.copy(),
            self.offsets.copy(),
            self.squeeze_dims.copy(),
            self.inner_schema_obj,
        )

        if subpath:
            inner_schema_obj, nums, offsets, squeeze_dims = self.process_path(
                subpath, inner_schema_obj, nums, offsets, squeeze_dims)
        subpath = self.subpath + subpath

        new_indexes = self.indexes
        if len(slice_list) >= 1:
            if isinstance(self.indexes, list):
                new_indexes = self.indexes[slice_list[0]]
                # An int index yields a single element, not a sub-list, so it
                # must not be subscripted; only a non-empty sub-list is
                # collapsed into a slice.
                if (self.is_contiguous and isinstance(new_indexes, list)
                        and new_indexes):
                    new_indexes = slice(new_indexes[0], new_indexes[-1] + 1)
                slice_list = slice_list[1:]
            elif isinstance(self.indexes, slice):
                ofs = self.indexes.start or 0
                num = self.indexes.stop - ofs if self.indexes.stop else None
                num, ofs_temp = slice_extract_info(slice_list[0], num)
                start = ofs + ofs_temp
                new_indexes = (start if isinstance(slice_list[0], int) else
                               slice(start, start + num))
                slice_list = slice_list[1:]

        if len(slice_list) >= 1:
            # Expand slice list: None marks a dimension that gets no index,
            # either because it is squeezed or because the indices ran out.
            exp_slice_list = []
            for squeeze in squeeze_dims:
                if not squeeze and len(slice_list) > 0:
                    exp_slice_list.append(slice_list.pop(0))
                else:
                    exp_slice_list.append(None)
            if len(slice_list) > 0:
                # slice list longer than max
                raise IndexError("Too many indices")
            for i, it in enumerate(exp_slice_list):
                if it is not None:
                    num, ofs = slice_extract_info(it, nums[i])
                    nums[i] = num
                    offsets[i] += ofs
                    squeeze_dims[i] = isinstance(it, int)

        objectview = ObjectView(
            dataset=self.dataset,
            subpath=subpath,
            slice_=None,
            indexes=new_indexes,
            nums=nums,
            offsets=offsets,
            squeeze_dims=squeeze_dims,
            inner_schema_obj=inner_schema_obj,
            lazy=self.lazy,
            check_bounds=False,
        )
        return objectview if self.lazy else objectview.compute()