Example #1
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:
        >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        >>> images = ds["image"]
        >>> image = images[5]
        >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        """
        if "r" in self._mode:
            raise ReadModeException("__setitem__")
        self._auto_checkout()

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")
        elif subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in the dataset")

        assign_value = get_value(value)
        schema_dict = self.schema
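        # Resolve the schema entry for this key; nested keys are "/"-separated.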
        if subpath[1:] in schema_dict.dict_.keys():
            schema_key = schema_dict.dict_.get(subpath[1:], None)
        else:
            for schema_key in subpath[1:].split("/"):
                schema_dict = schema_dict.dict_.get(schema_key, None)
                if not isinstance(schema_dict, SchemaDict):
                    schema_key = schema_dict
        if isinstance(schema_key, ClassLabel):
            assign_value = check_class_label(assign_value, schema_key)
        if isinstance(schema_key, (Text, bytes)) or (
            isinstance(assign_value, Iterable)
            and any(isinstance(val, str) for val in assign_value)
        ):
            # handling strings and bytes
            assign_value = str_to_int(assign_value, self.tokenizer)

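        # No numeric indices given: assign across the whole tensor for this key.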
        if not slice_list:
            self._tensors[subpath][:] = assign_value
        else:
            self._tensors[subpath][slice_list] = assign_value
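All three variants shown here first run the incoming slice through slice_split to separate string keys from numeric indices. The helper below is a hypothetical, simplified re-implementation written only to illustrate the shape of the (subpath, slice_list) pair those methods consume; the library's own slice_split may differ.

from typing import List, Tuple, Union


def simplified_slice_split(slice_: list) -> Tuple[str, List[Union[int, slice]]]:
    # Hypothetical sketch: string components form a "/"-prefixed subpath,
    # ints and slices are collected in order into the numeric slice list.
    subpath = ""
    slice_list: List[Union[int, slice]] = []
    for item in slice_:
        if isinstance(item, str):
            subpath += "/" + item.strip("/")
        else:
            slice_list.append(item)
    return subpath, slice_list


# simplified_slice_split(["image", 5, slice(0, 1920)]) -> ("/image", [5, slice(0, 1920)])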
Example #2
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> images_tensorview[7, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets 7th image
        """
        self.dataset._auto_checkout()

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        slice_ = self.slice_fill(slice_)
        subpath, slice_list = slice_split(slice_)
        if subpath:
            raise ValueError("Can't setitem of TensorView with subpath")

        assign_value = get_value(value)
        schema_dict = self.dataset.schema
        if self.subpath[1:] in schema_dict.dict_.keys():
            schema_key = schema_dict.dict_.get(self.subpath[1:], None)
        else:
            for schema_key in self.subpath[1:].split("/"):
                schema_dict = schema_dict.dict_.get(schema_key, None)
                if not isinstance(schema_dict, SchemaDict):
                    schema_key = schema_dict
        if isinstance(schema_key, ClassLabel):
            assign_value = check_class_label(assign_value, schema_key)
        if isinstance(schema_key, (Text, bytes)) or (
            isinstance(assign_value, Iterable)
            and any(isinstance(val, str) for val in assign_value)
        ):
            # handling strings and bytes
            assign_value = str_to_int(assign_value, self.dataset.tokenizer)

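        # Re-base the incoming slice onto the region of the parent tensor that
        # this view covers, using the stored indexes, nums and offsets.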
        new_nums = self.nums.copy()
        new_offsets = self.offsets.copy()
        if isinstance(self.indexes, list):
            new_indexes = self.indexes[slice_list[0]]
            if self.is_contiguous and new_indexes:
                new_indexes = slice(new_indexes[0], new_indexes[-1] + 1)
        elif isinstance(self.indexes, int):
            new_indexes = self.indexes
        else:
            ofs = self.indexes.start or 0
            num = self.indexes.stop - ofs if self.indexes.stop else None
            new_indexes = self._combine(slice_list[0], num, ofs)
        slice_list[0] = new_indexes
        if len(new_nums) < len(slice_list):
            new_nums.extend([None] * (len(slice_list) - len(new_nums)))
            new_offsets.extend([0] * (len(slice_list) - len(new_offsets)))
        for i in range(1, len(slice_list)):
            slice_list[i] = self._combine(slice_list[i], new_nums[i], new_offsets[i])
        for i in range(len(slice_list), len(new_nums)):
            cur_slice = (
                slice(new_offsets[i], new_offsets[i] + new_nums[i])
                if not self.squeeze_dims[i]
                else new_offsets[i]
            )
            slice_list.append(cur_slice)

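        # A leading int or slice can be written in one call; a list of indexes
        # falls back to one write per row.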
        if isinstance(slice_list[0], (int, slice)):
            self.dataset._tensors[self.subpath][slice_list] = assign_value
        else:
            for i, index in enumerate(slice_list[0]):
                current_slice = [index] + slice_list[1:]
                self.dataset._tensors[self.subpath][current_slice] = assign_value[i]
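The _combine calls above shift each user-supplied index by the view's offset along that dimension. The function below is a hypothetical one-dimensional sketch of that composition, written only to illustrate the idea; it is not the library's _combine implementation.

def combine_1d(index, num=None, offset=0):
    # Hypothetical sketch: re-base an int or slice by the view's offset along
    # one dimension, optionally bounding an open-ended slice by the view's length.
    if isinstance(index, int):
        return offset + index
    start = offset + (index.start or 0)
    if index.stop is not None:
        stop = offset + index.stop
    else:
        stop = offset + num if num is not None else None
    return slice(start, stop)


# combine_1d(slice(0, 3), num=10, offset=5) -> slice(5, 8)
# combine_1d(2, offset=5) -> 7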
Example #3
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:

        >>> ds_view = ds[5:15]
        >>> ds_view["image", 3, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets the 8th image
        """
        self.dataset._auto_checkout()

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)
        slice_list = [0] + slice_list if isinstance(self.indexes, int) else slice_list

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without key")
        elif subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in the dataset")

        assign_value = get_value(value)
        schema_dict = self.dataset.schema
        if subpath[1:] in schema_dict.dict_.keys():
            schema_key = schema_dict.dict_.get(subpath[1:], None)
        else:
            for schema_key in subpath[1:].split("/"):
                schema_dict = schema_dict.dict_.get(schema_key, None)
                if not isinstance(schema_dict, SchemaDict):
                    schema_key = schema_dict
        if isinstance(schema_key, ClassLabel):
            assign_value = check_class_label(assign_value, schema_key)
        if isinstance(schema_key, (Text, bytes)) or (
            isinstance(assign_value, Iterable)
            and any(isinstance(val, str) for val in assign_value)
        ):
            # handling strings and bytes
            assign_value = str_to_int(assign_value, self.dataset.tokenizer)

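        # No numeric indices beyond the key: write every row covered by this view.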
        if not slice_list:
            slice_ = (
                slice(self.indexes[0], self.indexes[-1] + 1)
                if self.is_contiguous
                else self.indexes
            )
            if not isinstance(slice_, list):
                self.dataset._tensors[subpath][slice_] = assign_value
            else:
                for i, index in enumerate(slice_):
                    self.dataset._tensors[subpath][index] = assign_value[i]
        else:
            if isinstance(self.indexes, list):
                indexes = self.indexes[slice_list[0]]
                if self.is_contiguous and isinstance(indexes, list) and indexes:
                    slice_list[0] = slice(indexes[0], indexes[-1] + 1)
                else:
                    slice_list[0] = indexes
            else:
                slice_list[0] = self.indexes

            if not isinstance(slice_list[0], list):
                self.dataset._tensors[subpath][slice_list] = assign_value
            else:
                for i, index in enumerate(slice_list[0]):
                    current_slice = [index] + slice_list[1:]
                    self.dataset._tensors[subpath][current_slice] = assign_value[i]
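Both view classes check is_contiguous so that a run of consecutive row indexes collapses into a single slice and one bulk write replaces a per-row Python loop. The helper below is a hypothetical sketch of that collapse, assuming indexes is an ascending list of ints as in the views above.

def as_contiguous_slice(indexes):
    # Hypothetical sketch: collapse an ascending, gap-free list of ints into a
    # slice so a single bulk assignment can be issued instead of a per-row loop.
    if indexes and all(b - a == 1 for a, b in zip(indexes, indexes[1:])):
        return slice(indexes[0], indexes[-1] + 1)
    return indexes  # non-contiguous: caller falls back to per-index writes


# as_contiguous_slice([5, 6, 7]) -> slice(5, 8)
# as_contiguous_slice([5, 7, 9]) -> [5, 7, 9]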