def __setitem__(self, slice_, value):
    """| Assigns ``value`` to the tensor region addressed by ``slice_``.

    | The first element of ``slice_`` names the tensor (the subpath); any
      remaining elements index into it. When nothing but the subpath is given,
      the whole tensor is overwritten.

    | Usage:

    >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
    >>> images = ds["image"]
    >>> image = images[5]
    >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")

    Raises
    ------
    ReadModeException
        If the dataset mode contains "r"
    ValueError
        If ``slice_`` carries no subpath
    KeyError
        If the subpath is not one of the dataset keys
    """
    if "r" in self._mode:
        raise ReadModeException("__setitem__")
    self._auto_checkout()

    # Strings and bytes are converted to integers through the tokenizer
    # before they are written.
    payload = str_to_int(get_value(value), self.tokenizer)

    # A lone key (including a plain string) is treated as a one-element index.
    is_single_key = isinstance(slice_, str) or not isinstance(slice_, abc.Iterable)
    index_parts = [slice_] if is_single_key else list(slice_)

    subpath, slice_list = slice_split(index_parts)
    if not subpath:
        raise ValueError("Can't assign to dataset sliced without subpath")
    if subpath not in self.keys:
        raise KeyError(f"Key {subpath} not found in the dataset")

    # An empty slice list means "everything", i.e. the same as tensor[:].
    self._tensors[subpath][slice_list or slice(None)] = payload
def commit(self, message: str = "") -> str:
    """| Saves the current state of the dataset and returns the id of the
      commit that was just saved.

    | If the current commit is not the head of its branch, the dataset is first
      checked out automatically to an auto branch.
    | A dataset created before version control was introduced (Hub v1.3.0) is
      only flushed: a warning is issued and nothing is returned.

    Parameters
    ----------
    message: str, optional
        The commit message to store along with the commit

    Raises
    ------
    ReadModeException
        If a version-controlled dataset is opened in a read mode
    """
    # Legacy datasets carry no commit id: behave exactly like flush().
    if self._commit_id is None:
        warnings.warn(
            "This dataset was created before version control, it does not support it. commit will behave same as flush"
        )
        self.flush()
        return None

    if "r" in self._mode:
        raise ReadModeException("commit")

    self._auto_checkout()

    # The id in use up to now identifies the state being committed; a fresh id
    # is generated for whatever is written afterwards.
    saved_id = self._commit_id
    fresh_id = generate_hash()
    self._commit_id = fresh_id

    head = VersionNode(fresh_id, self._branch)
    self._version_node.insert(head, message)
    self._version_node = head
    self._branch_node_map[self._branch] = head
    self._commit_node_map[fresh_id] = head

    self.flush()
    return saved_id
def checkout(self, address: str, create: bool = False) -> str:
    """| Switches the dataset to the state identified by ``address`` and
      returns the commit id that is current afterwards.

    | A branch name always resolves to the head of that branch; a commit id
      resolves to that exact commit. If ``address`` is neither and ``create``
      is True, a new branch with that name is started from the current commit.
    | Only works if the dataset was created on or after Hub v1.3.0.

    Parameters
    ----------
    address: str
        The branch name or commit id to checkout to
    create: bool, optional
        Specifying create as True creates a new branch from the current commit
        if the address isn't an existing branch name or commit id

    Raises
    ------
    VersioningNotSupportedException
        If the dataset has no commit id (created before version control)
    ReadModeException
        If a new branch has to be created while the dataset is in a read mode
    AddressNotFound
        If the address is unknown and ``create`` is False
    """
    if self._commit_id is None:
        raise VersioningNotSupportedException("checkout")
    self.flush()

    # Existing branch: land on its head node.
    if address in self._branch_node_map:
        target = self._branch_node_map[address]
        self._branch = address
        self._version_node = target
        self._commit_id = target.commit_id
        return self._commit_id

    # Existing commit: adopt the branch recorded on that commit's node.
    if address in self._commit_node_map:
        target = self._commit_node_map[address]
        self._version_node = target
        self._branch = target.branch
        self._commit_id = target.commit_id
        return self._commit_id

    if not create:
        raise AddressNotFound(address)
    if "r" in self._mode:
        raise ReadModeException("checkout to create new branch")

    # Unknown address with create=True: start a new branch.
    self._branch = address
    branch_commit_id = generate_hash()
    branch_node = VersionNode(branch_commit_id, self._branch)
    current = self._version_node

    if current.children:
        # The current node already has descendants: hang the branch off it.
        current.insert(branch_node, f"switched to new branch {address}")
    else:
        # The current node has no children: copy its chunks over to the new
        # commit id and attach the branch to the parent node, when one exists.
        for key in self.keys:
            self._tensors[key].fs_map.copy_all_chunks(
                self._commit_id, branch_commit_id
            )
        if current.parent is not None:
            current.parent.insert(
                branch_node, f"switched to new branch {address}"
            )

    self._version_node = branch_node
    self._commit_id = branch_commit_id
    self._branch_node_map[self._branch] = branch_node
    self._commit_node_map[self._commit_id] = branch_node
    self.flush()
    return self._commit_id
def __setitem__(self, slice_, value):
    """| Assigns ``value`` to the tensor region addressed by ``slice_``,
      converting it first according to the schema entry of the target tensor.

    | The first element of ``slice_`` names the tensor (the subpath); any
      remaining elements index into it. When nothing but the subpath is given,
      the whole tensor is overwritten.
    | Values destined for a ``ClassLabel`` entry go through
      ``check_class_label``; values for a ``Text`` entry, or iterables
      containing strings, are converted to integers with the tokenizer.

    | Usage:

    >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
    >>> images = ds["image"]
    >>> image = images[5]
    >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")

    Raises
    ------
    ReadModeException
        If the dataset mode contains "r"
    ValueError
        If ``slice_`` carries no subpath
    KeyError
        If the subpath is not one of the dataset keys
    """
    if "r" in self._mode:
        raise ReadModeException("__setitem__")
    self._auto_checkout()

    # A lone key (including a plain string) is treated as a one-element index.
    is_single_key = isinstance(slice_, str) or not isinstance(slice_, abc.Iterable)
    index_parts = [slice_] if is_single_key else list(slice_)

    subpath, slice_list = slice_split(index_parts)
    if not subpath:
        raise ValueError("Can't assign to dataset sliced without subpath")
    if subpath not in self.keys:
        raise KeyError(f"Key {subpath} not found in the dataset")

    payload = get_value(value)

    # Locate the schema entry for the tensor. The subpath without its first
    # character is tried as a flat key first; otherwise the schema is walked
    # one "/"-separated component at a time.
    schema_root = self.schema
    flat_key = subpath[1:]
    if flat_key in schema_root.dict_:
        schema_entry = schema_root.dict_[flat_key]
    else:
        node = schema_root
        for part in flat_key.split("/"):
            node = node.dict_.get(part, None)
            # While still on a nested SchemaDict the entry is just the path
            # component; once something else is reached, that object is it.
            schema_entry = part if isinstance(node, SchemaDict) else node

    if isinstance(schema_entry, ClassLabel):
        payload = check_class_label(payload, schema_entry)

    # handling strings and bytes
    # The payload is only scanned for strings when the schema entry alone does
    # not already call for tokenizing.
    needs_tokenizing = isinstance(schema_entry, (Text, bytes))
    if not needs_tokenizing and isinstance(payload, Iterable):
        needs_tokenizing = any(isinstance(item, str) for item in payload)
    if needs_tokenizing:
        payload = str_to_int(payload, self.tokenizer)

    # An empty slice list means "everything", i.e. the same as tensor[:].
    self._tensors[subpath][slice_list or slice(None)] = payload