def test_store_and_retrieve(self):
    """Round-trip a BitMask through its hex and binary string forms.

    Builds a mask from a fixed bit pattern, sets every 11th bit from 11
    up to and including 99, serialises the mask with ``to_hex`` and
    ``to_bin``, rebuilds masks from both strings and checks that the
    underlying integer ``x`` is unchanged in each case.
    """
    initial_value = int(
        "0b01010100100100100100100010010100100100101001001010101010", 2)
    mask = BitMask(initial_value)

    # Bits 11, 22, ..., 99 -- several lie beyond the width of the
    # initial pattern.
    for bit_index in range(11, 100, 11):
        mask.set_kth_bit(bit_index)

    as_hex = mask.to_hex()
    as_bin = mask.to_bin()

    restored_from_hex = BitMask.from_hex(as_hex)
    restored_from_bin = BitMask.from_bin(as_bin)

    self.assertEqual(mask.x, restored_from_hex.x)
    self.assertEqual(mask.x, restored_from_bin.x)
def create_tag_from_filenames(self,
                              fnames_new_tag: List[str],
                              new_tag_name: str,
                              parent_tag_id: str = None) -> TagData:
    """Creates a new tag from a list of filenames.

    The tag's bitmask has bit ``i`` set when the ``i``-th filename returned
    by ``self.get_filenames()`` is one of the requested filenames.

    Args:
        fnames_new_tag:
            A list of filenames to be included in the new tag.
        new_tag_name:
            The name of the new tag.
        parent_tag_id:
            The tag defining where to sample from, default: None resolves
            to the initial-tag.

    Returns:
        The newly created tag.

    Raises:
        RuntimeError:
            If a tag named ``new_tag_name`` already exists, if the dataset
            has no tags at all, if ``parent_tag_id`` is None and no tag
            named 'initial-tag' exists, or if the number of selected
            samples differs from the number of distinct filenames provided.
    """
    # make sure the tag name does not exist yet
    tags = self.get_all_tags()
    if any(tag.name == new_tag_name for tag in tags):
        raise RuntimeError(
            f'There already exists a tag with tag_name {new_tag_name}.')
    if not tags:
        raise RuntimeError('There exists no initial-tag for this dataset.')

    # fallback to initial tag if no parent tag is provided
    if parent_tag_id is None:
        # Pass a default to next() so that a missing initial-tag surfaces as
        # the documented RuntimeError rather than a leaked StopIteration.
        parent_tag_id = next(
            (tag.id for tag in tags if tag.name == 'initial-tag'), None)
        if parent_tag_id is None:
            raise RuntimeError(
                'There exists no initial-tag for this dataset.')

    # get list of filenames from the server
    fnames_server = self.get_filenames()
    tot_size = len(fnames_server)

    # create new bitmask for the new tag; a set gives O(1) membership tests
    # and collapses duplicate filenames in the request
    requested_fnames = set(fnames_new_tag)
    bitmask = BitMask(0)
    for i, fname in enumerate(fnames_server):
        if fname in requested_fnames:
            bitmask.set_kth_bit(i)

    # quick sanity check
    num_selected_samples = len(bitmask.to_indices())
    if num_selected_samples != len(requested_fnames):
        message = (
            f'An error occured when creating the new subset! '
            f'Out of the {len(requested_fnames)} filenames you provided '
            f'to create a new tag, only {num_selected_samples} have been '
            f'found on the server. '
            f'Make sure you use the correct filenames. ')
        # Only quote an example filename when the server returned any;
        # indexing an empty list here would mask the real error with an
        # IndexError.
        if fnames_server:
            message += (
                f'Valid filename example from the dataset: '
                f'{fnames_server[0]}')
        else:
            message += 'The dataset on the server contains no filenames.'
        raise RuntimeError(message)

    # create new tag
    tag_data_dict = {
        'name': new_tag_name,
        'prevTagId': parent_tag_id,
        'bitMaskData': bitmask.to_hex(),
        'totSize': tot_size,
    }
    new_tag = self._tags_api.create_tag_by_dataset_id(
        tag_data_dict, self.dataset_id)
    return new_tag