def test_delayed_samples():
    """Check lazy loading, attribute inheritance and overriding of samples."""

    def load_data():
        return 0

    def load_annot():
        return "annotation"

    def load_annot_variant():
        return "annotation_variant"

    # One entry is appended every time ``load_check`` runs, so an empty list
    # means the delayed attribute has not been evaluated yet.
    check_calls = []

    def load_check():
        check_calls.append(True)
        return "delayed_attr_data"

    parent = DelayedSample(load_data, delayed_attributes={"annot": load_annot})
    assert parent.data == 0, parent.data
    assert parent.annot == "annotation", parent.annot

    # A plain Sample built from the delayed parent exposes the parent's
    # attribute next to its own data.
    child_sample = Sample(1, parent=parent)
    assert child_sample.data == 1, child_sample.data
    assert child_sample.annot == "annotation", child_sample.annot
    expected_state = {"data": 1, "annot": "annotation"}
    assert child_sample.__dict__ == expected_state, child_sample.__dict__

    # Overwriting and adding delayed_attributes to the child
    overrides = {
        "annot": load_annot_variant,  # Override parent's annot
        "new_annot": load_annot,  # Add the new_annot attribute
        "read_check": load_check,
    }
    child_sample = DelayedSample(
        load_data, parent=parent, delayed_attributes=overrides
    )
    assert child_sample.data == 0, child_sample.data
    assert child_sample.annot == "annotation_variant", child_sample.annot
    assert child_sample.new_annot == "annotation", child_sample.new_annot

    # The loader must only run once the attribute is actually accessed.
    assert not check_calls, "delayed attribute has been read early"
    assert child_sample.read_check == "delayed_attr_data", child_sample.read_check
    assert check_calls, "delayed attribute should have been read by now"

    # A delayed attribute can be replaced by a plain value.
    parent.annot = "changed"
    assert parent.annot == "changed", parent.annot
def compare_samples(samples, pipeline, dask_client, verbose):
    """Compare several samples in an all-vs-all fashion and print the scores.

    Every sample is wrapped in its own single-element ``SampleSet`` that is
    used both for creating the biometric references and for scoring.

    Parameters
    ----------
    samples
        Sized collection of items handed to ``bob.io.base.load``; ``str()``
        of each item is used as its key.
    pipeline
        Object providing ``create_biometric_reference`` and
        ``compute_scores``.
    dask_client
        ``None`` to run locally. Otherwise the pipeline is wrapped with
        ``dask_bio_pipeline``, the scores are computed with this object as
        scheduler, and it is shut down before returning.
    verbose
        Not used by this function.

    Raises
    ------
    ValueError
        If fewer than two samples are given.
    """
    # An empty input is as meaningless as a single sample, so reject both
    # up front instead of failing later inside the pipeline.
    if len(samples) < 2:
        raise ValueError(
            "It's necessary to have at least two samples for the comparison")

    run_with_dask = dask_client is not None
    try:
        sample_sets = [
            SampleSet(
                [
                    DelayedSample(functools.partial(bob.io.base.load, s),
                                  key=str(s))
                ],
                key=str(s),
                biometric_id=str(i),
            ) for i, s in enumerate(samples)
        ]
        if run_with_dask:
            pipeline = dask_bio_pipeline(pipeline)

        # First row of the table is the header listing the samples.
        table = [list(samples)]
        biometric_references = pipeline.create_biometric_reference(sample_sets)
        scores = pipeline.compute_scores(sample_sets, biometric_references)
        if run_with_dask:
            scores = scores.compute(scheduler=dask_client)
        table.extend([str(s.data) for s in sset] for sset in scores)

        print("All vs All comparison")
        print(tabulate(table))
    finally:
        # Release the client even when the pipeline raised.
        if run_with_dask:
            dask_client.shutdown()
def convert_row_to_sample(self, row, header=None):
    """Turn one row of a list file into a ``DelayedSample``.

    The meaning of the columns depends on how many there are:

    * 4 columns: ``row[1]`` becomes ``compare_reference_id`` and ``row[3]``
      the ``reference_id``;
    * 3 columns: ``row[1]`` is the ``reference_id`` and ``row[2]`` the
      ``subject_id``;
    * otherwise: ``row[1]`` is the ``reference_id`` and no extra attribute
      is set.

    ``row[0]`` is always the path, relative to
    ``self.dataset_original_directory`` and without ``self.extension``.
    ``header`` is accepted but not used.
    """
    path = row[0]
    n_columns = len(row)

    if n_columns == 4:
        reference_id = str(row[3])
        kwargs = {"compare_reference_id": str(row[1])}
    elif n_columns == 3:
        reference_id = str(row[1])
        kwargs = {"subject_id": str(row[2])}
    else:
        reference_id = str(row[1])
        kwargs = {}

    full_path = os.path.join(
        self.dataset_original_directory, path + self.extension
    )
    loader = functools.partial(self.data_loader, full_path)
    return DelayedSample(loader, key=path, reference_id=reference_id, **kwargs)
def transform(self, X):
    """Replace the flat eye-coordinate attributes by an ``annotations`` dict.

    For every sample the ``leye_x``, ``leye_y``, ``reye_x`` and ``reye_y``
    attributes are looked up with ``find_attribute``, converted to float and
    stored as ``annotations = {"leye": (y, x), "reye": (y, x)}`` on a new
    ``DelayedSample``. The four source attributes are then deleted from the
    new sample.

    Returns
    -------
    list
        One new sample per element of ``X``, in the same order.
    """
    obsolete = ("leye_x", "leye_y", "reye_x", "reye_y")

    annotated_samples = []
    for x in X:
        left_eye = (
            float(find_attribute(x, "leye_y")),
            float(find_attribute(x, "leye_x")),
        )
        right_eye = (
            float(find_attribute(x, "reye_y")),
            float(find_attribute(x, "reye_x")),
        )
        sample = DelayedSample.from_sample(
            x, annotations={"leye": left_eye, "reye": right_eye}
        )
        for name in obsolete:
            delattr(sample, name)
        annotated_samples.append(sample)
    return annotated_samples
def _make_sampleset_from_filedict(self, file_dict, reference_ids=None): samplesets = [] for key in file_dict: f = file_dict[key] annotations_key = os.path.basename(f.path) kwargs = ({ "references": reference_ids } if reference_ids is not None else {}) samplesets.append( SampleSet( key=f.path, reference_id=f.reference_id, subject_id=f.subject_id, **kwargs, samples=[ DelayedSample( key=f.path, annotations=self.annotations[annotations_key], load=partial( bob.io.base.load, os.path.join( self.original_directory, f.path + self.extension, ), ), ) ], )) return samplesets
def _enroll_sample_set(self, sampleset):
    """Enroll a sample set, checkpointing the result on disk.

    The checkpoint lives under ``self.biometric_reference_dir``, inside an
    optional sub-directory produced by ``self.hash_fn``, and is named after
    the sample set key plus ``self.extension``. Enrollment runs only when
    ``self.force`` is set or the checkpoint file does not exist yet.

    Returns
    -------
    DelayedSample
        A sample that loads the checkpoint through ``self.load_func`` and has
        ``sampleset`` as its parent.
    """
    key = str(sampleset.key)

    # Amending `models` directory
    hash_dir_name = "" if self.hash_fn is None else self.hash_fn(key)
    path = os.path.join(
        self.biometric_reference_dir, hash_dir_name, key + self.extension
    )

    needs_enrollment = self.force or not os.path.exists(path)
    if needs_enrollment:
        templates = self.biometric_algorithm.create_templates_from_samplesets(
            [sampleset], enroll=True
        )
        # saving the new sample
        os.makedirs(os.path.dirname(path), exist_ok=True)
        self.write_biometric_reference(templates[0], path)

    # The result is always handed back as a lazy read of the checkpoint, even
    # right after enrolling. This seems inefficient, but it's crucial for
    # large datasets.
    return DelayedSample(
        functools.partial(self.load_func, path), parent=sampleset
    )
def background_model_samples(self):
    """Return the training set for the open-set protocols o1, o2 and o3.

    The result is the output of :py:meth:`references` followed by one extra
    sample set that gathers all ``"training-unknown"`` images (the known
    unknowns) under the subject id ``"unknown"``.

    Returns
    -------
    [bob.pipelines.SampleSet]
        The training sample sets. An empty list is returned when the
        protocol name does not start with ``"o"``.
    """
    if self.protocol[0] != "o":
        return []

    enrollmentset = self.references()

    # Map each known-unknown training image to its path and annotations.
    located = {}
    for image in self.pairs["training-unknown"]:
        image_path = os.path.join(
            self.original_directory,
            self.image_relative_path,
            self.make_path_from_filename(image) + self.extension,
        )

        annotations = None
        if self.annotation_directory is not None:
            annotations = self._extract(
                os.path.join(
                    self.annotation_directory,
                    self.make_path_from_filename(image)
                    + self.annotation_extension,
                )
            )
        located[image] = (image_path, annotations)

    # A single sample set holds all known-unknown training images.
    unknown_samples = [
        DelayedSample(
            key=image,
            load=partial(bob.io.base.load, image_path),
            annotations=annotations,
        )
        for image, (image_path, annotations) in located.items()
    ]
    unknown_set = SampleSet(
        key="unknown",
        reference_id="unknown",
        subject_id="unknown",
        samples=unknown_samples,
    )
    return enrollmentset + [unknown_set]
def convert_row_to_sample(self, row, header):
    """Turn one CSV row into a ``DelayedSample``.

    The first column is the path (relative to
    ``self.dataset_original_directory``, without ``self.extension``) and the
    second one the reference id. Every remaining column is attached to the
    sample as an attribute named after its lower-cased header entry.
    """
    path, reference_id = row[0], row[1]
    extra_attributes = {
        str(name).lower(): value
        for name, value in zip(header[2:], row[2:])
    }

    full_path = os.path.join(
        self.dataset_original_directory, path + self.extension
    )
    return DelayedSample(
        functools.partial(self.data_loader, full_path),
        key=path,
        reference_id=reference_id,
        **extra_attributes,
    )
def get_fake_sample_set(face_size=(160, 160), purpose="bioref"):
    """Build a one-element list holding a single-sample ``SampleSet``.

    The data and annotations are taken from ``images[purpose]`` (items 0 and
    1). The key is ``"1"`` for the ``"bioref"`` purpose and ``"2"`` for any
    other; the sample set always references ``["1"]``. ``face_size`` is
    accepted but not used.
    """
    entry = images[purpose]
    data = entry[0]
    annotations = entry[1]

    if purpose == "bioref":
        key = "1"
    else:
        key = "2"

    sample = DelayedSample(
        load=functools.partial(bob.io.base.load, data),
        key=key,
        annotations=annotations,
    )
    return [SampleSet([sample], key=key, reference_id=key, references=["1"])]
def transform(self, X):
    """Attach lazily read annotations to every sample of ``X``.

    Each input sample is wrapped in a new ``DelayedSample`` that reuses the
    input's ``_load`` function, has the input as parent, and gets a delayed
    ``annotations`` attribute. That attribute reads
    ``<annotation_directory>/<sample.key><annotation_extension>`` with
    ``bob.db.base.read_annotation_file`` when it is first needed.

    Returns
    -------
    list or None
        The wrapped samples, or ``None`` when ``self.annotation_directory``
        is ``None``.
    """
    if self.annotation_directory is None:
        # NOTE(review): this yields None rather than the unchanged samples;
        # kept as-is because callers may depend on it.
        return None

    annotated_samples = []
    for x in X:
        # since the file id is equal to the file name, we can simply use it
        annotation_file = os.path.join(
            self.annotation_directory, x.key + self.annotation_extension
        )
        annotated_samples.append(
            DelayedSample(
                x._load,
                parent=x,
                delayed_attributes=dict(
                    # Bind the path as a default argument. A plain closure
                    # looks up ``annotation_file`` only when it is called,
                    # i.e. after the loop has finished, so every sample would
                    # read the annotations of the last one.
                    annotations=lambda annotation_file=annotation_file: (
                        bob.db.base.read_annotation_file(
                            annotation_file, self.annotation_type
                        )
                    )
                ),
            )
        )
    return annotated_samples
def transform(self, samples: list) -> list:
    """Wrap every sample so that its audio data and rate are loaded lazily.

    For each input sample a ``DelayedSample`` is created whose data comes
    from ``get_audio_data`` and whose delayed ``rate`` attribute comes from
    ``get_audio_sample_rate``; both receive the input's ``data`` and
    ``self.forced_sr``. The channel is the sample's ``channel`` attribute
    when present, ``self.forced_channel`` otherwise, and is converted to
    ``int`` unless it is ``None``.

    Returns
    -------
    list
        One ``DelayedSample`` per input sample, each with the input as parent.
    """

    def _delay(sample):
        channel = getattr(sample, "channel", self.forced_channel)
        if channel is not None:
            channel = int(channel)
        return DelayedSample(
            load=partial(get_audio_data, sample.data, channel, self.forced_sr),
            parent=sample,
            delayed_attributes={
                "rate": partial(
                    get_audio_sample_rate, sample.data, self.forced_sr
                )
            },
        )

    return [_delay(sample) for sample in samples]
def _make_sample_set(
    self, reference_id, subject_id, sample_path, references=None
):
    """Create a ``SampleSet`` holding one lazily loaded video sample.

    Parameters
    ----------
    reference_id
        Converted to ``str`` and used both as key and reference id of the set.
    subject_id
        Converted to ``str`` and used as subject id of the set.
    sample_path
        Path relative to ``self.original_directory``; its ``str`` is the key
        of the sample.
    references
        When not ``None``, passed to the ``SampleSet`` as ``references``.
    """
    path = os.path.join(self.original_directory, sample_path)

    optional = {}
    if references is not None:
        optional["references"] = references

    # Both the video and its annotations are only read on first access.
    sample = DelayedSample(
        key=str(sample_path),
        load=partial(self._load_video_from_path, path),
        delayed_attributes={"annotations": partial(self._annotations, path)},
    )
    set_key = str(reference_id)
    return SampleSet(
        key=set_key,
        reference_id=set_key,
        subject_id=str(subject_id),
        samples=[sample],
        **optional,
    )
def convert_row_to_sample(self, row, header):
    """Turn one CSV row into a ``DelayedSample`` that carries a subject id.

    The first column is the path and the second the reference id; the
    remaining columns become attributes named after their lower-cased header
    entries. When ``self.reference_id_equal_subject_id`` is true the
    reference id is also used as ``subject_id``; otherwise a ``subject_id``
    column is mandatory.

    Raises
    ------
    ValueError
        If no ``subject_id`` column exists and the reference id must not be
        used in its place.
    """
    path, reference_id = row[0], row[1]
    kwargs = {
        str(name).lower(): value
        for name, value in zip(header[2:], row[2:])
    }

    if self.reference_id_equal_subject_id:
        kwargs["subject_id"] = reference_id
    elif "subject_id" not in kwargs:
        raise ValueError(f"`subject_id` not available in {header}")

    full_path = os.path.join(
        self.dataset_original_directory, path + self.extension
    )
    return DelayedSample(
        functools.partial(self.data_loader, full_path),
        key=path,
        reference_id=reference_id,
        **kwargs,
    )
def references(self, group="dev"):
    """Return the enrollment sample sets of the current protocol.

    The list is built on the first call for a protocol and cached in
    ``self.references_dict``; later calls return the cached list.

    * ``"view2"``: one single-sample set per key of ``self.pairs``.
    * protocols starting with ``"o"``: one set per key of
      ``self.pairs["enroll"]``, containing all images listed for that key.
    * any other protocol: an empty list.

    ``group`` is accepted but not used.
    """
    protocol = self.protocol
    if protocol in self.references_dict:
        return self.references_dict[protocol]

    self.references_dict[protocol] = []
    reference_sets = self.references_dict[protocol]

    if protocol == "view2":
        for key in self.pairs:
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                key + self.extension,
            )
            annotations = None
            if self.annotation_directory is not None:
                annotations = self._extract(
                    os.path.join(
                        self.annotation_directory,
                        key + self.annotation_extension,
                    )
                )

            reference_sets.append(
                SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=self.subject_id_from_filename(key),
                    samples=[
                        DelayedSample(
                            key=key,
                            reference_id=key,
                            load=partial(bob.io.base.load, image_path),
                            subject_id=self.subject_id_from_filename(key),
                            annotations=annotations,
                        )
                    ],
                )
            )
    elif protocol[0] == "o":
        enroll = self.pairs["enroll"]
        for key in enroll:
            # Map every enrollment image to its path and annotations.
            located = {}
            for image in enroll[key]:
                image_path = os.path.join(
                    self.original_directory,
                    self.image_relative_path,
                    self.make_path_from_filename(image) + self.extension,
                )
                annotations = None
                if self.annotation_directory is not None:
                    annotations = self._extract(
                        os.path.join(
                            self.annotation_directory,
                            self.make_path_from_filename(image)
                            + self.annotation_extension,
                        )
                    )
                located[image] = (image_path, annotations)

            # generate one sampleset from several (should be 3) images of
            # the same person
            reference_sets.append(
                SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=key,
                    samples=[
                        DelayedSample(
                            key=image,
                            reference_id=key,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                        for image, (image_path, annotations) in located.items()
                    ],
                )
            )

    return self.references_dict[protocol]
def probes(self, group="dev"):
    """Return the probe sample sets of the current protocol.

    The list is built on the first call for a protocol and cached in
    ``self.probes_dict``; later calls return the cached list.

    * ``"view2"``: one single-sample set per key of
      ``self.probe_reference_keys``, with a deep copy of that key's entry as
      ``references``.
    * protocols starting with ``"o"``: one single-sample set per image of
      ``self.pairs["probe"]``, plus the images of ``self.pairs["o1"]``
      (protocols o1 and o3) and ``self.pairs["o2"]`` (protocols o2 and o3)
      with subject id ``"unknown"``.
    * any other protocol: an empty list.

    ``group`` is accepted but not used.
    """
    protocol = self.protocol
    if protocol in self.probes_dict:
        return self.probes_dict[protocol]

    self.probes_dict[protocol] = []
    probe_sets = self.probes_dict[protocol]

    if protocol == "view2":
        for key in self.probe_reference_keys:
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                key + self.extension,
            )
            annotations = None
            if self.annotation_directory is not None:
                annotations = self._extract(
                    os.path.join(
                        self.annotation_directory,
                        key + self.annotation_extension,
                    )
                )

            probe_sets.append(
                SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=self.subject_id_from_filename(key),
                    # deep copying to avoid bizarre issues with dask
                    references=copy.deepcopy(self.probe_reference_keys[key]),
                    samples=[
                        DelayedSample(
                            key=key,
                            reference_id=key,
                            subject_id=self.subject_id_from_filename(key),
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
            )
    elif protocol[0] == "o":
        # Known probes first, then the unknown ones the protocol asks for.
        known = self.pairs["probe"]
        labelled_images = [
            (image, key) for key in known for image in known[key]
        ]
        if protocol in ("o1", "o3"):
            labelled_images.extend(
                (image, "unknown") for image in self.pairs["o1"]
            )
        if protocol in ("o2", "o3"):
            labelled_images.extend(
                (image, "unknown") for image in self.pairs["o2"]
            )

        for image, subject in labelled_images:
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                self.make_path_from_filename(image) + self.extension,
            )
            annotations = None
            if self.annotation_directory is not None:
                annotations = self._extract(
                    os.path.join(
                        self.annotation_directory,
                        self.make_path_from_filename(image)
                        + self.annotation_extension,
                    )
                )

            # one probe sample per image
            probe_sets.append(
                SampleSet(
                    key=image,
                    reference_id=image,
                    subject_id=subject,
                    samples=[
                        DelayedSample(
                            key=image,
                            reference_id=image,
                            load=partial(bob.io.base.load, image_path),
                            annotations=annotations,
                        )
                    ],
                )
            )

    return self.probes_dict[protocol]
def annotate_samples(samples, reader, make_key, annotator, output_dir,
                     dask_client, **kwargs):
    """Annotate a list of samples and save the annotations as JSON files.

    This command is very similar to ``bob bio annotate`` except that it works
    without a database interface. You must provide a list of samples as well
    as two functions:

        def reader(sample):
            # Loads data from a sample.
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

        def make_key(sample):
            # Creates a unique str identifier for this sample.
            # for example:
            return str(sample)

    The annotator is wrapped to work on samples, to checkpoint its
    ``annotations`` output as ``.json`` files in ``output_dir``, and to run on
    Dask bags. When ``dask_client`` is ``None`` the computation uses the
    ``"single-threaded"`` scheduler, otherwise ``dask_client`` itself.
    Additional keyword arguments are accepted but not used.
    """
    log_parameters(logger, ignore=("samples", ))

    # Allows passing of Sample objects as parameters
    sample_annotator = wrap(
        ["sample"], annotator, output_attribute="annotations"
    )

    # Will save the annotations in the `data` fields to a json file
    checkpointed_annotator = wrap(
        bases=["checkpoint"],
        estimator=sample_annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    dask_annotator = wrap(["dask"], checkpointed_annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    scheduler = "single-threaded" if dask_client is None else dask_client

    # Converts samples into a list of DelayedSample objects
    samples_obj = []
    for s in samples:
        samples_obj.append(
            DelayedSample(load=functools.partial(reader, s), key=make_key(s))
        )

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples_obj)

    logger.info(f"Saving annotations in {output_dir}")
    logger.info(f"Annotating {len(samples_obj)} samples...")

    dask_annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")
def transform(self, X):
    """Gather the flat landmark attributes into one ``annotations`` dict.

    For every sample the coordinate attributes are converted to float and
    stored as pairs under the keys ``leye``, ``reye``, ``nose``, ``lmouth``,
    ``rmouth`` (each ``(y, x)``), ``topleft`` (``(face_y, face_x)``) and
    ``size`` (``(face_h, face_w)``) on a new ``DelayedSample``. The source
    attributes are then deleted from the new sample.

    Returns
    -------
    list
        One new sample per element of ``X``, in the same order.
    """
    # annotation key -> names of the two attributes forming its pair
    layout = (
        ("leye", "leye_y", "leye_x"),
        ("reye", "reye_y", "reye_x"),
        ("nose", "nose_y", "nose_x"),
        ("lmouth", "lmouth_y", "lmouth_x"),
        ("rmouth", "rmouth_y", "rmouth_x"),
        ("topleft", "face_y", "face_x"),
        ("size", "face_h", "face_w"),
    )
    obsolete = (
        "leye_x",
        "leye_y",
        "reye_x",
        "reye_y",
        "nose_y",
        "nose_x",
        "face_y",
        "face_x",
        "face_h",
        "face_w",
        "lmouth_y",
        "lmouth_x",
        "rmouth_y",
        "rmouth_x",
    )

    annotated_samples = []
    for x in X:
        annotations = {}
        for key, first, second in layout:
            annotations[key] = (
                float(getattr(x, first)),
                float(getattr(x, second)),
            )

        sample = DelayedSample.from_sample(x, annotations=annotations)

        # Cleaning up
        for name in obsolete:
            delattr(sample, name)
        annotated_samples.append(sample)
    return annotated_samples
def transform(self, X):
    """Build ``annotations`` from whichever eye coordinates are available.

    An eye counts as available when its ``*_x`` attribute (looked up with
    ``find_attribute``) is not the empty string:

    * both eyes: ``{"leye": (y, x), "reye": (y, x)}`` (normal profile);
    * left eye only: ``leye`` plus ``mouth`` taken from ``mouthl_*``;
    * right eye only: ``reye`` plus ``mouth`` taken from ``mouthr_*``.

    The flat landmark attributes are deleted from the new sample afterwards.

    Raises
    ------
    ValueError
        If neither eye is available for a sample.
    """

    def _point(sample, y_name, x_name):
        return (
            float(find_attribute(sample, y_name)),
            float(find_attribute(sample, x_name)),
        )

    obsolete = (
        "reye_x",
        "reye_y",
        "leye_x",
        "leye_y",
        "nose_x",
        "nose_y",
        "mouthr_x",
        "mouthr_y",
        "mouthl_x",
        "mouthl_y",
        "chin_x",
        "chin_y",
    )

    annotated_samples = []
    for x in X:
        has_left = find_attribute(x, "leye_x") != ""
        has_right = find_attribute(x, "reye_x") != ""

        if not (has_left or has_right):
            raise ValueError("Annotations not available")

        if has_left and has_right:
            # Normal profile
            annotations = {
                "leye": _point(x, "leye_y", "leye_x"),
                "reye": _point(x, "reye_y", "reye_x"),
            }
        elif has_left:
            # Left profile
            annotations = {
                "leye": _point(x, "leye_y", "leye_x"),
                "mouth": _point(x, "mouthl_y", "mouthl_x"),
            }
        else:
            # Right profile
            annotations = {
                "reye": _point(x, "reye_y", "reye_x"),
                "mouth": _point(x, "mouthr_y", "mouthr_x"),
            }

        sample = DelayedSample.from_sample(x, annotations=annotations)
        for name in obsolete:
            delattr(sample, name)
        annotated_samples.append(sample)
    return annotated_samples