示例#1
0
def test_delayed_samples():
    """Check lazy loading and parent/child attribute handling of ``DelayedSample``.

    The assertions cover, in order: data and delayed attributes resolved
    through their loader callables, a child ``Sample`` exposing its parent's
    attribute, a child ``DelayedSample`` overriding and extending the parent's
    delayed attributes, a delayed attribute not being loaded before its first
    read, and a delayed attribute being overwritten by plain assignment.
    """
    # Mutable flag flipped by ``check_loader`` so the test can observe the
    # moment at which the ``read_check`` attribute is actually loaded.
    loader_calls = {"read_check": False}

    def data_loader():
        return 0

    def annot_loader():
        return "annotation"

    def annot_variant_loader():
        return "annotation_variant"

    def check_loader():
        loader_calls["read_check"] = True
        return "delayed_attr_data"

    parent = DelayedSample(data_loader,
                           delayed_attributes={"annot": annot_loader})
    assert parent.data == 0, parent.data
    assert parent.annot == "annotation", parent.annot

    # A plain Sample created from the delayed parent.
    plain_child = Sample(1, parent=parent)
    assert plain_child.data == 1, plain_child.data
    assert plain_child.annot == "annotation", plain_child.annot
    expected_attributes = {"data": 1, "annot": "annotation"}
    assert plain_child.__dict__ == expected_attributes, plain_child.__dict__

    # A delayed child that overrides one parent attribute and adds two more.
    child_loaders = {
        "annot": annot_variant_loader,  # replaces the parent's annot
        "new_annot": annot_loader,  # attribute the parent does not have
        "read_check": check_loader,
    }
    delayed_child = DelayedSample(data_loader,
                                  parent=parent,
                                  delayed_attributes=child_loaders)
    assert delayed_child.data == 0, delayed_child.data
    assert delayed_child.annot == "annotation_variant", delayed_child.annot
    assert delayed_child.new_annot == "annotation", delayed_child.new_annot
    # Nothing has touched ``read_check`` yet, so its loader must not have run.
    assert not loader_calls["read_check"], "delayed attribute has been read early"
    assert delayed_child.read_check == "delayed_attr_data", delayed_child.read_check
    assert loader_calls["read_check"], "delayed attribute should have been read by now"

    # Direct assignment replaces the delayed value.
    parent.annot = "changed"
    assert parent.annot == "changed", parent.annot
示例#2
0
def compare_samples(samples, pipeline, dask_client, verbose):
    """Compare several samples in an all-vs-all fashion and print the scores.

    Every entry of ``samples`` is wrapped in a one-element ``SampleSet`` that
    is passed both to ``pipeline.create_biometric_reference`` and to
    ``pipeline.compute_scores``. The scores are printed as a table whose first
    row lists the samples.

    Parameters
    ----------
    samples
        Sized collection of items. Each item is bound as the argument of
        ``bob.io.base.load`` in its sample's loader, and ``str(item)`` is used
        as the sample key.
    pipeline
        Object providing ``create_biometric_reference`` and ``compute_scores``.
    dask_client
        When not ``None``, the pipeline is wrapped with ``dask_bio_pipeline``,
        the scores are computed with this client as scheduler, and the client
        is shut down before returning.
    verbose
        Not used by this function.

    Raises
    ------
    ValueError
        If fewer than two samples are given.
    """
    # An empty list is as invalid as a single sample: there is nothing to
    # compare, so reject everything below two.
    if len(samples) < 2:
        raise ValueError(
            "It's necessary to have at least two samples for the comparison")

    sample_sets = [
        SampleSet(
            [
                DelayedSample(functools.partial(bob.io.base.load, s),
                              key=str(s))
            ],
            key=str(s),
            biometric_id=str(i),
        ) for i, s in enumerate(samples)
    ]
    if dask_client is not None:
        pipeline = dask_bio_pipeline(pipeline)

    # Header row of the printed table.
    table = [list(samples)]
    biometric_references = pipeline.create_biometric_reference(sample_sets)
    scores = pipeline.compute_scores(sample_sets, biometric_references)
    if dask_client is not None:
        scores = scores.compute(scheduler=dask_client)
    for sset in scores:
        table.append([str(s.data) for s in sset])

    print("All vs All comparison")
    print(tabulate(table))

    if dask_client is not None:
        dask_client.shutdown()
示例#3
0
    def convert_row_to_sample(self, row, header=None):
        """Build a ``DelayedSample`` from one row of a list file.

        The first column is always the relative path (without extension).

        * Four columns: column 1 becomes ``compare_reference_id`` and column 3
          becomes ``reference_id``; column 2 is not read.
        * Otherwise: column 1 becomes ``reference_id`` and, when exactly three
          columns are present, column 2 becomes ``subject_id``.

        ``header`` is not used by this method.
        """
        path = row[0]
        column_count = len(row)

        extra = {}
        if column_count == 4:
            reference_id = str(row[3])
            extra["compare_reference_id"] = str(row[1])
        else:
            reference_id = str(row[1])
            if column_count == 3:
                extra["subject_id"] = str(row[2])

        # The loader is bound to the full file path.
        load = functools.partial(
            self.data_loader,
            os.path.join(self.dataset_original_directory,
                         path + self.extension),
        )
        return DelayedSample(load,
                             key=path,
                             reference_id=reference_id,
                             **extra)
示例#4
0
    def transform(self, X):
        """
        Convert the leye_x, leye_y, reye_x, reye_y attributes of each sample to
        `annotations = {"leye": (y, x), "reye": (y, x)}`.

        For every sample in `X` a new sample is created with
        `DelayedSample.from_sample`, carrying the `annotations` dictionary, and
        the four raw coordinate attributes are deleted from that new sample.

        Returns the list of new samples, in the order of `X`.
        """
        coordinate_attributes = ("leye_x", "leye_y", "reye_x", "reye_y")

        annotated_samples = []
        for x in X:
            eyes = {
                "leye": (
                    float(find_attribute(x, "leye_y")),
                    float(find_attribute(x, "leye_x")),
                ),
                "reye": (
                    float(find_attribute(x, "reye_y")),
                    float(find_attribute(x, "reye_x")),
                ),
            }

            sample = DelayedSample.from_sample(x, annotations=eyes)
            # The coordinates are now held in `annotations`; remove the raw
            # attributes. A plain loop is used because this is a side effect,
            # not the construction of a list.
            for attribute in coordinate_attributes:
                delattr(sample, attribute)
            annotated_samples.append(sample)

        return annotated_samples
示例#5
0
    def _make_sampleset_from_filedict(self, file_dict, reference_ids=None):
        samplesets = []
        for key in file_dict:
            f = file_dict[key]

            annotations_key = os.path.basename(f.path)

            kwargs = ({
                "references": reference_ids
            } if reference_ids is not None else {})

            samplesets.append(
                SampleSet(
                    key=f.path,
                    reference_id=f.reference_id,
                    subject_id=f.subject_id,
                    **kwargs,
                    samples=[
                        DelayedSample(
                            key=f.path,
                            annotations=self.annotations[annotations_key],
                            load=partial(
                                bob.io.base.load,
                                os.path.join(
                                    self.original_directory,
                                    f.path + self.extension,
                                ),
                            ),
                        )
                    ],
                ))
        return samplesets
示例#6
0
    def _enroll_sample_set(self, sampleset):
        """
        Enroll a sample set, reusing the checkpoint on disk when there is one.

        The checkpoint path is built from ``self.biometric_reference_dir``, an
        optional sub-directory given by ``self.hash_fn`` applied to the
        sampleset key, and the key plus ``self.extension``. Enrollment is run
        (and its result written) when ``self.force`` is set or the file does
        not exist. The returned ``DelayedSample`` loads the reference from
        that path with ``self.load_func`` and has ``sampleset`` as parent.
        """
        key = str(sampleset.key)

        # Optional extra directory level in the `models` directory.
        subdirectory = "" if self.hash_fn is None else self.hash_fn(key)
        path = os.path.join(self.biometric_reference_dir, subdirectory,
                            key + self.extension)

        must_enroll = self.force or not os.path.exists(path)
        if must_enroll:
            templates = self.biometric_algorithm.create_templates_from_samplesets(
                [sampleset], enroll=True)
            enrolled_sample = templates[0]

            # Persist the freshly enrolled reference.
            os.makedirs(os.path.dirname(path), exist_ok=True)
            self.write_biometric_reference(enrolled_sample, path)

        # Always hand back a sample that reads from the checkpoint, even right
        # after enrolling. Original author's note: this seems inefficient, but
        # it's crucial for large datasets.
        return DelayedSample(functools.partial(self.load_func, path),
                             parent=sampleset)
示例#7
0
    def background_model_samples(self):
        """Return the training set for protocols whose name starts with "o".

        The result is the list returned by :py:meth:`references` followed by
        one extra sampleset whose key, reference id and subject id are all
        "unknown". That sampleset holds one sample per image listed in
        ``self.pairs["training-unknown"]``.

        Returns
        -------

        [bob.pipelines.SampleSet]
            The enrollment samplesets plus the "unknown" sampleset, or an
            empty list when the protocol name does not start with "o".

        """
        if self.protocol[0] != "o":
            return []

        enrollmentset = self.references()

        def describe(image):
            # Pair the image file path with its annotations (None when no
            # annotation directory is configured).
            image_path = os.path.join(
                self.original_directory,
                self.image_relative_path,
                self.make_path_from_filename(image) + self.extension,
            )
            if self.annotation_directory is None:
                return image_path, None
            annotation_path = os.path.join(
                self.annotation_directory,
                self.make_path_from_filename(image) +
                self.annotation_extension,
            )
            return image_path, self._extract(annotation_path)

        # Keyed by image name, so a name listed twice yields a single sample.
        data = {
            image: describe(image)
            for image in self.pairs["training-unknown"]
        }

        unknown_samples = [
            DelayedSample(
                key=image,
                load=partial(bob.io.base.load, image_path),
                annotations=annotations,
            ) for image, (image_path, annotations) in data.items()
        ]
        sset = SampleSet(
            key="unknown",
            reference_id="unknown",
            subject_id="unknown",
            samples=unknown_samples,
        )
        return enrollmentset + [sset]
示例#8
0
    def convert_row_to_sample(self, row, header):
        """Build a ``DelayedSample`` from one row and its header.

        Column 0 is the relative path (without extension) and column 1 the
        reference id. Every further column is passed to the sample as a
        keyword argument named after the lower-cased header of that column.
        """
        path, reference_id = row[0], row[1]

        extra_attributes = {
            str(name).lower(): value
            for name, value in zip(header[2:], row[2:])
        }

        # The loader is bound to the full file path.
        load = functools.partial(
            self.data_loader,
            os.path.join(self.dataset_original_directory,
                         path + self.extension),
        )
        return DelayedSample(load,
                             key=path,
                             reference_id=reference_id,
                             **extra_attributes)
示例#9
0
def get_fake_sample_set(face_size=(160, 160), purpose="bioref"):
    """Return a list with one single-sample ``SampleSet`` built from ``images``.

    ``images[purpose]`` supplies the item given to ``bob.io.base.load``
    (index 0) and the annotations (index 1). The key and reference id are
    "1" for the "bioref" purpose and "2" for any other; ``references`` is
    always ``["1"]``. ``face_size`` is not used by this function.
    """
    data = images[purpose][0]
    annotations = images[purpose][1]

    if purpose == "bioref":
        key = "1"
    else:
        key = "2"

    sample = DelayedSample(
        load=functools.partial(bob.io.base.load, data),
        key=key,
        annotations=annotations,
    )
    sample_set = SampleSet(
        [sample],
        key=key,
        reference_id=key,
        references=["1"],
    )
    return [sample_set]
示例#10
0
    def transform(self, X):
        """Attach lazily loaded annotations to every sample in ``X``.

        For each sample a new ``DelayedSample`` is created that reuses the
        sample's ``_load`` callable, has the sample as parent, and gets a
        delayed ``annotations`` attribute. That attribute reads the file
        ``<annotation_directory>/<sample.key><annotation_extension>`` with
        ``bob.db.base.read_annotation_file`` and ``self.annotation_type``.

        Returns the list of new samples, or ``None`` when
        ``self.annotation_directory`` is ``None``.
        NOTE(review): returning ``None`` rather than ``X`` is kept from the
        original; confirm that callers expect it.
        """
        if self.annotation_directory is None:
            return None

        annotated_samples = []
        for x in X:

            # since the file id is equal to the file name, we can simply use it
            annotation_file = os.path.join(self.annotation_directory,
                                           x.key + self.annotation_extension)

            annotated_samples.append(
                DelayedSample(
                    x._load,
                    parent=x,
                    delayed_attributes=dict(
                        # Bind the path as a default argument. A plain closure
                        # would look `annotation_file` up when the attribute
                        # is finally read, i.e. after the loop has ended, and
                        # every sample would load the last sample's file.
                        annotations=lambda annotation_file=annotation_file:
                        bob.db.base.read_annotation_file(
                            annotation_file, self.annotation_type)),
                ))

        return annotated_samples
示例#11
0
 def transform(self, samples: list) -> list:
     """Wrap every sample in a ``DelayedSample`` that loads audio on demand.

     For each input sample, ``sample.data`` is read and bound, together with
     the channel and ``self.forced_sr``, as arguments of ``get_audio_data``
     for the new sample's loader. The channel is the sample's ``channel``
     attribute when it has one, otherwise ``self.forced_channel``; it is
     converted with ``int`` unless it is ``None``. A delayed ``rate``
     attribute is bound to ``get_audio_sample_rate`` with ``sample.data`` and
     ``self.forced_sr``. The input sample becomes the parent.
     """

     def to_delayed(sample):
         channel = getattr(sample, "channel", self.forced_channel)
         channel_index = None if channel is None else int(channel)
         return DelayedSample(
             load=partial(get_audio_data, sample.data, channel_index,
                          self.forced_sr),
             parent=sample,
             delayed_attributes={
                 "rate": partial(get_audio_sample_rate, sample.data,
                                 self.forced_sr)
             },
         )

     return [to_delayed(sample) for sample in samples]
示例#12
0
    def _make_sample_set(
        self, reference_id, subject_id, sample_path, references=None
    ):
        """Create a ``SampleSet`` holding one delayed video sample.

        ``sample_path`` is joined to ``self.original_directory``; the result
        is bound to ``self._load_video_from_path`` (the sample loader) and to
        ``self._annotations`` (the delayed ``annotations`` attribute). The
        sampleset key and reference id are ``str(reference_id)``. When
        ``references`` is given it is forwarded to the ``SampleSet``.
        """
        path = os.path.join(self.original_directory, sample_path)

        # Only forward `references` when the caller supplied them.
        optional = {"references": references} if references is not None else {}

        # Annotations are passed as a callable rather than as a value.
        annotation_loader = partial(self._annotations, path)

        set_id = str(reference_id)
        return SampleSet(
            key=set_id,
            reference_id=set_id,
            subject_id=str(subject_id),
            **optional,
            samples=[
                DelayedSample(
                    key=str(sample_path),
                    load=partial(self._load_video_from_path, path),
                    delayed_attributes={"annotations": annotation_loader},
                )
            ],
        )
示例#13
0
    def convert_row_to_sample(self, row, header):
        """Build a ``DelayedSample`` from one row and its header.

        Column 0 is the relative path (without extension) and column 1 the
        reference id. Every further column becomes a keyword argument named
        after the lower-cased header of that column. When
        ``self.reference_id_equal_subject_id`` is true, ``subject_id`` is set
        to the reference id, replacing any value read from the row.

        Raises ``ValueError`` when ``subject_id`` is neither derived from the
        reference id nor present among the extra columns.
        """
        path, reference_id = row[0], row[1]

        kwargs = {
            str(name).lower(): value
            for name, value in zip(header[2:], row[2:])
        }

        if self.reference_id_equal_subject_id:
            kwargs["subject_id"] = reference_id
        elif "subject_id" not in kwargs:
            raise ValueError(f"`subject_id` not available in {header}")

        # The loader is bound to the full file path.
        load = functools.partial(
            self.data_loader,
            os.path.join(
                self.dataset_original_directory, path + self.extension
            ),
        )
        return DelayedSample(
            load,
            key=path,
            reference_id=reference_id,
            **kwargs,
        )
示例#14
0
    def references(self, group="dev"):

        if self.protocol not in self.references_dict:
            self.references_dict[self.protocol] = []

            if self.protocol == "view2":
                for key in self.pairs:

                    image_path = os.path.join(
                        self.original_directory,
                        self.image_relative_path,
                        key + self.extension,
                    )
                    if self.annotation_directory is not None:
                        annotation_path = os.path.join(
                            self.annotation_directory,
                            key + self.annotation_extension,
                        )
                        annotations = self._extract(annotation_path)
                    else:
                        annotations = None

                    sset = SampleSet(
                        key=key,
                        reference_id=key,
                        subject_id=self.subject_id_from_filename(key),
                        samples=[
                            DelayedSample(
                                key=key,
                                reference_id=key,
                                load=partial(bob.io.base.load, image_path),
                                subject_id=self.subject_id_from_filename(key),
                                annotations=annotations,
                            )
                        ],
                    )
                    self.references_dict[self.protocol].append(sset)
            elif self.protocol[0] == "o":
                for key in self.pairs["enroll"]:
                    data = {}
                    for image in self.pairs["enroll"][key]:
                        # get image path
                        image_path = os.path.join(
                            self.original_directory,
                            self.image_relative_path,
                            self.make_path_from_filename(image) +
                            self.extension,
                        )
                        # load annotations
                        if self.annotation_directory is not None:
                            annotation_path = os.path.join(
                                self.annotation_directory,
                                self.make_path_from_filename(image) +
                                self.annotation_extension,
                            )
                            annotations = self._extract(annotation_path)
                        else:
                            annotations = None
                        data[image] = (image_path, annotations)

                    # generate one sampleset from several (should be 3) images of the same person
                    sset = SampleSet(
                        key=key,
                        reference_id=key,
                        subject_id=key,
                        samples=[
                            DelayedSample(
                                key=image,
                                reference_id=key,
                                load=partial(bob.io.base.load, data[image][0]),
                                annotations=data[image][1],
                            ) for image in data
                        ],
                    )
                    self.references_dict[self.protocol].append(sset)

        return self.references_dict[self.protocol]
示例#15
0
    def probes(self, group="dev"):
        if self.protocol not in self.probes_dict:
            self.probes_dict[self.protocol] = []

            if self.protocol == "view2":
                for key in self.probe_reference_keys:
                    image_path = os.path.join(
                        self.original_directory,
                        self.image_relative_path,
                        key + self.extension,
                    )
                    if self.annotation_directory is not None:
                        annotation_path = os.path.join(
                            self.annotation_directory,
                            key + self.annotation_extension,
                        )
                        annotations = self._extract(annotation_path)
                    else:
                        annotations = None

                    sset = SampleSet(
                        key=key,
                        reference_id=key,
                        subject_id=self.subject_id_from_filename(key),
                        references=copy.deepcopy(
                            self.probe_reference_keys[key]
                        ),  # deep copying to avoid bizarre issues with dask
                        samples=[
                            DelayedSample(
                                key=key,
                                reference_id=key,
                                subject_id=self.subject_id_from_filename(key),
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                        ],
                    )
                    self.probes_dict[self.protocol].append(sset)

            elif self.protocol[0] == "o":
                # add known probes
                # collect probe samples:
                probes = [(image, key) for key in self.pairs["probe"]
                          for image in self.pairs["probe"][key]]
                if self.protocol in ("o1", "o3"):
                    probes += [(image, "unknown")
                               for image in self.pairs["o1"]]
                if self.protocol in ("o2", "o3"):
                    probes += [(image, "unknown")
                               for image in self.pairs["o2"]]

                for image, key in probes:
                    # get image path
                    image_path = os.path.join(
                        self.original_directory,
                        self.image_relative_path,
                        self.make_path_from_filename(image) + self.extension,
                    )
                    # load annotations
                    if self.annotation_directory is not None:
                        annotation_path = os.path.join(
                            self.annotation_directory,
                            self.make_path_from_filename(image) +
                            self.annotation_extension,
                        )
                        annotations = self._extract(annotation_path)
                    else:
                        annotations = None

                    # one probe sample per image
                    sset = SampleSet(
                        key=image,
                        reference_id=image,
                        subject_id=key,
                        samples=[
                            DelayedSample(
                                key=image,
                                reference_id=image,
                                load=partial(bob.io.base.load, image_path),
                                annotations=annotations,
                            )
                        ],
                    )
                    self.probes_dict[self.protocol].append(sset)

        return self.probes_dict[self.protocol]
示例#16
0
def annotate_samples(samples, reader, make_key, annotator, output_dir,
                     dask_client, **kwargs):
    """Annotates a list of samples.

    This command is very similar to ``bob bio annotate`` except that it works
    without a database interface. You must provide a list of samples as well as
    two functions:

        def reader(sample):
            # Loads data from a sample.
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

        def make_key(sample):
            # Creates a unique str identifier for this sample.
            # for example:
            return str(sample)
    """
    # Must stay the first statement: it is called before any local is rebound.
    log_parameters(logger, ignore=("samples", ))

    # Layer 1: lets the annotator take Sample objects and store its output in
    # the `annotations` attribute.
    sample_annotator = wrap(["sample"],
                            annotator,
                            output_attribute="annotations")

    # Layer 2: checkpoints the `annotations` attribute as JSON files under
    # `output_dir`.
    checkpointed_annotator = wrap(
        bases=["checkpoint"],
        estimator=sample_annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Layer 3: lets the annotator take Dask Bags.
    dask_annotator = wrap(["dask"], checkpointed_annotator)

    # Splits a list of samples into Dask Bags.
    bagger = ToDaskBag(npartitions=50)

    # Without a client everything runs in the calling thread.
    scheduler = "single-threaded" if dask_client is None else dask_client

    # One DelayedSample per input; `reader` is bound to the input, not called.
    delayed_samples = [
        DelayedSample(load=functools.partial(reader, item),
                      key=make_key(item)) for item in samples
    ]

    bags = bagger.transform(delayed_samples)

    logger.info(f"Saving annotations in {output_dir}")
    logger.info(f"Annotating {len(delayed_samples)} samples...")
    dask_annotator.transform(bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")
示例#17
0
    def transform(self, X):
        """
        Convert the per-coordinate attributes of each sample to a single
        `annotations` dictionary of `(first, second)` float tuples:

        * `leye`, `reye`, `nose`, `lmouth`, `rmouth`: `(<name>_y, <name>_x)`
        * `topleft`: `(face_y, face_x)`
        * `size`: `(face_h, face_w)`

        For every sample in `X` a new sample is created with
        `DelayedSample.from_sample`, carrying the `annotations` dictionary, and
        the raw coordinate attributes are deleted from that new sample.

        Returns the list of new samples, in the order of `X`.
        """
        # annotation name -> the two source attributes, in tuple order. This
        # single table drives both the conversion and the clean-up, so the two
        # cannot drift apart.
        annotation_sources = (
            ("leye", "leye_y", "leye_x"),
            ("reye", "reye_y", "reye_x"),
            ("nose", "nose_y", "nose_x"),
            ("lmouth", "lmouth_y", "lmouth_x"),
            ("rmouth", "rmouth_y", "rmouth_x"),
            ("topleft", "face_y", "face_x"),
            ("size", "face_h", "face_w"),
        )

        annotated_samples = []

        for x in X:
            annotations = {
                name: (float(getattr(x, first)), float(getattr(x, second)))
                for name, first, second in annotation_sources
            }

            sample = DelayedSample.from_sample(x, annotations=annotations)
            # Cleaning up: the raw values are now held in `annotations`. A
            # plain loop is used because this is a side effect, not the
            # construction of a list.
            for _, first, second in annotation_sources:
                delattr(sample, first)
                delattr(sample, second)
            annotated_samples.append(sample)

        return annotated_samples
示例#18
0
    def transform(self, X):
        """
        Convert the per-coordinate attributes of each sample to a single
        `annotations` dictionary of `(y, x)` float tuples.

        Which points are produced depends on which eye coordinates are
        non-empty (compared against the empty string):

        * both eyes: `leye` and `reye`
        * left eye only: `leye` and `mouth` (from `mouthl_*`)
        * right eye only: `reye` and `mouth` (from `mouthr_*`)

        For every sample in `X` a new sample is created with
        `DelayedSample.from_sample`, carrying the `annotations` dictionary, and
        the raw coordinate attributes are deleted from that new sample.

        Returns the list of new samples, in the order of `X`.

        Raises `ValueError` when neither eye coordinate is available.
        """

        def point(sample, prefix):
            # Annotation points are (y, x) tuples of floats.
            return (
                float(find_attribute(sample, prefix + "_y")),
                float(find_attribute(sample, prefix + "_x")),
            )

        raw_attributes = (
            "reye_x",
            "reye_y",
            "leye_x",
            "leye_y",
            "nose_x",
            "nose_y",
            "mouthr_x",
            "mouthr_y",
            "mouthl_x",
            "mouthl_y",
            "chin_x",
            "chin_y",
        )

        annotated_samples = []
        for x in X:
            # Look each eye up once instead of once per branch condition.
            has_leye = find_attribute(x, "leye_x") != ""
            has_reye = find_attribute(x, "reye_x") != ""

            if has_leye and has_reye:
                # Normal profile
                annotations = {
                    "leye": point(x, "leye"),
                    "reye": point(x, "reye"),
                }
            elif has_leye:
                # Left profile
                annotations = {
                    "leye": point(x, "leye"),
                    "mouth": point(x, "mouthl"),
                }
            elif has_reye:
                # Right profile
                annotations = {
                    "reye": point(x, "reye"),
                    "mouth": point(x, "mouthr"),
                }
            else:
                raise ValueError("Annotations not available")

            sample = DelayedSample.from_sample(x, annotations=annotations)
            # The raw values are no longer needed on the new sample. A plain
            # loop is used because this is a side effect, not the construction
            # of a list.
            for attribute in raw_attributes:
                delattr(sample, attribute)

            annotated_samples.append(sample)

        return annotated_samples