def _create_sample_sets(raw_data, offset, references=None):
    if references is None:
        return [
            SampleSet(
                [
                    Sample(
                        s,
                        reference_id=str(i + offset),
                        key=str(uuid.uuid4()),
                    )
                ],
                key=str(i + offset),
                reference_id=str(i + offset),
                subject_id=str(i + offset),
            ) for i, s in enumerate(raw_data)
        ]
    else:
        return [
            SampleSet(
                [
                    Sample(
                        s,
                        reference_id=str(i + offset),
                        key=str(uuid.uuid4()),
                    )
                ],
                key=str(i + offset),
                reference_id=str(i + offset),
                subject_id=str(i + offset),
                references=references,
            ) for i, s in enumerate(raw_data)
        ]
示例#2
0
def test_extractor_fittable():
    """Fit a wrapped fittable extractor, then check plain and checkpointed output."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_file = os.path.join(tmp_dir, "Extractor.hdf5")
        fake_extractor = FakeExtractorFittable()

        # Sample-level wrapper around the extractor, fitted on a single sample
        fit_data = np.arange(4).reshape(2, 2)
        wrapped = SampleWrapper(
            ExtractorTransformer(fake_extractor, model_path=model_file)
        )
        wrapped = wrapped.fit([Sample(fit_data, key="1")])

        inputs = [np.zeros((2, 2)), np.ones((2, 2))]
        expected = [np.zeros((2, 2)), np.ones((2, 2)) @ fit_data]
        probes = [Sample(arr, key=str(idx)) for idx, arr in enumerate(inputs)]

        plain_output = wrapped.transform(probes)
        assert assert_sample(plain_output, expected)

        # Same transform, routed through the checkpointing wrapper
        checkpointed = CheckpointWrapper(
            wrapped,
            features_dir=tmp_dir,
            load_func=fake_extractor.read_feature,
            save_func=fake_extractor.write_feature,
        )
        checkpointed_output = checkpointed.transform(probes)
        assert assert_sample(checkpointed_output, expected)
        assert assert_checkpoints(checkpointed_output, tmp_dir)
示例#3
0
def test_sampleset_collection():
    """Exercise the list-like API of SampleSet and its delayed variants.

    Covers ``len``, ``insert``, ``del``, item assignment and iteration on a
    ``SampleSet``, then checks that ``DelayedSampleSet`` and
    ``DelayedSampleSetCached`` expose the samples returned by their loader.
    """
    n_samples = 10
    X = np.ones(shape=(n_samples, 2), dtype=int)
    sampleset = SampleSet(
        [Sample(data, key=str(i)) for i, data in enumerate(X)], key="1")
    assert len(sampleset) == n_samples

    # Testing insert
    sample = Sample(X, key=100)
    sampleset.insert(1, sample)
    assert len(sampleset) == n_samples + 1

    # Testing delete
    del sampleset[0]
    assert len(sampleset) == n_samples

    # Testing set
    sampleset[0] = copy.deepcopy(sample)

    # Testing iterator
    for i in sampleset:
        assert isinstance(i, Sample)

    def _load(path):
        # Use a context manager so the handle is closed before the temporary
        # directory is removed. The pickle is written by this test itself,
        # so unpickling it is safe here.
        with open(path, "rb") as f:
            return pickle.load(f)

    # Testing the delayed sampleset, plain and cached, with the same scenario
    for delayed_cls in (DelayedSampleSet, DelayedSampleSetCached):
        with tempfile.TemporaryDirectory() as dir_name:

            samples = [Sample(data, key=str(i)) for i, data in enumerate(X)]
            filename = os.path.join(dir_name, "samples.pkl")
            with open(filename, "wb") as f:
                pickle.dump(samples, f)

            sampleset = delayed_cls(functools.partial(_load, filename), key=1)

            assert len(sampleset) == n_samples
            assert sampleset.samples == samples
示例#4
0
    def fit(self, t_scores, y=None):
        """Compute the per-reference mean and standard deviation of the scores.

        The result is stored in ``self.t_stats``, a dict keyed by
        ``reference_id`` whose values are ``Sample`` objects carrying ``mu``
        and ``std`` attributes (their ``data`` is emptied once the statistics
        are computed).

        Parameters
        ----------
        t_scores : iterable
            Score sample sets. Each one must expose ``reference_id`` and be
            iterable over samples whose ``data`` holds a score.
        y : optional
            Unused by this method.

        Returns
        -------
        self
        """
        # TODO: this is inefficient, but it is the most readable solution
        # Stacking scores by biometric reference
        self.t_stats = dict()

        for sset in t_scores:
            self.t_stats[sset.reference_id] = Sample([s.data for s in sset],
                                                     parent=sset)

        # Now computing the statistics in place
        for stats in self.t_stats.values():
            # The stacked scores are a plain list; convert once so that the
            # negation used for the descending sort below is valid.
            data = np.asarray(stats.data)

            # Selecting the top scores
            if self.top_norm:
                # Sorting in descending order so the highest scores come first
                data = -np.sort(-data)
                proportion = int(
                    np.floor(len(data) * self.top_norm_score_fraction))
                data = data[0:proportion]

            # Use the (possibly truncated) selection, not the raw stack,
            # otherwise ``top_norm`` has no effect on the statistics.
            stats.mu = np.mean(data)
            stats.std = np.std(data)
            # The raw scores are no longer needed once mu/std are known
            stats.data = []

        return self
示例#5
0
def test_mod_4hz():
    """Check the mod-4hz VAD annotator, called directly and wrapped for samples."""
    # The registered "mod-4hz" annotator resource must be a Mod_4Hz instance
    loaded = bob.bio.base.load_resource("mod-4hz", "annotator")
    assert isinstance(loaded, bob.bio.spear.annotator.Mod_4Hz)

    # Read input
    sample_rate, audio = _wav()

    # Direct call on the raw signal
    vad = bob.bio.spear.annotator.Mod_4Hz()
    direct_output = vad.transform_one(audio, sample_rate=sample_rate)
    direct_reference = pkg_resources.resource_filename(
        "bob.bio.spear.test", "data/vad_mod_4hz.hdf5"
    )
    _compare(direct_output, direct_reference)

    # Sample-level call: the sample's `rate` attribute is expected to reach
    # transform as `sample_rate`, and the output to land in `annotations`
    # (both driven by the annotator's tags).
    sample_level = wrap(["sample"], vad)
    outputs = sample_level.transform([Sample(data=audio, rate=sample_rate)])
    wrapped_reference = pkg_resources.resource_filename(
        "bob.bio.spear.test", "data/vad_mod_4hz.hdf5"
    )
    _compare(outputs[0].annotations, wrapped_reference)
示例#6
0
def test_preprocessor():
    """Run a fake preprocessor through SampleWrapper, then CheckpointWrapper."""
    fake = FakePreprocesor()

    # Extra-argument mapping handed to SampleWrapper alongside the transformer
    wrapped = SampleWrapper(
        PreprocessorTransformer(fake),
        [("annotations", "annotations")],
    )

    expected = [np.ones((2, 2))]
    inputs = [Sample(np.zeros((2, 2)), key="1", annotations=1)]

    plain_output = wrapped.transform(inputs)
    assert assert_sample(plain_output, expected)

    # Same transform, routed through the checkpointing wrapper
    with tempfile.TemporaryDirectory() as tmp_dir:
        checkpointed = CheckpointWrapper(
            wrapped,
            features_dir=tmp_dir,
            load_func=fake.read_data,
            save_func=fake.write_data,
        )
        checkpointed_output = checkpointed.transform(inputs)

        assert assert_sample(checkpointed_output, expected)
        assert assert_checkpoints(checkpointed_output, tmp_dir)
示例#7
0
def test_extractor():
    """Run a fake extractor through SampleWrapper, then CheckpointWrapper."""
    fake = FakeExtractor()
    wrapped = SampleWrapper(ExtractorTransformer(fake))

    # A 2x2 zero input is expected to come back as a 1x4 zero feature
    expected = [np.zeros((1, 4))]
    inputs = [Sample(np.zeros((2, 2)), key="1")]

    plain_output = wrapped.transform(inputs)
    assert assert_sample(plain_output, expected)

    # Same transform, routed through the checkpointing wrapper
    with tempfile.TemporaryDirectory() as tmp_dir:
        checkpointed = CheckpointWrapper(
            wrapped,
            features_dir=tmp_dir,
            load_func=fake.read_feature,
            save_func=fake.write_feature,
        )
        checkpointed_output = checkpointed.transform(inputs)

        assert assert_sample(checkpointed_output, expected)
        assert assert_checkpoints(checkpointed_output, tmp_dir)
示例#8
0
def test_sample_hdf5():
    """Round-trip a single Sample and a list of Samples through HDF5."""
    n_samples = 10
    X = np.ones(shape=(n_samples, 2), dtype=int)

    samples = [
        Sample(data, key=str(i), subject="Subject") for i, data in enumerate(X)
    ]
    with tempfile.TemporaryDirectory() as dir_name:

        # Single sample
        filename = os.path.join(dir_name, "sample.hdf5")

        with h5py.File(filename, "w", driver="core") as hdf5:
            sample_to_hdf5(samples[0], hdf5)

        with h5py.File(filename, "r") as hdf5:
            sample = hdf5_to_sample(hdf5)

        assert sample == samples[0]

        # List of samples
        filename = os.path.join(dir_name, "samples.hdf5")
        with h5py.File(filename, "w", driver="core") as hdf5:
            sample_to_hdf5(samples, hdf5)

        with h5py.File(filename, "r") as hdf5:
            # Materialise while the file is still open
            samples_deserialized = list(hdf5_to_sample(hdf5))

        # zip() stops at the shorter sequence, so a surplus of deserialized
        # samples would go unnoticed without an explicit length check.
        assert len(samples_deserialized) == n_samples
        assert all(a == b for a, b in zip(samples_deserialized, samples))
示例#9
0
        def _transform_samples(X, stats):
            scores = []
            for no_normed_score in X:
                score = (no_normed_score.data - stats.mu) / stats.std

                t_score = Sample(score, parent=no_normed_score)
                scores.append(t_score)
            return scores
示例#10
0
def get_fake_samples_for_training():
    """Build ten random (3, 400, 400) samples sharing one eye-annotation dict.

    Sample ``i`` uses ``str(i)`` as both its ``key`` and ``reference_id``.
    """
    images = np.random.rand(10, 3, 400, 400)
    # The very same dict object is attached to every sample
    eye_positions = {"reye": (131, 176), "leye": (222, 170)}

    samples = []
    for index, image in enumerate(images):
        label = str(index)
        samples.append(
            Sample(
                image,
                key=label,
                reference_id=label,
                annotations=eye_positions,
            )
        )
    return samples
示例#11
0
        def _transform_samples(X):
            scores = []
            for no_normed_score in X:
                score = (no_normed_score.data -
                         self.z_stats[no_normed_score.reference_id].mu
                         ) / self.z_stats[no_normed_score.reference_id].std

                z_score = Sample(score, parent=no_normed_score)
                scores.append(z_score)
            return scores
示例#12
0
def test_resample():
    """Halve the rate of the test wav through a PathToAudio + Resample pipeline."""
    wav_path = resource_filename("bob.bio.spear.test", "data/sample.wav")
    original_length = 77760
    original_rate = 16000
    target_rate = original_rate // 2

    audio_sample = Sample(data=wav_path, channel=None, rate=original_rate)
    resampler = wrap(["sample"], Resample(target_rate))
    pipeline = make_pipeline(PathToAudio(), resampler)

    output = pipeline.transform([audio_sample])[0]

    # Half the rate must give half the number of audio samples
    expected_shape = (original_length // 2, )
    assert output.data.shape == expected_shape, output.data.shape
 def _create_random_2dsamples(self, n_samples, offset, dim):
     return [
         Sample(
             np.random.rand(dim, dim),
             key=str(uuid.uuid4()),
             annotations=1,
             reference_id=str(i),
             subject_id=str(i),
         )
         for i in range(offset, offset + n_samples)
     ]
示例#14
0
    def create_templates_from_samplesets(self, list_of_samplesets, enroll):
        """Creates enroll or probe templates from multiple SampleSets.

        Samples whose ``data`` is ``None`` are dropped (with a warning)
        before the remaining data is handed to ``self.create_templates``.

        Parameters
        ----------
        list_of_samplesets : list
            A list (length N) of SampleSets.

        enroll : bool
            If True, the SampleSets are for enrollment. If False, the SampleSets
            are for probe.

        Returns
        -------
        templates : list
            A list of Samples which has the same length as ``list_of_samplesets``.
            Each Sample contains a template.

        Raises
        ------
        NotImplementedError
            If ``enroll`` is True and a SampleSet has no valid sample.
        AssertionError
            If ``self.create_templates`` does not return exactly one template
            per SampleSet.
        """
        logger.debug(
            f"{_frmt(self)}.create_templates_from_samplesets(... enroll={enroll})"
        )
        # Name the kind of samples being processed so that the warning below
        # is not misleading when probe samples are dropped.
        sample_kind = "enrollment" if enroll else "probe"

        # create templates from .data attribute of samples inside sample_sets
        list_of_feature_sets = []
        for sampleset in list_of_samplesets:
            data = [s.data for s in sampleset.samples]
            valid_data = [d for d in data if d is not None]
            if len(data) != len(valid_data):
                logger.warning(
                    f"Removed {len(data)-len(valid_data)} invalid {sample_kind} samples."
                )
            if not valid_data and enroll:
                # we do not support failure to enroll cases currently
                raise NotImplementedError(
                    f"None of the enrollment samples were valid for {sampleset}."
                )
            # For probes an empty feature set is passed through unchanged
            list_of_feature_sets.append(valid_data)

        templates = self.create_templates(list_of_feature_sets, enroll)
        expected_size = len(list_of_samplesets)
        assert len(templates) == expected_size, (
            "The number of (%s) templates (%d) created by the algorithm does not match "
            "the number of sample sets (%d)" % (
                "enroll" if enroll else "probe",
                len(templates),
                expected_size,
            ))
        # return a list of Samples (one per template), each created with its
        # source SampleSet as parent
        templates = [
            Sample(t, parent=sampleset)
            for t, sampleset in zip(templates, list_of_samplesets)
        ]
        return templates
示例#15
0
def test_path_to_audio():
    """Load the test wav with PathToAudio, at its own rate and at a forced one."""
    wav_path = resource_filename("bob.bio.spear.test", "data/sample.wav")
    native_length = 77760
    native_rate = 16000

    # Default loading keeps the file's own sample rate
    path_sample = Sample(data=wav_path)
    loaded = PathToAudio().transform([path_sample])[0]
    assert loaded.rate == native_rate, loaded.rate
    assert isinstance(loaded.data, np.ndarray)
    assert loaded.data.shape == (native_length, ), loaded.data.shape

    assert loaded.data.dtype == np.float32, loaded.data.dtype

    # Force a different sample rate
    half_rate = native_rate // 2
    path_sample = Sample(data=wav_path)
    resampled = PathToAudio(forced_sr=half_rate).transform([path_sample])[0]
    assert resampled.rate == native_rate // 2, resampled.rate
    assert isinstance(resampled.data, np.ndarray)
    assert resampled.data.shape == (native_length // 2, ), resampled.data.shape
示例#16
0
def test_delayed_samples():
    """Check DelayedSample loading, attribute inheritance and laziness.

    The order of the assertions matters: ``delayed_attr_read`` is flipped by
    ``load_check`` and is inspected both before and after the first access to
    ``read_check``.
    """

    # Loader callables handed to DelayedSample
    def load_data():
        return 0

    def load_annot():
        return "annotation"

    def load_annot_variant():
        return "annotation_variant"

    # Flag flipped by load_check to record when that loader actually runs
    delayed_attr_read = False

    def load_check():
        nonlocal delayed_attr_read
        delayed_attr_read = True
        return "delayed_attr_data"

    delayed_sample = DelayedSample(load_data,
                                   delayed_attributes=dict(annot=load_annot))
    assert delayed_sample.data == 0, delayed_sample.data
    assert delayed_sample.annot == "annotation", delayed_sample.annot

    # A plain Sample built from a delayed parent must expose the parent's
    # `annot` value, and hold it directly in its __dict__
    child_sample = Sample(1, parent=delayed_sample)
    assert child_sample.data == 1, child_sample.data
    assert child_sample.annot == "annotation", child_sample.annot
    assert child_sample.__dict__ == {
        "data": 1,
        "annot": "annotation",
    }, child_sample.__dict__

    # Overwriting and adding delayed_attributes to the child
    new_delayed_attr = {
        "annot": load_annot_variant,  # Override parent's annot
        "new_annot": load_annot,  # Add the new_annot attribute
        "read_check": load_check,
    }
    child_sample = DelayedSample(load_data,
                                 parent=delayed_sample,
                                 delayed_attributes=new_delayed_attr)
    assert child_sample.data == 0, child_sample.data
    assert child_sample.annot == "annotation_variant", child_sample.annot
    assert child_sample.new_annot == "annotation", child_sample.new_annot
    # read_check has not been accessed yet, so its loader must not have run
    assert not delayed_attr_read, "delayed attribute has been read early"
    assert child_sample.read_check == "delayed_attr_data", child_sample.read_check
    assert delayed_attr_read, "delayed attribute should have been read by now"

    # A delayed attribute can be overwritten by plain assignment
    delayed_sample.annot = "changed"
    assert delayed_sample.annot == "changed", delayed_sample.annot
示例#17
0
def generate_samples(n_subjects,
                     n_samples_per_subject,
                     shape=(2, 2),
                     annotations=1):
    """Generate ``n_samples_per_subject`` samples for each of ``n_subjects``.

    The data of subject ``i`` is ``np.zeros(shape) + i``; all samples of one
    subject share the same array object.

    Parameters
    ----------
    n_subjects : int
        Number of subjects.
    n_samples_per_subject : int
        Number of samples created for every subject.
    shape : tuple
        Shape of the data array of each sample.
    annotations
        Value stored in the ``annotations`` attribute of every sample.

    Returns
    -------
    list
        ``n_subjects * n_samples_per_subject`` samples, grouped by subject,
        each with a unique ``key``.
    """

    samples = []
    for i in range(n_subjects):
        data = np.zeros(shape) + i
        for j in range(n_samples_per_subject):
            samples.append(
                Sample(
                    data,
                    subject=str(i),
                    # Stride by the per-subject count: striding by
                    # n_subjects yields duplicate keys as soon as
                    # n_samples_per_subject > n_subjects.
                    key=str(i * n_samples_per_subject + j),
                    annotations=annotations,
                )
            )
    return samples
示例#18
0
def _delayed_samples_to_samples(delayed_samples):
    return [Sample(sample.data, parent=sample) for sample in delayed_samples]
示例#19
0
    def score_sample_templates(self, probe_samples, enroll_samples,
                               score_all_vs_all):
        """Computes the similarity score between all probe and enroll templates.

        Probes whose data is rejected by ``_data_valid`` are not compared;
        their scores are filled with NaN instead.

        Parameters
        ----------
        probe_samples : list
            A list (length N) of Samples containing probe templates.

        enroll_samples : list
            A list (length M) of Samples containing enroll templates.

        score_all_vs_all : bool
            If True, the similarity scores between all probe and enroll templates
            are computed. If False, the similarity scores between the probes and
            their associated enroll templates are computed.

        Returns
        -------
        score_samplesets : list
            A list of N SampleSets each containing a list of M score Samples if score_all_vs_all
            is True. Otherwise, a list of N SampleSets each containing a list of <=M score Samples
            depending on the database.

        Raises
        ------
        ValueError
            If ``score_all_vs_all`` is False and none of the enroll samples
            matches the references of a probe.
        AssertionError
            If the scores returned by ``self.compare`` do not have the
            expected shape.
        """
        logger.debug(
            f"{_frmt(self)}.score_sample_templates(... score_all_vs_all={score_all_vs_all})"
        )
        # Returns a list of SampleSets where a Sampleset for each probe
        # SampleSet where each Sample inside the SampleSets contains the score
        # for one enroll SampleSet
        score_samplesets = []
        if score_all_vs_all:
            # Only the valid probes are sent to compare(); their original
            # positions are remembered so the gaps can be refilled afterwards
            probe_data = [s.data for s in probe_samples]
            valid_probe_indices = [
                i for i, d in enumerate(probe_data) if _data_valid(d)
            ]
            valid_probe_data = [probe_data[i] for i in valid_probe_indices]
            scores = self.compare(SampleBatch(enroll_samples),
                                  valid_probe_data)
            scores = np.asarray(scores, dtype=float)

            if len(valid_probe_indices) != len(probe_data):
                # inject None scores for invalid probe samples
                # (work on the transpose so that each list row is one probe)
                scores: list = scores.T.tolist()
                for i in range(len(probe_data)):
                    if i not in valid_probe_indices:
                        scores.insert(i, [None] * len(enroll_samples))
                # transpose back to original shape
                # (None entries become NaN under dtype=float)
                scores = np.array(scores, dtype=float).T

            # Rows index enroll templates, columns index probes
            expected_shape = (len(enroll_samples), len(probe_samples))
            assert scores.shape == expected_shape, (
                "The shape of the similarity scores (%s) does not match the expected shape (%s)"
                % (scores.shape, expected_shape))
            # One SampleSet per probe, holding one score Sample per enroll
            for j, probe in enumerate(probe_samples):
                samples = []
                for i, enroll in enumerate(enroll_samples):
                    samples.append(Sample(scores[i, j], parent=enroll))
                score_samplesets.append(SampleSet(samples, parent=probe))
        else:
            for probe in probe_samples:
                # References are compared as strings on both sides
                references = [str(ref) for ref in probe.references]
                # get the indices of references for enroll samplesets
                indices = [
                    i for i, enroll in enumerate(enroll_samples)
                    if str(enroll.reference_id) in references
                ]
                if not indices:
                    raise ValueError(
                        f"No enroll sampleset found for probe {probe} and its required references {references}. "
                        "Did you mean to set score_all_vs_all=True?")
                if not _data_valid(probe.data):
                    # Invalid probe: one None score per selected enroll sample,
                    # turned into NaN by the float conversion below
                    scores = [[None]] * len(indices)
                else:
                    scores = self.compare(
                        SampleBatch([enroll_samples[i] for i in indices]),
                        SampleBatch([probe]),
                    )
                scores = np.asarray(scores, dtype=float)
                # One row per selected enroll sample, a single probe column
                expected_shape = (len(indices), 1)
                assert scores.shape == expected_shape, (
                    "The shape of the similarity scores (%s) does not match the expected shape (%s)"
                    % (scores.shape, expected_shape))
                samples = []
                # i is the row in scores, j the position in enroll_samples
                for i, j in enumerate(indices):
                    samples.append(
                        Sample(scores[i, 0], parent=enroll_samples[j]))
                score_samplesets.append(SampleSet(samples, parent=probe))

        return score_samplesets