Exemplo n.º 1
0
    def __init__(
            self,
            protocol,
            annotation_type="eyes-center",
            fixed_positions=None,
            dataset_original_directory=rc.get("bob.db.mobio.directory", ""),
            dataset_original_extension=rc.get("bob.db.mobio.extension",
                                              ".png"),
    ):
        """Configure the Mobio database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "mobio.tar.gz",
            MobioDatabase.urls(),
            file_hash="4a7f99b33a54b2dd337ddcaecb09edb8",
        )

        # Turn each CSV row into a sample, then attach eye annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=dataset_original_directory,
                extension=dataset_original_extension,
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="mobio",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 2
0
    def __init__(self,
                 protocol,
                 annotation_type="eyes-center",
                 fixed_positions=None):
        """Configure the CAS-PEAL database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "caspeal.tar.gz",
            CaspealDatabase.urls(),
            file_hash="1c77f660ef85fa263a2312fd8263d0d9",
        )

        # Root directory of the raw images; empty string when unconfigured.
        directory = rc["bob.bio.face.caspeal.directory"]
        if not directory:
            directory = ""

        # Turn each CSV row into a sample, then attach eye annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=directory,
                extension=".png",
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="caspeal",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 3
0
    def __init__(self,
                 protocol,
                 annotation_type="eyes-center",
                 fixed_positions=None):
        """Configure the CASIA-Africa database interface.

        Parameters
        ----------
        protocol : str
            Name of the evaluation protocol to load.
        annotation_type : str
            Type of annotation attached to each sample.
        fixed_positions : dict or None
            Optional fixed landmark positions overriding per-sample annotations.
        """

        # Downloading model if not exists
        urls = CasiaAfricaDatabase.urls()
        filename = get_file(
            "casia-africa.tar.gz",
            urls,
            file_hash="080d4bfffec95a6445507065054757eb",
        )

        # BUG FIX: the fallback test previously read
        # rc["bob.db.casia-africa.directory "] — note the trailing space — a
        # key that can never be set, so the configured directory was discarded
        # whenever that mistyped lookup came back falsy. Use the same,
        # correctly spelled key for both the value and the test.
        directory = (rc["bob.db.casia-africa.directory"]
                     if rc["bob.db.casia-africa.directory"] else "")

        super().__init__(
            name="casia-africa",
            dataset_protocol_path=filename,
            protocol=protocol,
            csv_to_sample_loader=make_pipeline(
                CSVToSampleLoaderBiometrics(
                    data_loader=bob.io.base.load,
                    dataset_original_directory=directory,
                    extension=".jpg",
                    # Reference ids are distinct from subject ids here.
                    reference_id_equal_subject_id=False,
                ),
                EyesAnnotations(),
            ),
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 4
0
    def __init__(
        self,
        protocol,
        annotation_type="eyes-center",
        fixed_positions=None,
        dataset_original_directory=rc.get("bob.db.meds.directory", ""),
        dataset_original_extension=".jpg",
    ):
        """Configure the MEDS database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "meds.tar.gz",
            MEDSDatabase.urls(),
            file_hash="3b01354d4c170672ac14120b80dace75",
        )

        # Turn each CSV row into a sample, then attach eye annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                # Guard against callers explicitly passing a falsy directory.
                dataset_original_directory=dataset_original_directory or "",
                extension=dataset_original_extension,
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="meds",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 5
0
    def __init__(self,
                 protocol,
                 annotation_type="eyes-center",
                 fixed_positions=None):
        """Configure the Multi-PIE database interface.

        Note that whatever `annotation_type` is passed, the parent class is
        configured with the fixed multi-pose annotation list below.
        """

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "multipie.tar.gz",
            MultipieDatabase.urls(),
            file_hash="6c27c9616c2d0373c5f052b061d80178",
        )

        # Root directory of the raw images; empty string when unconfigured.
        directory = rc["bob.db.multipie.directory"]
        if not directory:
            directory = ""

        # Turn each CSV row into a sample, then attach multi-pose annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=directory,
                extension=".png",
            ),
            MultiposeAnnotations(),
        )

        super().__init__(
            name="multipie",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            # Multi-PIE samples may be frontal or profile views.
            annotation_type=["eyes-center", "left-profile", "right-profile"],
            fixed_positions=fixed_positions,
        )
Exemplo n.º 6
0
    def __init__(self,
                 protocol,
                 annotation_type="eyes-center",
                 fixed_positions=None):
        """Configure the AR Face database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "arface.tar.gz",
            ARFaceDatabase.urls(),
            file_hash="66cf05fe03adb8d73a76fd75641dd468",
        )

        # Root directory of the raw images; empty string when unconfigured.
        directory = rc["bob.bio.face.arface.directory"]
        if not directory:
            directory = ""

        # File extension of the raw images; default to ".ppm".
        extension = rc["bob.bio.face.arface.extension"]
        if not extension:
            extension = ".ppm"

        # Turn each CSV row into a sample, then attach eye annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=directory,
                extension=extension,
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="arface",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 7
0
    def __init__(self,
                 protocol,
                 annotation_type="eyes-center",
                 fixed_positions=None):
        """Configure the FRGC database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "frgc.tar.gz",
            FRGCDatabase.urls(),
            file_hash="242168e993fe0f6f29bd59fccf3c79a0",
        )

        # Turn each CSV row into a sample, then attach eye annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=rc.get(
                    "bob.bio.face.frgc.directory", ""),
                extension="",
                # Reference ids are distinct from subject ids here.
                reference_id_equal_subject_id=False,
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="frgc",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
            score_all_vs_all=True,
            group_probes_by_reference_id=True,
            memory_demanding=True,
        )

        # Use string-based hashing (see parent class usage of `hash_fn`).
        self.hash_fn = hash_string
Exemplo n.º 8
0
    def __init__(
        self,
        protocol,
        annotation_type="eyes-center",
        fixed_positions=None,
        dataset_original_directory=rc.get("bob.db.morph.directory", ""),
        dataset_original_extension=".JPG",
    ):
        """Configure the MORPH database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "morph.tar.gz",
            MorphDatabase.urls(),
            file_hash="9efa1ff13ef6984ebfcf86f1b1f58873",
        )

        # Turn each CSV row into a sample, then attach eye annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                # Guard against callers explicitly passing a falsy directory.
                dataset_original_directory=dataset_original_directory or "",
                extension=dataset_original_extension,
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="morph",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 9
0
    def __init__(
        self,
        protocol,
        dataset_original_directory=rc.get("bob.bio.face.vgg2.directory", ""),
        dataset_original_extension=rc.get("bob.bio.face.vgg2.extension",
                                          ".jpg"),
        annotation_type="eyes-center",
        fixed_positions=None,
    ):
        """Configure the VGG2 database interface for the given protocol."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "vgg2.tar.gz",
            VGG2Database.urls(),
            file_hash="4a05d797a326374a6b52bcd8d5a89d48",
        )

        # Turn each CSV row into a sample, then attach VGG2 annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=dataset_original_directory,
                extension=dataset_original_extension,
            ),
            VGG2Annotations(),
        )

        super().__init__(
            name="vgg2",
            dataset_protocol_path=protocol_archive,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 10
0
    def run(filename):
        """Build a ZT-norm dataset from `filename` and verify sample counts.

        Checks background/reference/probe sizes for the dev and eval groups,
        sample annotations, and the ZT-norm cohort accessors.
        """
        znorm_dataset = CSVDatasetZTNorm(
            name="example_csv_filelist",
            dataset_protocol_path=filename,
            protocol="protocol_dev_eval",
            csv_to_sample_loader=make_pipeline(
                CSVToSampleLoaderBiometrics(
                    data_loader=bob.io.base.load,
                    dataset_original_directory="",
                    extension="",
                ),
                AnnotationsLoader(
                    annotation_directory=annotation_directory,
                    annotation_extension=".json",
                    annotation_type="json",
                ),
            ),
        )

        assert len(znorm_dataset.background_model_samples()) == 8
        assert check_all_true(
            znorm_dataset.background_model_samples(), DelayedSample
        )

        assert len(znorm_dataset.references()) == 2
        assert check_all_true(znorm_dataset.references(), SampleSet)

        assert len(znorm_dataset.probes()) == 8
        # BUG FIX: this previously re-checked `references()`; the intent is to
        # validate the probes whose count was asserted just above.
        assert check_all_true(znorm_dataset.probes(), SampleSet)

        assert len(znorm_dataset.references(group="eval")) == 6
        assert check_all_true(znorm_dataset.references(group="eval"), SampleSet)

        assert len(znorm_dataset.probes(group="eval")) == 13
        assert check_all_true(znorm_dataset.probes(group="eval"), SampleSet)

        assert len(znorm_dataset.all_samples(groups=None)) == 47
        assert check_all_true(
            znorm_dataset.all_samples(groups=None), DelayedSample
        )

        # Check the annotations
        for s in znorm_dataset.all_samples(groups=None):
            assert isinstance(s.annotations, dict)

        assert len(znorm_dataset.reference_ids(group="dev")) == 2
        assert len(znorm_dataset.reference_ids(group="eval")) == 6
        assert len(znorm_dataset.groups()) == 3

        # Checking ZT-Norm stuff
        assert len(znorm_dataset.treferences()) == 2
        assert len(znorm_dataset.zprobes()) == 8

        # `proportion` halves the cohort sizes.
        assert len(znorm_dataset.treferences(proportion=0.5)) == 1
        assert len(znorm_dataset.zprobes(proportion=0.5)) == 4
Exemplo n.º 11
0
def test_atnt_experiment():
    """Run the toy experiment on the AT&T database and check score counts."""

    dataset = CSVDataset(
        name="atnt",
        dataset_protocol_path=atnt_protocol_path,
        protocol="idiap_protocol",
        csv_to_sample_loader=CSVToSampleLoaderBiometrics(
            data_loader=data_loader,
            dataset_original_directory=ATNT_DATABASE.original_directory,
            extension=".pgm",
        ),
    )

    scores = run_experiment(dataset)
    assert len(scores) == 100
    # BUG FIX: the original `np.alltrue([len(s) == 20] for s in scores)` passed
    # a generator of non-empty one-element lists, which is always truthy, so
    # the assertion could never fail (np.alltrue is also deprecated). Assert
    # the per-probe score count directly instead.
    assert all(len(s) == 20 for s in scores)
Exemplo n.º 12
0
def test_csv_cross_validation_atnt():
    """Check sample counts of a cross-validation split of the AT&T database."""

    sample_loader = CSVToSampleLoaderBiometrics(
        data_loader=data_loader,
        dataset_original_directory=ATNT_DATABASE.original_directory,
        extension=".pgm",
    )
    dataset = CSVDatasetCrossValidation(
        name="test",
        csv_file_name=atnt_protocol_path_cross_validation,
        random_state=0,
        test_size=0.8,
        csv_to_sample_loader=sample_loader,
    )

    # With test_size=0.8, 80 of the 400 samples remain for training.
    assert len(dataset.background_model_samples()) == 80
    assert len(dataset.references("dev")) == 32
    assert len(dataset.probes("dev")) == 288
    assert len(dataset.all_samples(groups=None)) == 400
Exemplo n.º 13
0
    def run_cross_validation_experiment(test_size=0.9):
        """Run an experiment on a cross-validation split and check score counts."""
        sample_loader = CSVToSampleLoaderBiometrics(
            data_loader=data_loader,
            dataset_original_directory=ATNT_DATABASE.original_directory,
            extension=".pgm",
        )
        dataset = CSVDatasetCrossValidation(
            name="atnt",
            csv_file_name=atnt_protocol_path_cross_validation,
            random_state=0,
            test_size=test_size,
            csv_to_sample_loader=sample_loader,
        )

        scores = run_experiment(dataset)
        # One score list per probe sample of every test-split identity.
        expected_count = int(
            total_identities
            * test_size
            * (samples_per_identity - samples_for_enrollment)
        )
        assert len(scores) == expected_count
Exemplo n.º 14
0
    def __init__(self,
                 protocol,
                 annotation_type="eyes-center",
                 fixed_positions=None):
        """Configure the CBSR NIR-VIS-2 database interface.

        Parameters
        ----------
        protocol : str
            Name of the evaluation protocol to load.
        annotation_type : str
            Type of annotation attached to each sample.
        fixed_positions : dict or None
            Optional fixed eye positions overriding per-sample annotations.
        """

        # Downloading model if not exists
        urls = CBSRNirVis2Database.urls()
        filename = get_file(
            "cbsr-nir-vis2.tar.gz",
            urls,
            file_hash="e4bda52ab6754556783d6730eccc2ae2",
        )

        directory = (rc["bob.db.cbsr-nir-vis-2.directory"]
                     if rc["bob.db.cbsr-nir-vis-2.directory"] else "")

        def load(filename):
            """Load an image whose real extension may be .jpg or .bmp."""
            # Hoisted out of the loop: the stem does not change per extension.
            stem = os.path.splitext(filename)[0]
            for e in [".jpg", ".bmp"]:
                new_filename = stem + e
                if os.path.exists(new_filename):
                    return bob.io.base.load(new_filename)
            # Replaced a misleading `for`/`else` (the loop had no `break`, so
            # the `else` always ran on fall-through) with a plain raise; the
            # message still names the last candidate tried, as before.
            raise ValueError("File `{0}` not found".format(
                str(new_filename)))

        super().__init__(
            name="cbsr-nir-vis2",
            dataset_protocol_path=filename,
            protocol=protocol,
            csv_to_sample_loader=make_pipeline(
                CSVToSampleLoaderBiometrics(
                    data_loader=load,
                    dataset_original_directory=directory,
                    extension=".jpg",
                ),
                EyesAnnotations(),
            ),
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 15
0
    def __init__(
        self,
        protocol="idiap_protocol",
        dataset_original_directory=None,
        **kwargs,
    ):
        """Configure the AT&T database interface.

        When `dataset_original_directory` is None, the image archive is
        downloaded from the Idiap data server and extracted into the cache.
        """

        # Protocol definition CSVs ship with the bob.bio.base test data.
        dataset_protocol_path = pkg_resources.resource_filename(
            "bob.bio.base", "test/data"
        )

        if dataset_original_directory is None:
            archive = get_file(
                "atnt_faces.zip",
                ["http://www.idiap.ch/software/bob/data/bob/att_faces.zip"],
                file_hash="6efb25cb0d40755e9492b9c012e3348d",
                cache_subdir="datasets/atnt",
                extract=True,
            )
            # The images are extracted next to the downloaded archive.
            dataset_original_directory = str(Path(archive).parent)

        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory=dataset_original_directory,
                extension=".pgm",
            ),
        )

        super().__init__(
            name="atnt",
            dataset_protocol_path=dataset_protocol_path,
            protocol=protocol,
            csv_to_sample_loader=sample_loader,
            **kwargs,
        )
        # Expose original_directory/extension for backward compatibility of tests.
        self.original_directory = dataset_original_directory
        self.original_extension = ".pgm"
Exemplo n.º 16
0
    def __init__(
        self, protocol, annotation_type="eyes-center", fixed_positions=None
    ):
        """Configure the polarimetric-thermal database interface."""

        # Fetch (and cache) the protocol-definition archive if not present.
        protocol_archive = get_file(
            "polathermal.tar.gz",
            PolaThermalDatabase.urls(),
            file_hash="4693149bc883debe5a9e1441a4f5f4ae",
        )

        # Root directory of the raw images; empty string when unconfigured.
        directory = rc.get("bob.db.pola-thermal.directory", "")

        def load(path):
            """Load a 16-bit PNG [0-65535] and rescale it to [0-255].

            bob.bio.face assumes 8-bit image ranges, so we divide by
            257 (65535 / 255 = 257).
            """
            return bob.io.base.load(path) / 257

        # Turn each CSV row into a rescaled sample, then attach annotations.
        sample_loader = make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=load,
                dataset_original_directory=directory,
                extension=".png",
            ),
            EyesAnnotations(),
        )

        super().__init__(
            name="polathermal",
            protocol=protocol,
            dataset_protocol_path=protocol_archive,
            csv_to_sample_loader=sample_loader,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
        )
Exemplo n.º 17
0
def SpearBioDatabase(
    name: str,
    protocol: Optional[str] = None,
    dataset_protocol_path: Optional[str] = None,
    data_path: Optional[str] = None,
    data_ext: str = ".wav",
    annotations_path: Optional[str] = None,
    annotations_ext: str = ".json",
    force_sample_rate: Optional[int] = None,
    force_channel: Optional[int] = None,
    **kwargs,
):
    """Database interface for the bob.bio.spear datasets for speaker recognition.

    This database interface is meant to be used with bob.bio.base pipelines.

    Given a series of CSV files (or downloading them from the bob data server), it
    creates the Sample objects for each roles needed by the pipeline (enroll, probe),
    for different groups (train, dev, eval).

    Each sample contains:

        - `data`: the wav audio data,
        - `rate`: the sample rate of `data`,
        - (optional)`annotations`: some annotations loaded from files if
          `annotations_path` is provided.

    `protocol definition` files (CSV files) are not the `data` files (WAV files):

        - `protocol definition` files are a list of paths and corresponding reference
          name. They are available on the bob data server.
        - `data` files are the actual files of the dataset (pointed to by the definition
          files). They are not provided by bob.

    You have to set the bob configuration to the root folder of the data files using
    the following command:

    ``$ bob config set bob.db.<database_name>.directory <your_path_to_data>``

    The final data paths will be constructed with the bob.db.<database_name>.directory
    key, and the paths in the CSV protocol definition files.

    Parameters
    ----------

    name
        name of the database used for retrieving config keys and files.

    protocol
        protocol to use (sub-folder containing the protocol definition files).

    dataset_protocol_path
        Path to an existing protocol definition folder structure.
        If None: will download the definition files to a datasets folder in the path
        pointed by the ``bob_data_folder`` config (see
        :py:func:`bob.extension.download.get_file`).

    data_path
        Path to the data files of the database.
        If None: will use the path in the ``bob.db.<database_name>.directory`` config.

    data_ext
        File extension of the data files.

    annotations_path
        Path to the annotations files of the dataset, if available.
        If None: will not load any annotations (you could then annotate on the fly with
        a transformer).

    annotations_ext
        If annotations_path is provided, will load annotation using this extension.

    force_sample_rate
        If not None, will force the sample rate of the data to a specific value.
        Otherwise the sample rate will be specified by each loaded file.

    force_channel
        If not None, will force to load the nth channel of each file. If None and the
        samples have a ``channel`` attribute, this channel will be loaded, and
        otherwise all channels will be loaded in a 2D array if multiple are present.
    """

    # Fall back to the protocol definition files for `name` (downloaded to the
    # local bob data folder when not already cached).
    if dataset_protocol_path is None:
        dataset_protocol_path = get_protocol_file(name)

    logger.info(
        f"Database: Will read the CSV protocol definitions in '{dataset_protocol_path}'."
    )

    # Some databases are registered under a different rc key than their public
    # name; fall back to `name` itself when no mapping is known.
    rc_db_name = known_databases.get(name, {}).get("rc_name", name)

    if data_path is None:
        data_path = rc.get(f"bob.db.{rc_db_name}.directory")
    if data_path is None:
        # Neither the argument nor the bob config provide a data root: abort
        # with instructions rather than failing later on unreadable paths.
        raise RuntimeError(
            f"No data path was provided! Either set 'bob.db.{rc_db_name}.directory' "
            "with the 'bob config set' command, or provide a 'data_path' to "
            "'SpearBioDatabase'."
        )

    logger.info(f"Database: Will read raw data files in '{data_path}'.")

    # Define the data loading transformers

    # Load a path into the data of the sample
    sample_loader = CSVToSampleLoaderBiometrics(
        data_loader=path_loader,
        dataset_original_directory=data_path,
        extension=data_ext,
        # voxceleb is the one dataset here whose reference ids differ from
        # its subject ids.
        reference_id_equal_subject_id=name not in ["voxceleb"],
    )

    # Read the file at path and set the data and metadata of a sample
    path_to_sample = PathToAudio(
        forced_channel=force_channel, forced_sr=force_sample_rate
    )

    # Build the data loading pipeline
    if annotations_path is None:
        sample_loader = Pipeline(
            [
                ("db:reader_loader", sample_loader),
                ("db:path_to_sample", path_to_sample),
            ]
        )
    else:
        logger.info(
            f"Database: Will read annotation files in '{annotations_path}'."
        )
        # Append an annotation-loading step after the audio loading steps.
        annotations_transformer = AnnotationsLoader(
            annotation_directory=annotations_path,
            annotation_extension=annotations_ext,
        )
        sample_loader = Pipeline(
            [
                ("db:reader_loader", sample_loader),
                ("db:path_to_sample", path_to_sample),
                ("db:annotations_loader", annotations_transformer),
            ]
        )

    return CSVDataset(
        name=name,
        protocol=protocol,
        dataset_protocol_path=dataset_protocol_path,
        csv_to_sample_loader=sample_loader,
        # voxceleb uses a sparse probe/reference comparison list instead of
        # scoring all probes against all references.
        score_all_vs_all=name not in ["voxceleb"],
        is_sparse=name in ["voxceleb"],
        **kwargs,
    )
Exemplo n.º 18
0
def test_csv_file_list_dev_eval_sparse():
    """Check counts and types of a sparse dev/eval CSV file-list dataset."""

    annotation_directory = os.path.realpath(
        bob.io.base.test_utils.datafile(
            ".", __name__, "data/example_csv_filelist/annotations"
        )
    )

    dataset = CSVDataset(
        name="example_csv_filelist",
        dataset_protocol_path=example_dir,
        protocol="protocol_dev_eval_sparse",
        csv_to_sample_loader=make_pipeline(
            CSVToSampleLoaderBiometrics(
                data_loader=bob.io.base.load,
                dataset_original_directory="",
                extension="",
            ),
            AnnotationsLoader(
                annotation_directory=annotation_directory,
                annotation_extension=".json",
                annotation_type="json",
            ),
        ),
        is_sparse=True,
    )

    assert len(dataset.background_model_samples()) == 8
    assert check_all_true(dataset.background_model_samples(), DelayedSample)

    assert len(dataset.references()) == 2
    assert check_all_true(dataset.references(), SampleSet)

    probes = dataset.probes()
    assert len(probes) == 8

    # here, 1 comparisons comparison per probe
    for p in probes:
        assert len(p.references) == 1
    # BUG FIX: this previously re-checked `references()`; the intent is to
    # validate the probes retrieved just above.
    assert check_all_true(probes, SampleSet)

    assert len(dataset.references(group="eval")) == 6
    assert check_all_true(dataset.references(group="eval"), SampleSet)

    probes = dataset.probes(group="eval")
    assert len(probes) == 13
    assert check_all_true(probes, SampleSet)
    # Here, 1 comparison per probe, EXCEPT THE FIRST ONE
    for i, p in enumerate(probes):
        if i == 0:
            assert len(p.references) == 2
        else:
            assert len(p.references) == 1

    assert len(dataset.all_samples(groups=None)) == 48
    assert check_all_true(dataset.all_samples(groups=None), DelayedSample)

    # Check the annotations
    for s in dataset.all_samples(groups=None):
        assert isinstance(s.annotations, dict)

    assert len(dataset.reference_ids(group="dev")) == 2
    assert len(dataset.reference_ids(group="eval")) == 6

    assert len(dataset.groups()) == 3