Example #1
0
    def probes(self, group="dev"):
        """Return the probe samplesets for the current protocol.

        The result is computed once per protocol and cached in
        ``self.probes_dict``; ``group`` is accepted for API
        compatibility but not used here.
        """
        protocol = self.protocol
        if protocol not in self.probes_dict:
            # Lazily load the eye annotations on first use.
            if self.annotations is None:
                annotation_file = search_file(self.filename, "alleyes.csv")
                self.annotations = load_annotations(annotation_file)

            query_file = search_file(
                self.filename, f"GBU_{protocol}_Query.xml"
            )
            reference_ids = [ref.reference_id for ref in self.references()]

            self.probes_dict[protocol] = self._make_sampleset_from_filedict(
                read_list(query_file), reference_ids
            )
        return self.probes_dict[protocol]
Example #2
0
    def references(self, group="dev"):
        """Return the enrollment (target) samplesets for the protocol.

        Computed on first access and cached in ``self.references_dict``;
        ``group`` is kept for API compatibility only.
        """
        protocol = self.protocol
        if protocol in self.references_dict:
            return self.references_dict[protocol]

        # Eye annotations are loaded lazily, shared across protocols.
        if self.annotations is None:
            self.annotations = load_annotations(
                search_file(self.filename, "alleyes.csv")
            )

        target_file = search_file(self.filename, f"GBU_{protocol}_Target.xml")
        self.references_dict[protocol] = self._make_sampleset_from_filedict(
            read_list(target_file),
        )
        return self.references_dict[protocol]
Example #3
0
    def background_model_samples(self):
        """Return the training (background) samples, loading them lazily.

        The first call reads every file listed in
        ``self._background_files`` and caches the concatenated result in
        ``self.background_samples``.
        """
        if self.background_samples is not None:
            return self.background_samples

        if self.annotations is None:
            self.annotations = load_annotations(
                search_file(self.filename, "alleyes.csv")
            )

        samples = []
        for background_file in self._background_files:
            list_file = search_file(self.filename, f"{background_file}")
            samples += self._make_sampleset_from_filedict(
                read_list(list_file)
            )
        self.background_samples = samples

        return self.background_samples
Example #4
0
    def __init__(self, **kwargs):
        """Locate the Z-norm and T-norm list files for the current protocol.

        Both the legacy ``.lst`` and the newer ``.csv`` variants are
        accepted (the ``.lst`` one is searched first).

        Raises
        ------
        ValueError
            If the ``for_znorm`` or ``for_tnorm`` list file is missing
            from ``<protocol>/norm``.
        """
        super().__init__(**kwargs)

        # create_cache
        self.cache["znorm_csv_dev"] = None
        self.cache["znorm_csv_eval"] = None
        self.cache["tnorm_csv"] = None

        znorm_csv = search_file(
            self.dataset_protocol_path,
            [
                os.path.join(self.protocol, "norm", "for_znorm.lst"),
                os.path.join(self.protocol, "norm", "for_znorm.csv"),
            ],
        )

        tnorm_csv = search_file(
            self.dataset_protocol_path,
            [
                os.path.join(self.protocol, "norm", "for_tnorm.lst"),
                os.path.join(self.protocol, "norm", "for_tnorm.csv"),
            ],
        )

        if znorm_csv is None:
            raise ValueError(
                f"The file `for_znorm.lst` is required and it was not found in `{self.protocol}/norm` "
            )

        if tnorm_csv is None:
            # Fixed message: it was missing the word "in" and named the
            # `.csv` variant while `.lst` is searched first, unlike the
            # parallel znorm message above.
            raise ValueError(
                f"The file `for_tnorm.lst` is required and it was not found in `{self.protocol}/norm`"
            )

        # The same cohort file serves both dev and eval Z-norm groups.
        self.znorm_csv_dev = znorm_csv
        self.znorm_csv_eval = znorm_csv
        self.tnorm_csv = tnorm_csv
Example #5
0
    def __init__(
        self,
        database_path=rc.get("bob.bio.face.webface42M.directory", ""),
        transform=None,
    ):
        """Open the WebFace42M file list and remember the data location.

        ``database_path`` defaults to the ``bob`` configuration entry;
        ``transform`` is stored untouched for later use.
        """
        self.database_path = database_path

        # Fail early when the data location was never configured.
        if database_path == "":
            raise ValueError(
                "`database_path` is empty; please do `bob config set bob.bio.face.webface42M.directory` to set the absolute path of the data"
            )

        protocol_archive = get_file(
            "webface42M.tar.gz",
            WebFace42M.urls(),
            file_hash="50c32cbe61de261466e1ea3af2721cea",
        )
        self.file = search_file(protocol_archive, "webface42M.csv")

        # NOTE(review): presumably the number of header/offset lines before
        # the first record in the CSV — confirm against the file format.
        self._line_offset = 51
        self.transform = transform
Example #6
0
def download_voxforge(
    protocol_definition, destination, force_download, **kwargs
):
    """Downloads a series of VoxForge data files from their repository and untar them.

    The files will be downloaded and saved in the `destination` folder then extracted.

    The list of URLs is provided in the protocol definition file of Voxforge.

    Parameters
    ----------
    protocol_definition : str or None
        Path of the protocol definition archive; when ``None``, the default
        ``voxforge`` protocol file is retrieved with ``get_protocol_file``.
    destination : str or Path
        Folder where the files are downloaded and extracted. Created
        (including parents) when missing.
    force_download : bool
        Download files even when they already exist locally.
    """

    destination = Path(destination)
    # `parents=True` so a nested destination path does not make mkdir fail.
    destination.mkdir(parents=True, exist_ok=True)

    if protocol_definition is None:
        protocol_definition = get_protocol_file("voxforge")

    # Use the `Default` protocol
    protocol = "Default"

    # Open the list file
    list_file = f"{protocol}/data_files_urls.csv"
    open_list_file = search_file(protocol_definition, [list_file])

    try:
        # First pass counts the rows (minus the header) for the progress bar,
        # then rewind for the CSV reader.
        num_files = sum(1 for _ in open_list_file) - 1
        open_list_file.seek(0, 0)
        logger.info(
            f"{num_files} files are listed in {list_file}. Downloading..."
        )

        csv_list_file = csv.DictReader(open_list_file)

        for row in tqdm(csv_list_file, total=num_files):
            full_filename = destination / row["filename"]
            if force_download or not full_filename.exists():
                logger.debug(
                    f"Downloading {row['filename']} from {row['url']}"
                )
                download_and_unzip(urls=[row["url"]], filename=full_filename)
                logger.debug(f"Downloaded to {full_filename}")

        logger.info(f"Download of {num_files} files completed.")
    finally:
        # Close the list handle even when a download raises (the original
        # leaked it on error).
        open_list_file.close()
Example #7
0
 def list_file(self, group):
     """Locate the ``<protocol>/<group>.csv`` list file for *group*."""
     return search_file(
         self.dataset_protocols_path,
         os.path.join(self.protocol, group + ".csv"),
     )
Example #8
0
def read_annotation_file(file_name, annotation_type):
    """This function provides default functionality to read annotation files.

    Parameters
    ----------
    file_name : str
        The full path of the annotation file to read. The path can also be
        like ``base_path:relative_path`` where the base_path can be both a
        directory or a tarball. This allows you to read annotations from
        inside a tarball.
    annotation_type : str
        The type of the annotation file that should be read. The following
        annotation_types are supported:

          * ``eyecenter``: The file contains a single row with four entries:
            ``re_x re_y le_x le_y``
          * ``named``: The file contains named annotations, one per line,
            e.g.: ``reye re_x re_y`` or ``pose 25.7``
          * ``idiap``: The file contains enumerated annotations, one per
            line, e.g.: ``1 key1_x key1_y``, and maybe some additional
            annotations like gender, age, ...
          * ``json``: The file contains annotations of any format, dumped in
            a text json file.

    Returns
    -------
    dict
        A python dictionary with the keypoint name as key and the
        position ``(y,x)`` as value, and maybe some additional annotations.

    Raises
    ------
    IOError
        If the annotation file is not found.
    ValueError
        If the annotation type is not known.
    """
    if not file_name:
        return None

    if ":" in file_name:
        # `base:tail` syntax: look the tail up inside a directory or tarball.
        base_path, tail = file_name.split(":", maxsplit=1)
        f = search_file(base_path, [tail])
    else:
        if not os.path.exists(file_name):
            raise IOError("The annotation file '%s' was not found" % file_name)
        f = open(file_name)

    annotations = {}

    try:
        if str(annotation_type) == 'eyecenter':
            # only the eye positions are written, all are in the first row;
            # positions are stored as (y, x) pairs
            line = f.readline()
            positions = line.split()
            assert len(positions) == 4
            annotations['reye'] = (float(positions[1]), float(positions[0]))
            annotations['leye'] = (float(positions[3]), float(positions[2]))

        elif str(annotation_type) == 'named':
            # multiple lines, no header line, each line contains annotation
            # and position, or a single-value annotation
            for line in f:
                positions = line.split()
                if len(positions) == 3:
                    annotations[positions[0]] = (float(positions[2]),
                                                 float(positions[1]))
                elif len(positions) == 2:
                    annotations[positions[0]] = float(positions[1])
                else:
                    logger.error(
                        "Could not interpret line '%s' in annotation file '%s'",
                        line, file_name)

        elif str(annotation_type) == 'idiap':
            # Idiap format: multiple lines, no header, each line contains an
            # integral keypoint identifier, or another identifier like
            # 'gender', 'age', ...
            for line in f:
                positions = line.rstrip().split()
                if not positions:
                    continue
                if positions[0].isdigit():
                    # position field (`index` instead of shadowing the
                    # builtin `id`)
                    assert len(positions) == 3
                    index = int(positions[0])
                    annotations[_idiap_annotations[index]] = (
                        float(positions[2]), float(positions[1]))
                else:
                    # another field; we take the first entry as key and the
                    # rest as values
                    annotations[positions[0]] = positions[1:]
            # finally, we add the eye center coordinates as the center
            # between the eye corners; the annotations 3 and 8 are the
            # pupils...
            if 'reyeo' in annotations and 'reyei' in annotations:
                annotations['reye'] = (
                    (annotations['reyeo'][0] + annotations['reyei'][0]) / 2.,
                    (annotations['reyeo'][1] + annotations['reyei'][1]) / 2.)
            if 'leyeo' in annotations and 'leyei' in annotations:
                annotations['leye'] = (
                    (annotations['leyeo'][0] + annotations['leyei'][0]) / 2.,
                    (annotations['leyeo'][1] + annotations['leyei'][1]) / 2.)

        elif str(annotation_type) == 'json':
            # OrderedDict preserves the on-disk key order
            annotations = json.load(f,
                                    object_pairs_hook=collections.OrderedDict)
        else:
            # Fixed message: 'json' was missing from the list of known types.
            raise ValueError(
                "The given annotation type '%s' is not known, choose one of "
                "('eyecenter', 'named', 'idiap', 'json')" % annotation_type)
    finally:
        f.close()

    # Sanity check: in image coordinates the left eye is expected to have a
    # larger x than the right eye; if not, they might be swapped.
    if annotations is not None and 'leye' in annotations and \
            'reye' in annotations and \
            annotations['leye'][1] < annotations['reye'][1]:
        # `Logger.warn` is deprecated; use `warning`.
        logger.warning(
            "The eye annotations in file '%s' might be exchanged!" %
            file_name)

    return annotations
Example #9
0
        def get_paths():
            """Locate the protocol list files, preferring legacy `.lst` names.

            Returns the tuple ``(train_csv, dev_enroll_csv, dev_probe_csv,
            eval_enroll_csv, eval_probe_csv)``. Entries other than the two
            mandatory dev files may be ``None`` when the file is absent.

            Raises
            ------
            ValueError
                If the protocol path does not exist, or the mandatory dev
                enroll/probe files are missing.
            """

            if not os.path.exists(dataset_protocol_path):
                raise ValueError(
                    f"The path `{dataset_protocol_path}` was not found"
                )

            # Here we are handling the legacy
            train_csv = search_file(
                dataset_protocol_path,
                [
                    os.path.join(name, protocol, "norm", "train_world.lst"),
                    os.path.join(name, protocol, "norm", "train_world.csv"),
                ],
            )

            dev_enroll_csv = search_file(
                dataset_protocol_path,
                [
                    os.path.join(name, protocol, "dev", "for_models.lst"),
                    os.path.join(name, protocol, "dev", "for_models.csv"),
                ],
            )

            legacy_probe = (
                "for_scores.lst" if self.is_sparse else "for_probes.lst"
            )
            dev_probe_csv = search_file(
                dataset_protocol_path,
                [
                    os.path.join(name, protocol, "dev", legacy_probe),
                    os.path.join(name, protocol, "dev", "for_probes.csv"),
                ],
            )

            eval_enroll_csv = search_file(
                dataset_protocol_path,
                [
                    os.path.join(name, protocol, "eval", "for_models.lst"),
                    os.path.join(name, protocol, "eval", "for_models.csv"),
                ],
            )

            eval_probe_csv = search_file(
                dataset_protocol_path,
                [
                    os.path.join(name, protocol, "eval", legacy_probe),
                    os.path.join(name, protocol, "eval", "for_probes.csv"),
                ],
            )

            # The minimum required is to have `dev_enroll_csv` and `dev_probe_csv`

            # Dev.  Fixed messages: the originals interpolated the variable
            # itself, which is always `None` here, yielding the useless
            # "The file `None` is required..." text.
            if dev_enroll_csv is None:
                raise ValueError(
                    "A `for_models.lst` or `for_models.csv` file is required "
                    f"in `{os.path.join(name, protocol, 'dev')}` and none was "
                    "found"
                )

            if dev_probe_csv is None:
                raise ValueError(
                    f"A `{legacy_probe}` or `for_probes.csv` file is required "
                    f"in `{os.path.join(name, protocol, 'dev')}` and none was "
                    "found"
                )

            return (
                train_csv,
                dev_enroll_csv,
                dev_probe_csv,
                eval_enroll_csv,
                eval_probe_csv,
            )