def probes(self, group="dev"):
    if self.protocol not in self.probes_dict:
        if self.annotations is None:
            self.annotations = load_annotations(
                search_file(self.filename, "alleyes.csv")
            )
        f = search_file(self.filename, f"GBU_{self.protocol}_Query.xml")
        reference_ids = [x.reference_id for x in self.references()]
        self.probes_dict[self.protocol] = self._make_sampleset_from_filedict(
            read_list(f), reference_ids
        )
    return self.probes_dict[self.protocol]
def references(self, group="dev"):
    if self.protocol not in self.references_dict:
        if self.annotations is None:
            self.annotations = load_annotations(
                search_file(self.filename, "alleyes.csv")
            )
        f = search_file(self.filename, f"GBU_{self.protocol}_Target.xml")
        self.references_dict[self.protocol] = self._make_sampleset_from_filedict(
            read_list(f)
        )
    return self.references_dict[self.protocol]
def background_model_samples(self):
    if self.background_samples is None:
        if self.annotations is None:
            self.annotations = load_annotations(
                search_file(self.filename, "alleyes.csv")
            )
        self.background_samples = []
        for b_files in self._background_files:
            f = search_file(self.filename, f"{b_files}")
            self.background_samples += self._make_sampleset_from_filedict(
                read_list(f)
            )
    return self.background_samples
def __init__(self, **kwargs):
    super().__init__(**kwargs)

    # Create the cache entries for the score-normalization cohorts
    self.cache["znorm_csv_dev"] = None
    self.cache["znorm_csv_eval"] = None
    self.cache["tnorm_csv"] = None

    znorm_csv = search_file(
        self.dataset_protocol_path,
        [
            os.path.join(self.protocol, "norm", "for_znorm.lst"),
            os.path.join(self.protocol, "norm", "for_znorm.csv"),
        ],
    )
    tnorm_csv = search_file(
        self.dataset_protocol_path,
        [
            os.path.join(self.protocol, "norm", "for_tnorm.lst"),
            os.path.join(self.protocol, "norm", "for_tnorm.csv"),
        ],
    )

    if znorm_csv is None:
        raise ValueError(
            f"The file `for_znorm.lst` is required and it was not found in `{self.protocol}/norm`"
        )
    if tnorm_csv is None:
        raise ValueError(
            f"The file `for_tnorm.csv` is required and it was not found in `{self.protocol}/norm`"
        )

    self.znorm_csv_dev = znorm_csv
    self.znorm_csv_eval = znorm_csv
    self.tnorm_csv = tnorm_csv
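# Illustrative sketch (not from the original source): the constructor above
# expects the protocol directory to provide the normalization cohort lists
# under `norm/`, in either `.lst` or `.csv` form, roughly as follows:
#
#   <dataset_protocol_path>/
#       <protocol>/
#           norm/
#               for_znorm.lst   (or for_znorm.csv)
#               for_tnorm.lst   (or for_tnorm.csv)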
def __init__(
    self,
    database_path=rc.get("bob.bio.face.webface42M.directory", ""),
    transform=None,
):
    self.database_path = database_path

    if database_path == "":
        raise ValueError(
            "`database_path` is empty; please run "
            "`bob config set bob.bio.face.webface42M.directory` "
            "to set the absolute path of the data"
        )

    urls = WebFace42M.urls()
    filename = get_file(
        "webface42M.tar.gz",
        urls,
        file_hash="50c32cbe61de261466e1ea3af2721cea",
    )
    self.file = search_file(filename, "webface42M.csv")

    self._line_offset = 51
    self.transform = transform
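# Hedged usage sketch (not from the original source): `WebFace42M` reads its
# root folder from the bob configuration, so the directory is typically set
# once from a shell (the path below is purely illustrative):
#
#   $ bob config set bob.bio.face.webface42M.directory /path/to/webface42M
#
# after which the dataset can be instantiated with its defaults:
#
#   dataset = WebFace42M(transform=None)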
def download_voxforge(
    protocol_definition, destination, force_download, **kwargs
):
    """Downloads a series of VoxForge data files from their repository and untars them.

    The files will be downloaded and saved in the `destination` folder, then extracted.

    The list of URLs is provided in the protocol definition file of VoxForge.
    """
    destination = Path(destination)
    destination.mkdir(exist_ok=True)

    if protocol_definition is None:
        protocol_definition = get_protocol_file("voxforge")

    # Use the `Default` protocol
    protocol = "Default"

    # Open the list file
    list_file = f"{protocol}/data_files_urls.csv"
    open_list_file = search_file(protocol_definition, [list_file])

    num_files = sum(1 for _ in open_list_file) - 1
    open_list_file.seek(0, 0)
    logger.info(f"{num_files} files are listed in {list_file}. Downloading...")

    csv_list_file = csv.DictReader(open_list_file)
    for row in tqdm(csv_list_file, total=num_files):
        full_filename = destination / row["filename"]
        if force_download or not full_filename.exists():
            logger.debug(f"Downloading {row['filename']} from {row['url']}")
            download_and_unzip(urls=[row["url"]], filename=full_filename)
            logger.debug(f"Downloaded to {full_filename}")

    logger.info(f"Download of {num_files} files completed.")
    open_list_file.close()
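# Hedged usage sketch (not from the original source): calling the downloader
# with the packaged protocol definition; the destination path is illustrative.
#
#   download_voxforge(
#       protocol_definition=None,  # falls back to get_protocol_file("voxforge")
#       destination="./voxforge_data",
#       force_download=False,
#   )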
def list_file(self, group):
    list_file = search_file(
        self.dataset_protocols_path,
        os.path.join(self.protocol, group + ".csv"),
    )
    return list_file
def read_annotation_file(file_name, annotation_type):
    """This function provides default functionality to read annotation files.

    Parameters
    ----------
    file_name : str
        The full path of the annotation file to read. The path can also be like
        ``base_path:relative_path`` where the base_path can be both a directory
        or a tarball. This allows you to read annotations from inside a tarball.
    annotation_type : str
        The type of the annotation file that should be read. The following
        annotation_types are supported:

        * ``eyecenter``: The file contains a single row with four entries:
          ``re_x re_y le_x le_y``
        * ``named``: The file contains named annotations, one per line, e.g.:
          ``reye re_x re_y`` or ``pose 25.7``
        * ``idiap``: The file contains enumerated annotations, one per line,
          e.g.: ``1 key1_x key1_y``, and maybe some additional annotations like
          gender, age, ...
        * ``json``: The file contains annotations of any format, dumped in a
          text json file.

    Returns
    -------
    dict
        A python dictionary with the keypoint name as key and the position
        ``(y,x)`` as value, and maybe some additional annotations.

    Raises
    ------
    IOError
        If the annotation file is not found.
    ValueError
        If the annotation type is not known.
    """
    if not file_name:
        return None

    if ":" in file_name:
        base_path, tail = file_name.split(":", maxsplit=1)
        f = search_file(base_path, [tail])
    else:
        if not os.path.exists(file_name):
            raise IOError("The annotation file '%s' was not found" % file_name)
        f = open(file_name)

    annotations = {}

    try:
        if str(annotation_type) == "eyecenter":
            # only the eye positions are written, all are in the first row
            line = f.readline()
            positions = line.split()
            assert len(positions) == 4
            annotations["reye"] = (float(positions[1]), float(positions[0]))
            annotations["leye"] = (float(positions[3]), float(positions[2]))

        elif str(annotation_type) == "named":
            # multiple lines, no header line, each line contains annotation and
            # position or single value annotation
            for line in f:
                positions = line.split()
                if len(positions) == 3:
                    annotations[positions[0]] = (
                        float(positions[2]),
                        float(positions[1]),
                    )
                elif len(positions) == 2:
                    annotations[positions[0]] = float(positions[1])
                else:
                    logger.error(
                        "Could not interpret line '%s' in annotation file '%s'",
                        line,
                        file_name,
                    )

        elif str(annotation_type) == "idiap":
            # Idiap format: multiple lines, no header, each line contains an
            # integral keypoint identifier, or other identifier like 'gender',
            # 'age', ...
            for line in f:
                positions = line.rstrip().split()
                if positions:
                    if positions[0].isdigit():
                        # position field
                        assert len(positions) == 3
                        id = int(positions[0])
                        annotations[_idiap_annotations[id]] = (
                            float(positions[2]),
                            float(positions[1]),
                        )
                    else:
                        # another field, we take the first entry as key and the
                        # rest as values
                        annotations[positions[0]] = positions[1:]

            # finally, we add the eye center coordinates as the center between
            # the eye corners; the annotations 3 and 8 are the pupils...
            if "reyeo" in annotations and "reyei" in annotations:
                annotations["reye"] = (
                    (annotations["reyeo"][0] + annotations["reyei"][0]) / 2.0,
                    (annotations["reyeo"][1] + annotations["reyei"][1]) / 2.0,
                )
            if "leyeo" in annotations and "leyei" in annotations:
                annotations["leye"] = (
                    (annotations["leyeo"][0] + annotations["leyei"][0]) / 2.0,
                    (annotations["leyeo"][1] + annotations["leyei"][1]) / 2.0,
                )

        elif str(annotation_type) == "json":
            annotations = json.load(f, object_pairs_hook=collections.OrderedDict)

        else:
            raise ValueError(
                "The given annotation type '%s' is not known, choose one of "
                "('eyecenter', 'named', 'idiap', 'json')" % annotation_type
            )
    finally:
        f.close()

    if (
        annotations is not None
        and "leye" in annotations
        and "reye" in annotations
        and annotations["leye"][1] < annotations["reye"][1]
    ):
        logger.warning(
            "The eye annotations in file '%s' might be exchanged!" % file_name
        )

    return annotations
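# Hedged usage sketch (not from the original source); the file names below are
# purely illustrative. The `base_path:relative_path` form documented above also
# allows reading an annotation from inside a tarball.
#
#   annotations = read_annotation_file("/data/subject01/image01.pos", "eyecenter")
#   annotations = read_annotation_file(
#       "/data/annotations.tar.gz:subject01/image01.json", "json"
#   )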
def get_paths():
    if not os.path.exists(dataset_protocol_path):
        raise ValueError(f"The path `{dataset_protocol_path}` was not found")

    # Here we are handling the legacy `.lst` lists alongside the `.csv` ones
    train_csv = search_file(
        dataset_protocol_path,
        [
            os.path.join(name, protocol, "norm", "train_world.lst"),
            os.path.join(name, protocol, "norm", "train_world.csv"),
        ],
    )
    dev_enroll_csv = search_file(
        dataset_protocol_path,
        [
            os.path.join(name, protocol, "dev", "for_models.lst"),
            os.path.join(name, protocol, "dev", "for_models.csv"),
        ],
    )

    legacy_probe = "for_scores.lst" if self.is_sparse else "for_probes.lst"
    dev_probe_csv = search_file(
        dataset_protocol_path,
        [
            os.path.join(name, protocol, "dev", legacy_probe),
            os.path.join(name, protocol, "dev", "for_probes.csv"),
        ],
    )

    eval_enroll_csv = search_file(
        dataset_protocol_path,
        [
            os.path.join(name, protocol, "eval", "for_models.lst"),
            os.path.join(name, protocol, "eval", "for_models.csv"),
        ],
    )
    eval_probe_csv = search_file(
        dataset_protocol_path,
        [
            os.path.join(name, protocol, "eval", legacy_probe),
            os.path.join(name, protocol, "eval", "for_probes.csv"),
        ],
    )

    # The minimum required is to have `dev_enroll_csv` and `dev_probe_csv`
    if dev_enroll_csv is None:
        raise ValueError(
            "The `dev` enrollment file (`for_models.lst` or `for_models.csv`) "
            "is required and it was not found"
        )
    if dev_probe_csv is None:
        raise ValueError(
            "The `dev` probe file is required and it was not found"
        )

    return (
        train_csv,
        dev_enroll_csv,
        dev_probe_csv,
        eval_enroll_csv,
        eval_probe_csv,
    )