def get_results_file_name(boundaries_id, labels_id, config, ds_name):
    """Based on the config and the dataset, get the file name to store the
    results."""
    if ds_name == "*":
        ds_name = "All"
    utils.ensure_dir(msaf.results_dir)
    file_name = os.path.join(msaf.results_dir, "results_%s" % ds_name)
    file_name += "_boundsE%s_labelsE%s" % (boundaries_id, labels_id)
    # Sort keys case-insensitively (the Python 2 cmp argument is gone in
    # Python 3, so use key= instead)
    sorted_keys = sorted(config.keys(), key=str.lower)
    for key in sorted_keys:
        file_name += "_%sE%s" % (key, str(config[key]).replace("/", "_"))
    return file_name + msaf.results_ext
def get_results_file_name(boundaries_id, labels_id, config, annotator_id):
    """Based on the config and the annotator, get the file name to store the
    results."""
    utils.ensure_dir(msaf.config.results_dir)
    file_name = os.path.join(msaf.config.results_dir, "results")
    file_name += "_boundsE%s_labelsE%s" % (boundaries_id, labels_id)
    file_name += "_annotatorE%d" % annotator_id
    sorted_keys = sorted(config.keys(), key=str.lower)
    for key in sorted_keys:
        file_name += "_%sE%s" % (key, str(config[key]).replace("/", "_"))
    # Check for max file length
    if len(file_name) > 255 - len(msaf.config.results_ext):
        file_name = file_name[:255 - len(msaf.config.results_ext)]
    return file_name + msaf.config.results_ext
def get_results_file_name(boundaries_id, labels_id, config, ds_name,
                          annotator_id):
    """Based on the config and the dataset, get the file name to store the
    results."""
    utils.ensure_dir(msaf.config.results_dir)
    file_name = os.path.join(msaf.config.results_dir, "results")
    file_name += "_boundsE%s_labelsE%s" % (boundaries_id, labels_id)
    file_name += "_annotatorE%d" % annotator_id
    sorted_keys = sorted(config.keys(), key=str.lower)
    for key in sorted_keys:
        file_name += "_%sE%s" % (key, str(config[key]).replace("/", "_"))
    # Check for max file length
    if len(file_name) > 255 - len(msaf.config.results_ext):
        file_name = file_name[:255 - len(msaf.config.results_ext)]
    return file_name + msaf.config.results_ext
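# A minimal usage sketch for the get_results_file_name variants above. The
# algorithm ids ("sf", "fmc2d") and the config keys/values are hypothetical
# placeholders, not names guaranteed by msaf; it assumes msaf.config is set up.
def _example_results_file_name():
    config = {"feature": "hpcp", "M_gaussian": 16}
    # Keys are sorted case-insensitively, so "feature" precedes "M_gaussian":
    # <results_dir>/results_boundsEsf_labelsEfmc2d_annotatorE0_featureEhpcp_M_gaussianE16<ext>
    return get_results_file_name("sf", "fmc2d", config, ds_name="*",
                                 annotator_id=0)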
def process(in_path, sonify_beats=False, n_jobs=1, overwrite=False,
            out_file="out.json", out_beats="out_beats.wav", ds_name="*"):
    """Main process to compute features.

    Parameters
    ----------
    in_path: str
        Path to the file or dataset to compute the features.
    sonify_beats: bool
        Whether to sonify the beats on top of the audio file
        (single file mode only).
    n_jobs: int
        Number of threads (collection mode only).
    overwrite: bool
        Whether to overwrite the previously computed features.
    out_file: str
        Path to the output json file (single file mode only).
    out_beats: str
        Path to the new file containing the sonified beats.
    ds_name: str
        Name of the prefix of the dataset (e.g., Beatles).
    """
    # If in_path is a file, we only compute one file
    if os.path.isfile(in_path):
        file_struct = FileStruct(in_path)
        file_struct.features_file = out_file
        compute_all_features(file_struct, sonify_beats, overwrite, out_beats)
    elif os.path.isdir(in_path):
        # Check that in_path exists
        utils.ensure_dir(in_path)

        # Get files
        file_structs = io.get_dataset_files(in_path, ds_name=ds_name)

        # Compute features using joblib
        Parallel(n_jobs=n_jobs)(delayed(compute_all_features)(
            file_struct, sonify_beats, overwrite, out_beats)
            for file_struct in file_structs)
def process(in_path, audio_beats=False, n_jobs=1, overwrite=False):
    """Main process."""
    # If in_path is a file, we only compute one file
    if os.path.isfile(in_path):
        compute_all_features(in_path, audio_beats, overwrite)
    elif os.path.isdir(in_path):
        # Check that in_path exists
        utils.ensure_dir(in_path)

        # Get files
        file_structs = io.get_dataset_files(in_path)

        # Compute features using joblib
        Parallel(n_jobs=n_jobs)(delayed(compute_all_features)(
            file_struct, audio_beats, overwrite)
            for file_struct in file_structs)
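# Usage sketch: the paths below are hypothetical. process() dispatches on the
# input path: a single audio file is computed on its own, while a directory is
# treated as a dataset and processed in parallel via joblib.
def _example_process():
    # Single file mode, sonifying the detected beats on top of the audio:
    process("audio/track01.mp3", sonify_beats=True, out_file="track01.json",
            out_beats="track01_beats.wav")
    # Collection mode over a dataset directory, using 4 parallel jobs:
    process("datasets/Isophonics", n_jobs=4, overwrite=True,
            ds_name="Isophonics")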
def get_dataset_files(in_path, ds_name="*"): """Gets the files of the dataset with a prefix of ds_name.""" # All datasets ds_dict = { "Beatles" : "Isophonics", "Cerulean" : "Cerulean", "Epiphyte" : "Epiphyte", "Isophonics": "Isophonics", "SALAMI" : "SALAMI", "SALAMI-i" : "SALAMI", "*" : "*" } try: prefix = ds_dict[ds_name] except KeyError: raise RuntimeError("Dataset %s is not valid. Valid datasets are: %s" % (ds_name, ds_dict.keys())) # Get audio files audio_files = [] for ext in msaf.Dataset.audio_exts: audio_files += glob.glob(os.path.join(in_path, msaf.Dataset.audio_dir, ("%s_*" + ext) % prefix)) # Check for datasets with different prefix if len(audio_files) == 0: for ext in msaf.Dataset.audio_exts: audio_files += glob.glob(os.path.join(in_path, msaf.Dataset.audio_dir, "*" + ext)) # Make sure directories exist utils.ensure_dir(os.path.join(in_path, msaf.Dataset.features_dir)) utils.ensure_dir(os.path.join(in_path, msaf.Dataset.estimations_dir)) utils.ensure_dir(os.path.join(in_path, msaf.Dataset.references_dir)) # Get the file structs file_structs = [] for audio_file in audio_files: file_structs.append(FileStruct(audio_file)) # Filter by the beatles if ds_name == "Beatles": file_structs = filter_by_artist(file_structs, "The Beatles") # Salami Internet hack if ds_name == "SALAMI-i": file_structs = get_SALAMI_internet(file_structs) # Sort by audio file name file_structs = sorted(file_structs, key=lambda file_struct: file_struct.audio_file) return file_structs
def get_dataset_files(in_path, ds_name="*"): """Gets the files of the dataset with a prefix of ds_name.""" # All datasets ds_dict = { "Beatles": "Isophonics", "Cerulean": "Cerulean", "Epiphyte": "Epiphyte", "Isophonics": "Isophonics", "SALAMI": "SALAMI", "SALAMI-i": "SALAMI", "*": "*" } try: prefix = ds_dict[ds_name] except KeyError: raise RuntimeError("Dataset %s is not valid. Valid datasets are: %s" % (ds_name, ds_dict.keys())) # Get audio files audio_files = [] for ext in msaf.Dataset.audio_exts: audio_files += glob.glob( os.path.join(in_path, msaf.Dataset.audio_dir, ("%s_*" + ext) % prefix)) # Check for datasets with different prefix if len(audio_files) == 0: for ext in msaf.Dataset.audio_exts: audio_files += glob.glob( os.path.join(in_path, msaf.Dataset.audio_dir, "*" + ext)) # Make sure directories exist utils.ensure_dir(os.path.join(in_path, msaf.Dataset.features_dir)) utils.ensure_dir(os.path.join(in_path, msaf.Dataset.estimations_dir)) utils.ensure_dir(os.path.join(in_path, msaf.Dataset.references_dir)) # Get the file structs file_structs = [] for audio_file in audio_files: file_structs.append(FileStruct(audio_file)) # Filter by the beatles if ds_name == "Beatles": file_structs = filter_by_artist(file_structs, "The Beatles") # Salami Internet hack if ds_name == "SALAMI-i": file_structs = get_SALAMI_internet(file_structs) # Sort by audio file name file_structs = sorted(file_structs, key=lambda file_struct: file_struct.audio_file) return file_structs
def get_dataset_files(in_path, ds_name="*"): """Gets the files of the dataset with a prefix of ds_name.""" # All datasets ds_dict = { "Beatles" : "Isophonics", "Cerulean" : "Cerulean", "Epiphyte" : "Epiphyte", "Isophonics": "Isophonics", "SALAMI" : "SALAMI", "SALAMI-i" : "SALAMI", "CJ": "CJ", "*" : "*" } try: prefix = ds_dict[ds_name] except KeyError: raise RuntimeError("Dataset %s is not valid. Valid datasets are: %s" % (ds_name, ds_dict.keys())) audio_files = [x for x in os.listdir(os.path.join(in_path, msaf.Dataset.audio_dir)) if (not x.startswith('.') and (x.endswith('.aif') or \ x.endswith('.wav') or x.endswith('.mp3')) )] audio_files = [os.path.join(in_path, msaf.Dataset.audio_dir, x) for x in audio_files] # Make sure directories exist utils.ensure_dir(os.path.join(in_path, msaf.Dataset.features_dir)) utils.ensure_dir(os.path.join(in_path, msaf.Dataset.estimations_dir)) utils.ensure_dir(os.path.join(in_path, msaf.Dataset.references_dir)) # Get the file structs file_structs = [] for audio_file in audio_files: file_structs.append(FileStruct(audio_file)) # Filter by the beatles if ds_name == "Beatles": file_structs = filter_by_artist(file_structs, "The Beatles") # Salami Internet hack if ds_name == "SALAMI-i": file_structs = get_SALAMI_internet(file_structs) # Sort by audio file name file_structs = sorted(file_structs, key=lambda file_struct: file_struct.audio_file) return file_structs
def get_dataset_files(in_path):
    """Gets the files of the given dataset."""
    # Get audio files
    audio_files = []
    for ext in ds_config.audio_exts:
        audio_files += glob.glob(
            os.path.join(in_path, ds_config.audio_dir, "*" + ext))

    # Make sure directories exist
    utils.ensure_dir(os.path.join(in_path, ds_config.features_dir))
    utils.ensure_dir(os.path.join(in_path, ds_config.estimations_dir))
    utils.ensure_dir(os.path.join(in_path, ds_config.references_dir))

    # Get the file structs
    file_structs = []
    for audio_file in audio_files:
        file_structs.append(FileStruct(audio_file))

    # Sort by audio file name
    file_structs = sorted(file_structs,
                          key=lambda file_struct: file_struct.audio_file)

    return file_structs
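# Usage sketch: "datasets/MyCollection" is a hypothetical dataset root laid
# out with the audio/features/estimations/references subdirectories these
# functions expect; get_dataset_files() returns FileStructs sorted by audio
# file name.
def _example_get_dataset_files():
    for file_struct in get_dataset_files("datasets/MyCollection"):
        print(file_struct.audio_file)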