def run_algorithms(audio_file, boundaries_id, labels_id, config):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier passed to `get_boundaries_module` to look up the
        boundaries algorithm. When the lookup yields None, the boundaries
        are read with `io.read_references` instead.
    labels_id: str
        Identifier passed to `get_labels_module` to look up the labels
        algorithm. When the lookup yields None, no labeling step is run.
    config: dict
        Extra keyword arguments forwarded to every `Segmenter` constructor.

    Returns
    -------
    est_times: np.array or list
        Estimated segment boundaries, as returned by the segmenter (or by
        the reference reader). An empty list if the references could not
        be read.
    est_labels: np.array or list
        Labels associated with the segments. An empty list if the
        references could not be read.
    """
    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    same_algorithm = (
        bounds_module is not None and
        labels_module is not None and
        bounds_module.__name__ == labels_module.__name__)

    if same_algorithm:
        # A single algorithm produces both the boundaries and the labels
        S = bounds_module.Segmenter(audio_file, **config)
        est_times, est_labels = S.process()
    else:
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
            est_times, est_labels = S.process()
        else:
            try:
                est_times, est_labels = io.read_references(audio_file)
            except Exception:
                # Best effort: any failure while reading the references
                # yields empty results. `Exception` (rather than a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                logging.warning("No references found for file: %s",
                                audio_file)
                return [], []

        # Label segments
        if labels_module is not None:
            S = labels_module.Segmenter(audio_file, in_bound_times=est_times,
                                        **config)
            est_times, est_labels = S.process()

    return est_times, est_labels
def run_flat(file_struct, bounds_module, labels_module, frame_times, config,
             annotator_id):
    """Runs the flat algorithms with the specified identifiers on the
    audio_file. See run_algorithms for more information.

    Parameters
    ----------
    file_struct: object
        Object describing the file to segment; its `audio_file` attribute
        is used to read the references and in the warning message. It is
        forwarded unchanged to the `Segmenter` constructors.
    bounds_module: module or None
        Module exposing a `Segmenter` class for the boundaries. When None,
        the boundaries are read with `io.read_references`.
    labels_module: module or None
        Module exposing a `Segmenter` class for the labels. When None, no
        separate labeling step is run.
    frame_times: np.array
        Frame times used to align reference times to frame indices and to
        convert estimated indices back to times.
    config: dict
        Keyword arguments forwarded to the segmenters. `config["features"]`
        must expose the `features` and `dur` attributes.
    annotator_id: int
        Annotator identifier forwarded to `io.read_references`.

    Returns
    -------
    est_times: np.array
        Estimated boundary times.
    est_labels: np.array
        Estimated labels of the segments.
    wfmcs: object or None
        Third output of the labels segmenter's `processFlat()`; a dummy
        array when there is a single segment; None when no separate
        labeling step was run.

    Notes
    -----
    If the references cannot be read (IOError) the function returns the
    two-element tuple `([], [])`.
    """
    # Get features to make code nicer
    features = config["features"].features

    # Only the separate labeling step below produces this value. Defaulting
    # it avoids an UnboundLocalError on the paths that skip that step.
    wfmcs = None

    # Segment using the specified boundaries and labels
    # Case when boundaries and labels algorithms are the same
    if bounds_module is not None and labels_module is not None and \
            bounds_module.__name__ == labels_module.__name__:
        S = bounds_module.Segmenter(file_struct, **config)
        est_idxs, est_labels = S.processFlat()

    # Different boundary and label algorithms
    else:
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(file_struct, in_labels=[], **config)
            est_idxs, est_labels = S.processFlat()
        else:
            try:
                # Ground-truth boundaries
                est_times, est_labels = io.read_references(
                    file_struct.audio_file, annotator_id=annotator_id)
                est_idxs = io.align_times(est_times, frame_times)
                if est_idxs[0] != 0:
                    est_idxs = np.concatenate(([0], est_idxs))
            except IOError:
                logging.warning("No references found for file: %s",
                                file_struct.audio_file)
                # NOTE(review): this failure path yields two values while
                # the normal path yields three; kept as-is for existing
                # callers.
                return [], []

        # Label segments
        if labels_module is not None:
            if len(est_idxs) == 2:
                # Two boundaries, i.e. one segment only: no need of
                # clustering
                est_labels = np.array([0])
                # Dummy to satisfy the output of the method
                wfmcs = np.arange(2 * len(est_labels)).reshape(
                    len(est_labels), 2)
            else:
                S = labels_module.Segmenter(file_struct,
                                            in_bound_idxs=est_idxs, **config)
                _, est_labels, wfmcs = S.processFlat()

    # Make sure the first and last boundaries are included
    if bounds_module:
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, features.shape[0], frame_times,
            config["features"].dur)
    else:
        # Assume that ground-truth boundaries already include the first
        # and last ones
        est_times = np.array(est_times)

    return est_times, est_labels, wfmcs
def run_flat(audio_file, bounds_module, labels_module, frame_times, config,
             annotator_id):
    """Runs the flat algorithms with the specified identifiers on the
    audio_file. See run_algorithms for more information.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    bounds_module: module or None
        Module exposing a `Segmenter` class for the boundaries. When None,
        the boundaries are read with `io.read_references`.
    labels_module: module or None
        Module exposing a `Segmenter` class for the labels. When None, no
        separate labeling step is run.
    frame_times: np.array
        Frame times used to align reference times to frame indices and to
        convert estimated indices back to times.
    config: dict
        Keyword arguments forwarded to the segmenters. `config["features"]`
        must contain the "hpcp" features and an "anal" dict with a "dur"
        entry.
    annotator_id: int
        Annotator identifier forwarded to `io.read_references`.

    Returns
    -------
    est_times: np.array or list
        Estimated boundary times. An empty list if the references could not
        be read or aligned.
    est_labels: np.array or list
        Estimated labels of the segments. An empty list if the references
        could not be read or aligned.
    """
    # Get features to make code nicer
    features = config["features"]

    # Segment using the specified boundaries and labels
    # Case when boundaries and labels algorithms are the same
    if bounds_module is not None and labels_module is not None and \
            bounds_module.__name__ == labels_module.__name__:
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processFlat()

    # Different boundary and label algorithms
    else:
        # Identify segment boundaries
        if bounds_module is not None:
            S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
            est_idxs, est_labels = S.processFlat()
        else:
            try:
                est_times, est_labels = io.read_references(
                    audio_file, annotator_id=annotator_id)
                est_idxs = io.align_times(est_times, frame_times[:-1])
                last_frame = features["hpcp"].shape[0] - 1
                # Force the first and last frames to be boundaries
                if est_idxs[0] != 0:
                    est_idxs = np.concatenate(([0], est_idxs))
                if est_idxs[-1] != last_frame:
                    est_idxs = np.concatenate((est_idxs, [last_frame]))
            except Exception:
                # Best effort: any failure while reading or aligning the
                # references yields empty results. `Exception` (rather
                # than a bare except) lets KeyboardInterrupt/SystemExit
                # propagate.
                logging.warning("No references found for file: %s",
                                audio_file)
                return [], []

        # Label segments
        if labels_module is not None:
            if len(est_idxs) == 2:
                # Two boundaries, i.e. a single segment: nothing to cluster
                est_labels = np.array([0])
            else:
                S = labels_module.Segmenter(audio_file,
                                            in_bound_idxs=est_idxs, **config)
                est_labels = S.processFlat()[1]

    # Make sure the first and last boundaries are included
    est_times, est_labels = utils.process_segmentation_level(
        est_idxs, est_labels, features["hpcp"].shape[0], frame_times,
        features["anal"]["dur"])

    return est_times, est_labels
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground
        truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to
        use. The keys "annot_beats", "framesync", "features" and "hier" are
        read here; the whole dict is forwarded to the segmenters.
    annotator_id: int
        Annotator identifier in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    est_labels: np.array or list
        List of all the labels associated segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.

    Raises
    ------
    RuntimeError
        In hierarchical mode, when no boundaries algorithm is given or when
        the labels algorithm differs from the boundaries algorithm.
    """
    # At this point, features should have already been computed
    hpcp, _, _, _, _, beats, dur, anal = \
        io.get_features(audio_file, config["annot_beats"],
                        config["framesync"],
                        pre_features=config["features"])
    n_frames = hpcp.shape[0]

    # Check that there are enough audio frames
    if n_frames <= msaf.minimum__frames:
        logging.warning("Audio file too short, or too many few beats "
                        "estimated. Returning empty estimations.")
        return np.asarray([0, dur]), np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = beats
    if config["framesync"]:
        frame_times = utils.get_time_frames(dur, anal)

    # Segment audio based on type of segmentation
    if config["hier"]:
        # Hierarchical segmentation
        if bounds_module is None:
            raise RuntimeError("A boundary algorithm is needed when using "
                               "hierarchical segmentation.")
        if labels_module is not None and \
                bounds_module.__name__ != labels_module.__name__:
            raise RuntimeError("The same algorithm for boundaries and labels is "
                               "needed when using hierarchical segmentation.")
        S = bounds_module.Segmenter(audio_file, **config)
        est_idxs, est_labels = S.processHierarchical()

        # Make sure the first and last boundaries are included for each
        # level in the hierarchy
        est_times = []
        cleaned_est_labels = []
        for level, level_idxs in enumerate(est_idxs):
            est_level_times, est_level_labels = \
                utils.process_segmentation_level(
                    level_idxs, est_labels[level], n_frames, frame_times,
                    dur)
            est_times.append(est_level_times)
            cleaned_est_labels.append(est_level_labels)
        est_labels = cleaned_est_labels
    else:
        # Flat segmentation
        # Segment using the specified boundaries and labels
        # Case when boundaries and labels algorithms are the same
        if bounds_module is not None and labels_module is not None and \
                bounds_module.__name__ == labels_module.__name__:
            S = bounds_module.Segmenter(audio_file, **config)
            est_idxs, est_labels = S.processFlat()

        # Different boundary and label algorithms
        else:
            # Identify segment boundaries
            if bounds_module is not None:
                S = bounds_module.Segmenter(audio_file, in_labels=[],
                                            **config)
                est_idxs, est_labels = S.processFlat()
            else:
                try:
                    est_times, est_labels = io.read_references(
                        audio_file, annotator_id=annotator_id)
                    est_idxs = io.align_times(est_times, frame_times[:-1])
                    # Force the first and last frames to be boundaries
                    if est_idxs[0] != 0:
                        est_idxs = np.concatenate(([0], est_idxs))
                    if est_idxs[-1] != n_frames - 1:
                        est_idxs = np.concatenate((est_idxs,
                                                   [n_frames - 1]))
                except Exception:
                    # Best effort: any failure while reading or aligning
                    # the references yields empty results. `Exception`
                    # (rather than a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    logging.warning("No references found for file: %s",
                                    audio_file)
                    return [], []

            # Label segments
            if labels_module is not None:
                if len(est_idxs) == 2:
                    # Two boundaries, i.e. a single segment: nothing to
                    # cluster
                    est_labels = np.array([0])
                else:
                    S = labels_module.Segmenter(audio_file,
                                                in_bound_idxs=est_idxs,
                                                **config)
                    est_labels = S.processFlat()[1]

        # Make sure the first and last boundaries are included
        est_times, est_labels = utils.process_segmentation_level(
            est_idxs, est_labels, n_frames, frame_times, dur)

    return est_times, est_labels