Example #1
    def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc", "cqt", "tempogram"],
                    normalize=True):
        """This method obtains the actual features."""
        # Read features
        if self.features is None:
            self.features = io.get_features(self.audio_file,
                                            annot_beats=self.annot_beats,
                                            framesync=self.framesync)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s in not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        else:
            try:
                F = self.features[self.feature_str]
            except KeyError:
                raise RuntimeError("Feature %s in not supported by MSAF" %
                                   (self.feature_str))

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F
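
For orientation, here is a minimal sketch of the kind of Segmenter class this method sits in, reconstructed from the attributes it reads (audio_file, feature_str, annot_beats, framesync, features). The constructor signature and the processFlat body are illustrative assumptions, not MSAF's exact API:

class Segmenter(object):
    def __init__(self, audio_file, feature="hpcp", annot_beats=False,
                 framesync=False, features=None):
        # Attribute names mirror those read by _preprocess above.
        self.audio_file = audio_file
        self.feature_str = feature      # key into the features dictionary
        self.annot_beats = annot_beats  # frame on annotated beats
        self.framesync = framesync      # fixed-hop frames instead of beat-sync
        self.features = features        # precomputed features, or None

    def processFlat(self):
        # _preprocess validates feature_str and returns the selected,
        # optionally log-normalized, feature matrix.
        F = self._preprocess()
        return F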
Example #2
    def _preprocess(self,
                    valid_features=["hpcp", "tonnetz", "mfcc", "cqt"],
                    normalize=True):
        """This method obtains the actual features."""
        # Read features
        if self.features is None:
            self.features = io.get_features(self.audio_file,
                                            annot_beats=self.annot_beats,
                                            framesync=self.framesync)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s in not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        else:
            try:
                F = self.features[self.feature_str]
            except KeyError:
                raise RuntimeError("Feature %s in not supported by MSAF" %
                                   (self.feature_str))

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F
Example #3
    def _preprocess(self,
                    valid_features=["hpcp", "tonnetz", "mfcc", "cqt", "gmt"],
                    normalize=True):
        """This method obtains the actual features."""
        # Read features
        self.hpcp, self.mfcc, self.tonnetz, self.cqt, self.gmt, beats, dur, self.anal = \
            io.get_features(self.audio_file, annot_beats=self.annot_beats,
                            framesync=self.framesync,
                            pre_features=self.features)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s in not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        else:
            try:
                F = getattr(self, self.feature_str)
            except AttributeError:
                raise RuntimeError("Feature %s is not supported by MSAF" %
                                   self.feature_str)

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F
Example #4
    def _preprocess(self, valid_features=["hpcp", "tonnetz", "mfcc", "cqt"],
                    normalize=True):
        """This method obtains the actual features."""
        # Read features
        self.hpcp, self.mfcc, self.tonnetz, self.cqt, beats, dur, self.anal = \
            io.get_features(self.audio_file, annot_beats=self.annot_beats,
                            framesync=self.framesync,
                            pre_features=self.features)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s in not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        else:
            try:
                F = getattr(self, self.feature_str)
            except AttributeError:
                raise RuntimeError("Feature %s is not supported by MSAF" %
                                   self.feature_str)

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F
Example #5
File: run.py Project: kacrouse/msaf
def run_algorithms(audio_file, boundaries_id, labels_id, config,
                   annotator_id=0):
    """Runs the algorithms with the specified identifiers on the audio_file.

    Parameters
    ----------
    audio_file: str
        Path to the audio file to segment.
    boundaries_id: str
        Identifier of the boundaries algorithm to use ("gt" for ground truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to use.
    annotator_id: int
        Annotator identifier in the ground truth.

    Returns
    -------
    est_times: np.array or list
        List of estimated times for the segment boundaries.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    est_labels: np.array or list
        List of the labels associated with the segments.
        If `list`, it will be a list of np.arrays, sorted by segmentation
        layer.
    """
    # Features should have already been computed, let's read them
    features = io.get_features(audio_file, config["annot_beats"],
                               config["framesync"])
    config["features"] = features

    # Check that there are enough audio frames
    if features["hpcp"].shape[0] <= msaf.minimum__frames:
        logging.warning("Audio file too short, or too many few beats "
                        "estimated. Returning empty estimations.")
        return np.asarray([0, features["anal"]["dur"]]), \
            np.asarray([0], dtype=int)

    # Get the corresponding modules
    bounds_module = get_boundaries_module(boundaries_id)
    labels_module = get_labels_module(labels_id)

    # Get the correct frame times
    frame_times = features["beats"]
    if config["framesync"]:
        frame_times = utils.get_time_frames(features["anal"]["dur"],
                                            features["anal"])

    # Segment audio based on type of segmentation
    run_fun = run_hierarchical if config["hier"] else run_flat
    est_times, est_labels = run_fun(audio_file, bounds_module, labels_module,
                                    frame_times, config, annotator_id)

    return est_times, est_labels
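
As a usage illustration, run_algorithms above could be driven as follows. The audio path and the "sf" boundaries identifier are placeholders, and the config values are assumptions consistent with the keys the function reads (annot_beats, framesync, hier):

from msaf import run  # assumed module layout, per the file path above

config = {
    "annot_beats": False,  # use estimated rather than annotated beats
    "framesync": False,    # beat-synchronous features
    "hier": False,         # flat (non-hierarchical) segmentation
}
# labels_id=None skips the labeling stage, per the docstring above.
est_times, est_labels = run.run_algorithms("audio/track.mp3", "sf", None,
                                           config)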
Example #6
    def _preprocess(self,
                    valid_features=["hpcp", "tonnetz", "mfcc"],
                    normalize=True):
        """This method obtains the actual features, their frame times,
        and the boundary indices in these features if needed."""
        # Read features
        if self.features is None:
            # Features stored in a json file
            self.hpcp, self.mfcc, self.tonnetz, beats, dur, anal = \
                io.get_features(self.audio_file, annot_beats=self.annot_beats,
                                framesync=self.framesync)
        else:
            # Features passed as parameters
            feat_prefix = ""
            if not self.framesync:
                feat_prefix = "bs_"
            self.hpcp = self.features["%shpcp" % feat_prefix]
            self.mfcc = self.features["%smfcc" % feat_prefix]
            self.tonnetz = self.features["%stonnetz" % feat_prefix]
            beats = self.features["beats"]
            dur = self.features["anal"]["dur"]
            anal = self.features["anal"]

        # Store analysis parameters
        self.anal = anal

        # Use correct frames to find times
        frame_times = beats
        if self.framesync:
            frame_times = U.get_time_frames(dur, anal)

        # Read input bounds if necessary
        bound_idxs = None
        if self.in_bound_times is not None:
            bound_idxs = io.align_times(self.in_bound_times, frame_times)
            bound_idxs = np.unique(bound_idxs)

        # Use specific feature
        if self.feature_str not in valid_features:
            raise RuntimeError("Feature %s in not valid for algorithm: %s "
                               "(valid features are %s)." %
                               (self.feature_str, __name__, valid_features))
        else:
            try:
                F = getattr(self, self.feature_str)
            except AttributeError:
                raise RuntimeError("Feature %s is not supported by MSAF" %
                                   self.feature_str)

        # Normalize if needed
        if normalize:
            F = U.lognormalize_chroma(F)

        return F, frame_times, dur, bound_idxs
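
The io.align_times call above snaps annotated boundary times onto feature frames. A minimal numpy sketch of what such a helper plausibly does (an assumption about its behavior, for illustration only):

import numpy as np

def align_times_sketch(times, frame_times):
    """Map each boundary time to the index of its closest frame time."""
    frame_times = np.asarray(frame_times)
    return np.asarray([np.argmin(np.abs(frame_times - t)) for t in times])

# e.g., align_times_sketch([0.0, 10.3], np.arange(0.0, 20.0, 0.5))
# -> array([ 0, 21]); np.unique() is then applied above to drop duplicates.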
Example #7
def run_algorithms(audio_file, boundaries_id, labels_id, config,
				   annotator_id=0):
	"""Runs the algorithms with the specified identifiers on the audio_file.

	Parameters
	----------
	audio_file: str
		Path to the audio file to segment.
	boundaries_id: str
		Identifier of the boundaries algorithm to use ("gt" for ground truth).
	labels_id: str
		Identifier of the labels algorithm to use (None for not labeling).
	config: dict
		Dictionary containing the custom parameters of the algorithms to use.
	annotator_id: int
		Annotator identifier in the ground truth.

	Returns
	-------
	est_times: np.array or list
		List of estimated times for the segment boundaries.
		If `list`, it will be a list of np.arrays, sorted by segmentation layer.
	est_labels: np.array or list
		List of the labels associated with the segments.
		If `list`, it will be a list of np.arrays, sorted by segmentation layer.
	"""

	# At this point, features should have already been computed
	hpcp, mfcc, tonnetz, cqt, gmt, beats, dur, anal =  \
			io.get_features(audio_file, config["annot_beats"],
							config["framesync"],
							pre_features=config["features"])

	# Check that there are enough audio frames
	if hpcp.shape[0] <= msaf.minimum__frames:
		logging.warning("Audio file too short, or too many few beats "
						"estimated. Returning empty estimations.")
		return np.asarray([0, dur]), np.asarray([0], dtype=int)

	# Get the corresponding modules
	bounds_module = get_boundaries_module(boundaries_id)
	labels_module = get_labels_module(labels_id)

	# Get the correct frame times
	frame_times = beats
	if config["framesync"]:
		frame_times = utils.get_time_frames(dur, anal)

	# Segment audio based on type of segmentation
	if config["hier"]:
		# Hierarchical segmentation
		if bounds_module is None:
			raise RuntimeError("A boundary algorithm is needed when using "
							   "hierarchical segmentation.")
		if labels_module is not None and \
				bounds_module.__name__ != labels_module.__name__:
			raise RuntimeError("The same algorithm for boundaries and labels is "
							   "needed when using hierarchical segmentation.")
		S = bounds_module.Segmenter(audio_file, **config)
		est_idxs, est_labels = S.processHierarchical()

		# Make sure the first and last boundaries are included for each
		# level in the hierarchy
		est_times = []
		cleaned_est_labels = []
		for level in range(len(est_idxs)):
			est_level_times, est_level_labels = \
				utils.process_segmentation_level(est_idxs[level],
												 est_labels[level],
												 hpcp.shape[0],
												 frame_times,
												 dur)
			est_times.append(est_level_times)
			cleaned_est_labels.append(est_level_labels)
		est_labels = cleaned_est_labels
	else:
		# Flat segmentation
		# Segment using the specified boundaries and labels
		# Case when boundaries and labels algorithms are the same
		if bounds_module is not None and labels_module is not None and \
				bounds_module.__name__ == labels_module.__name__:
			S = bounds_module.Segmenter(audio_file, **config)
			est_idxs, est_labels = S.processFlat()
		# Different boundary and label algorithms
		else:
			# Identify segment boundaries
			if bounds_module is not None:
				S = bounds_module.Segmenter(audio_file, in_labels=[], **config)
				est_idxs, est_labels = S.processFlat()
			else:
				try:
					est_times, est_labels = io.read_references(
						audio_file, annotator_id=annotator_id)
					est_idxs = io.align_times(est_times, frame_times[:-1])
					if est_idxs[0] != 0:
						est_idxs = np.concatenate(([0], est_idxs))
					if est_idxs[-1] != hpcp.shape[0] - 1:
						est_idxs = np.concatenate((est_idxs, [hpcp.shape[0] - 1]))
				except Exception:
					logging.warning("No references found for file: %s" %
									audio_file)
					return [], []

			# Label segments
			if labels_module is not None:
				if len(est_idxs) == 2:
					est_labels = np.array([0])
				else:
					S = labels_module.Segmenter(audio_file,
												in_bound_idxs=est_idxs,
												**config)
					est_labels = S.processFlat()[1]

		# Make sure the first and last boundaries are included
		est_times, est_labels = utils.process_segmentation_level(
			est_idxs, est_labels, hpcp.shape[0], frame_times, dur)

	return est_times, est_labels
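
The est_idxs concatenations in the flat branch above guarantee that the first and last frames are present as boundaries. Factored into a small helper for clarity (a sketch under the same assumptions, not part of MSAF's API):

import numpy as np

def pad_boundaries(est_idxs, n_frames):
    """Ensure boundary indices start at 0 and end at n_frames - 1."""
    est_idxs = np.asarray(est_idxs, dtype=int)
    if est_idxs.size == 0 or est_idxs[0] != 0:
        est_idxs = np.concatenate(([0], est_idxs))
    if est_idxs[-1] != n_frames - 1:
        est_idxs = np.concatenate((est_idxs, [n_frames - 1]))
    return est_idxs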