Example #1
def process(
    in_path,
    annot_beats=False,
    feature="hpcp",
    ds_name="*",
    framesync=False,
    boundaries_id=msaf.DEFAULT_BOUND_ID,
    labels_id=msaf.DEFAULT_LABEL_ID,
    hier=False,
    sonify_bounds=False,
    plot=False,
    n_jobs=4,
    annotator_id=0,
    config=None,
    out_bounds="out_bounds.wav",
    out_sr=22050,
):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not. Only available in collection
        mode.
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    ds_name: str
        Prefix of the dataset to be used (e.g. SALAMI, Isophonics)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for groundtruth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    sonify_bounds: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id: int
        Annotator identifier in the ground truth.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.
    out_bounds: str
        Path to the output for the sonified boundaries (only in single file
        mode, when sonify_bounds is True).
    out_sr : int
        Sampling rate for the sonified bounds.

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels.
        If labels_id is None, est_labels will be a list of -1.
    """
    # Seed random to reproduce results
    np.random.seed(123)

    # Make sure that the features used are correct
    assert feature in msaf.AVAILABLE_FEATS

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync, boundaries_id, labels_id)
        config["features"] = None

    # Save multi-segment (hierarchical) configuration
    config["hier"] = hier

    if os.path.isfile(in_path):
        # Single file mode
        # Get (if they exist) or compute features
        file_struct = msaf.io.FileStruct(in_path)
        if not os.path.exists(file_struct.features_file):
            # Compute and save features
            all_features = featextract.compute_features_for_audio_file(in_path)
            msaf.utils.ensure_dir(os.path.dirname(file_struct.features_file))
            msaf.featextract.save_features(file_struct.features_file, all_features)
        # Get correct features
        config["features"] = msaf.io.get_features(in_path, annot_beats=annot_beats, framesync=framesync)

        # And run the algorithms
        est_times, est_labels = run_algorithms(in_path, boundaries_id, labels_id, config, annotator_id=annotator_id)

        if sonify_bounds:
            logging.info("Sonifying boundaries in %s..." % out_bounds)
            audio_hq, sr = librosa.load(in_path, sr=out_sr)
            utils.sonify_clicks(audio_hq, est_times, out_bounds, out_sr)

        if plot:
            plotting.plot_one_track(file_struct, est_times, est_labels, boundaries_id, labels_id, ds_name)

        # Save estimations
        msaf.utils.ensure_dir(os.path.dirname(file_struct.est_file))
        io.save_estimations(file_struct, est_times, est_labels, boundaries_id, labels_id, **config)

        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path, ds_name)

        # Call in parallel
        return Parallel(n_jobs=n_jobs)(
            delayed(process_track)(file_struct, boundaries_id, labels_id, config, annotator_id=annotator_id)
            for file_struct in file_structs[:]
        )
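
A minimal usage sketch for this variant; the paths below are hypothetical placeholders, and it assumes MSAF and its dependencies are importable:

# Single file mode: returns (est_times, est_labels) for one track.
est_times, est_labels = process("audio/track.mp3", feature="hpcp",
                                sonify_bounds=False, plot=False)

# Collection mode: returns a list of (est_times, est_labels) tuples,
# one per track, estimated in parallel across n_jobs processes.
collection_results = process("datasets/SALAMI", ds_name="SALAMI", n_jobs=4)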
Example #2
def process(
    in_path,
    boundaries_id=msaf.DEFAULT_BOUND_ID,
    labels_id=msaf.DEFAULT_LABEL_ID,
    ds_name="*",
    annot_beats=False,
    framesync=False,
    feature="hpcp",
    hier=False,
    save=False,
    n_jobs=4,
    annotator_id=0,
    config=None,
):
    """Main process.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    ds_name : str
        Name of the dataset to be evaluated (e.g. SALAMI). * stands for all.
    annot_beats : bool
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    save: bool
        Whether to save the results into a CSV file.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id : int
        Number identifying the annotator.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync, boundaries_id, labels_id)

    # Hierarchical segmentation
    config["hier"] = hier

    # Remove actual features
    config.pop("features", None)

    # Sanity check for hierarchical evaluation
    if hier:
        try:
            from mir_eval import hierarchy
        except ImportError:
            logging.error(
                "An experimental mir_eval version is needed to "
                "evaluate hierarchical segments. Please, download it"
                " from: https://github.com/bmcfee/mir_eval"
                " and checkout the tmeasures branch."
            )
            return []

    # Get out file in case we want to save results
    out_file = get_results_file_name(boundaries_id, labels_id, config, ds_name, annotator_id)

    # All evaluations
    results = pd.DataFrame()

    if os.path.isfile(in_path):
        # Single File mode
        evals = [process_track(in_path, boundaries_id, labels_id, config, annotator_id=annotator_id)]
    else:
        # Collection mode
        # If out_file already exists, do not compute new results
        if os.path.exists(out_file):
            logging.info("Results already exists, reading from file %s" % out_file)
            results = pd.read_csv(out_file)
            print_results(results)
            return results

        # Get files
        file_structs = io.get_dataset_files(in_path, ds_name)

        logging.info("Evaluating %d tracks..." % len(file_structs))

        # Evaluate in parallel
        evals = Parallel(n_jobs=n_jobs)(
            delayed(process_track)(file_struct, boundaries_id, labels_id, config, annotator_id=annotator_id)
            for file_struct in file_structs[:]
        )

    # Aggregate evaluations in pandas format
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing results in %s" % out_file)
        results.to_csv(out_file)

    return results
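
A short usage sketch, assuming boundary and label estimations already exist for the collection (e.g., produced by msaf.run.process); the dataset path is a hypothetical placeholder:

# Evaluate "sf" boundaries and "fmc2d" labels over a collection.
results = process("datasets/", boundaries_id="sf", labels_id="fmc2d",
                  ds_name="SALAMI", n_jobs=4, save=False)

# `results` holds one evaluation row per track; averaging gives
# collection-level scores for whichever metrics were computed.
print(results.mean())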
Example #3
def process(in_path,
            boundaries_id=msaf.config.default_bound_id,
            labels_id=msaf.config.default_label_id,
            annot_beats=False,
            framesync=False,
            feature="pcp",
            hier=False,
            save=False,
            n_jobs=4,
            annotator_id=0,
            config=None):
    """Main process to evaluate algorithms' results.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    annot_beats : bool
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. pcp, mfcc, tonnetz)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    save: bool
        Whether to save the results into a CSV file.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id : int
        Number identifying the annotator.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

    # Hierarchical segmentation
    config["hier"] = hier

    # Remove actual features
    config.pop("features", None)

    # Get out file in case we want to save results
    out_file = get_results_file_name(boundaries_id, labels_id, config,
                                     annotator_id)

    # All evaluations
    results = pd.DataFrame()

    if os.path.isfile(in_path):
        # Single File mode
        evals = [
            process_track(in_path,
                          boundaries_id,
                          labels_id,
                          config,
                          annotator_id=annotator_id)
        ]
    else:
        # Collection mode
        # If out_file already exists, do not compute new results
        if os.path.exists(out_file):
            logging.info("Results already exists, reading from file %s" %
                         out_file)
            results = pd.read_csv(out_file)
            print_results(results)
            return results

        # Get files
        file_structs = io.get_dataset_files(in_path)

        logging.info("Evaluating %d tracks..." % len(file_structs))

        # Evaluate in parallel
        evals = Parallel(n_jobs=n_jobs)(
            delayed(process_track)(file_struct,
                                   boundaries_id,
                                   labels_id,
                                   config,
                                   annotator_id=annotator_id)
            for file_struct in file_structs[:])

    # Aggregate evaluations in pandas format
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing results in %s" % out_file)
        results.to_csv(out_file)

    return results
Example #4
def process(in_path, annot_beats=False, feature="mfcc", ds_name="*",
			framesync=False, boundaries_id="gt", labels_id=None, hier=False,
			sonify_bounds=False, plot=False, n_jobs=4, annotator_id=0,
			config=None, out_bounds="out_bounds.wav"):
	"""Main process to segment a file or a collection of files.

	Parameters
	----------
	in_path: str
		Input path. If a directory, MSAF will function in collection mode.
		If audio file, MSAF will be in single file mode.
	annot_beats: bool
		Whether to use annotated beats or not. Only available in collection
		mode.
	feature: str
		String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
	ds_name: str
		Prefix of the dataset to be used (e.g. SALAMI, Isophonics)
	framesync: bool
		Whether to use framesync features or not (default: False -> beatsync)
	boundaries_id: str
		Identifier of the boundaries algorithm (use "gt" for groundtruth)
	labels_id: str
		Identifier of the labels algorithm (use None to not compute labels)
	hier : bool
		Whether to compute a hierarchical or flat segmentation.
	sonify_bounds: bool
		Whether to write an output audio file with the annotated boundaries
		or not (only available in Single File Mode).
	plot: bool
		Whether to plot the boundaries and labels against the ground truth.
	n_jobs: int
		Number of processes to run in parallel. Only available in collection
		mode.
	annotator_id: int
		Annotator identifier in the ground truth.
	config: dict
		Dictionary containing custom configuration parameters for the
		algorithms.  If None, the default parameters are used.
	out_bounds: str
		Path to the output for the sonified boundaries (only in single file
		mode, when sonify_bounds is True).

	Returns
	-------
	results : list
		List containing tuples of (est_times, est_labels) of estimated
		boundary times and estimated labels.
		If labels_id is None, est_labels will be a list of -1.
	"""
	# Seed random to reproduce results
	np.random.seed(123)

	# Set up configuration based on algorithms parameters
	if config is None:
		config = io.get_configuration(feature, annot_beats, framesync,
									  boundaries_id, labels_id)
		config["features"] = None
		config["hier"] = hier

	if os.path.isfile(in_path):
		# Single file mode
		# Get (if they exist) or compute features
		# TODO:Modularize!
		file_struct = msaf.io.FileStruct(in_path)
		if os.path.exists(file_struct.features_file):
			feat_prefix = ""
			if not framesync:
				feat_prefix = "bs_"
			features = {}
		
			# Mi: added the Gammatone feature set
		
			features["%shpcp" % feat_prefix], features["%smfcc" % feat_prefix], \
				features["%stonnetz" % feat_prefix], features["%scqt" % feat_prefix], \
				features["%sgmt" % feat_prefix], features["beats"], dur, \
				features["anal"] = msaf.io.get_features(in_path,
														annot_beats=annot_beats,
														framesync=framesync,
														pre_features=None)
		else:
			# Compute and save features
			features = featextract.compute_features_for_audio_file(in_path)
			msaf.utils.ensure_dir(os.path.dirname(file_struct.features_file))
			msaf.featextract.save_features(file_struct.features_file, features)

		config["features"] = features
		config["hier"] = hier

		# And run the algorithms
		est_times, est_labels = run_algorithms(in_path, boundaries_id,
											   labels_id, config,
											   annotator_id=annotator_id)

		if sonify_bounds:
			logging.info("Sonifying boundaries in %s..." % out_bounds)
			fs = 44100
			audio_hq, sr = librosa.load(in_path, sr=fs)
			utils.sonify_clicks(audio_hq, est_times, out_bounds, fs)

		if plot:
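			# NOTE: save_plot_path is not defined anywhere in this function;
			# it must be supplied by the enclosing module for this call to work.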
			plotting.plot_one_track(save_plot_path, file_struct, est_times, est_labels,
									boundaries_id, labels_id, ds_name)

		# Save estimations
		msaf.utils.ensure_dir(os.path.dirname(file_struct.est_file))
		config["features"] = None
		io.save_estimations(file_struct.est_file, est_times, est_labels,
							boundaries_id, labels_id, **config)

		return est_times, est_labels
	else:
		# Collection mode
		file_structs = io.get_dataset_files(in_path, ds_name)

		# Call in parallel
		return Parallel(n_jobs=n_jobs)(delayed(process_track)(
			file_struct, boundaries_id, labels_id, config,
			annotator_id=annotator_id, plot=plot) for file_struct in file_structs[:])
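
For reference, a sketch of the features dictionary this fork unpacks when framesync=False (the "bs_" prefix marks beat-synchronous features). The array shapes and the "anal" keys below are illustrative assumptions, not values MSAF is guaranteed to produce:

import numpy as np

features = {
    "bs_hpcp": np.zeros((120, 12)),     # harmonic pitch class profiles
    "bs_mfcc": np.zeros((120, 14)),     # MFCCs
    "bs_tonnetz": np.zeros((120, 6)),   # tonal centroid features
    "bs_cqt": np.zeros((120, 84)),      # constant-Q transform
    "bs_gmt": np.zeros((120, 64)),      # Gammatone features added by this fork
    "beats": np.linspace(0, 180, 121),  # beat times in seconds
    "anal": {"dur": 180.0},             # analysis metadata (hypothetical keys)
}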
Example #5
def process(in_path,
            annot_beats=False,
            feature="mfcc",
            ds_name="*",
            framesync=False,
            boundaries_id="gt",
            labels_id=None,
            n_jobs=4,
            config=None):
    """Sweeps parameters across the specified algorithm."""

    results_file = "results_sweep_boundsE%s_labelsE%s.csv" % (boundaries_id,
                                                              labels_id)

    if labels_id == "cnmf3" or boundaries_id == "cnmf3":
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

        hh = range(15, 33)
        RR = range(15, 40)
        ranks = range(3, 6)
        RR_labels = range(11, 12)
        ranks_labels = range(6, 7)
        all_results = pd.DataFrame()
        for rank in ranks:
            for h in hh:
                for R in RR:
                    for rank_labels in ranks_labels:
                        for R_labels in RR_labels:
                            config["h"] = h
                            config["R"] = R
                            config["rank"] = rank
                            config["rank_labels"] = rank_labels
                            config["R_labels"] = R_labels
                            config["features"] = None

                            # Run process
                            msaf.run.process(in_path,
                                             ds_name=ds_name,
                                             n_jobs=n_jobs,
                                             boundaries_id=boundaries_id,
                                             labels_id=labels_id,
                                             config=config)

                            # Compute evaluations
                            results = msaf.eval.process(in_path,
                                                        boundaries_id,
                                                        labels_id,
                                                        ds_name,
                                                        save=True,
                                                        n_jobs=n_jobs,
                                                        config=config)

                            # Save avg results
                            new_columns = {
                                "config_h": h,
                                "config_R": R,
                                "config_rank": rank,
                                "config_R_labels": R_labels,
                                "config_rank_labels": rank_labels
                            }
                            results = results.append([new_columns],
                                                     ignore_index=True)
                            all_results = all_results.append(results.mean(),
                                                             ignore_index=True)
                            all_results.to_csv(results_file)

    elif labels_id is None and boundaries_id == "sf":
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

        MM = range(20, 32)
        mm = range(3, 4)
        kk = np.arange(0.03, 0.1, 0.01)
        Mpp = range(16, 32)
        ott = np.arange(0.02, 0.1, 0.01)
        all_results = pd.DataFrame()
        for k in kk:
            for ot in ott:
                for m in mm:
                    for M in MM:
                        for Mp in Mpp:
                            config["M_gaussian"] = M
                            config["m_embedded"] = m
                            config["k_nearest"] = k
                            config["Mp_adaptive"] = Mp
                            config["offset_thres"] = ot
                            config["features"] = None

                            # Run process
                            msaf.run.process(in_path,
                                             ds_name=ds_name,
                                             n_jobs=n_jobs,
                                             boundaries_id=boundaries_id,
                                             labels_id=labels_id,
                                             config=config)

                            # Compute evaluations
                            results = msaf.eval.process(in_path,
                                                        boundaries_id,
                                                        labels_id,
                                                        ds_name,
                                                        save=True,
                                                        n_jobs=n_jobs,
                                                        config=config)

                            # Save avg results
                            new_columns = {
                                "config_M": M,
                                "config_m": m,
                                "config_k": k,
                                "config_Mp": Mp,
                                "config_ot": ot
                            }
                            results = results.append([new_columns],
                                                     ignore_index=True)
                            all_results = all_results.append(results.mean(),
                                                             ignore_index=True)
                            all_results.to_csv(results_file)

    else:
        logging.error("Can't sweep parameters for %s algorithm. "
                      "Implement me! :D")
def process(in_path,
            annot_beats=False,
            feature="pcp",
            framesync=False,
            boundaries_id=msaf.config.default_bound_id,
            labels_id=msaf.config.default_label_id,
            hier=False,
            sonify_bounds=False,
            plot=False,
            n_jobs=4,
            annotator_id=0,
            config=None,
            out_bounds="out_bounds.wav",
            out_sr=22050,
            output_file=None):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not.
    feature: str
        String representing the feature to be used (e.g. pcp, mfcc, tonnetz)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for groundtruth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    sonify_bounds: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id: int
        Annotator identifier in the ground truth.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.
    out_bounds: str
        Path to the output for the sonified boundaries (only in single file
        mode, when sonify_bounds is True).
    out_sr : int
        Sampling rate for the sonified bounds.

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels.
        If labels_id is None, est_labels will be a list of -1.
    """
    # Seed random to reproduce results
    np.random.seed(123)

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)
        config["features"] = None

    # Save multi-segment (hierarchical) configuration
    config["hier"] = hier
    if not os.path.exists(in_path):
        raise NoAudioFileError("File or directory does not exists, %s" %
                               in_path)
    if os.path.isfile(in_path):
        # Single file mode
        # Get (if they exist) or compute features
        file_struct = msaf.io.FileStruct(in_path)

        # Use temporary file in single mode
        file_struct.features_file = msaf.config.features_tmp_file

        # Get features
        config["features"] = Features.select_features(feature, file_struct,
                                                      annot_beats, framesync)

        # And run the algorithms
        est_times, est_labels = run_algorithms(file_struct,
                                               boundaries_id,
                                               labels_id,
                                               config,
                                               annotator_id=annotator_id)

        if sonify_bounds:
            logging.info("Sonifying boundaries in %s..." % out_bounds)
            audio_hq, sr = librosa.load(in_path, sr=out_sr)
            utils.sonify_clicks(audio_hq, est_times, out_bounds, out_sr)

        if plot:
            custom_plotting.plot_one_track(file_struct,
                                           est_times,
                                           est_labels,
                                           boundaries_id,
                                           labels_id,
                                           output_file=output_file)

        # TODO: Only save if needed
        # Save estimations
        msaf.utils.ensure_dir(os.path.dirname(file_struct.est_file))
        io.save_estimations(file_struct, est_times, est_labels, boundaries_id,
                            labels_id, **config)

        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path)

        return Parallel(n_jobs=n_jobs)(
            delayed(process_track)(file_struct,
                                   boundaries_id,
                                   labels_id,
                                   config,
                                   annotator_id=annotator_id)
            for file_struct in file_structs[:])
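
A sketch of the sonification path in this variant (file names are hypothetical): with sonify_bounds=True, the audio is loaded at out_sr and clicks are written at the estimated boundary times.

est_times, est_labels = process("audio/song.wav",
                                boundaries_id="sf",
                                labels_id=None,
                                sonify_bounds=True,
                                out_bounds="song_bounds.wav",
                                out_sr=44100)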
Example #7
def process(in_path,
            annot_beats=False,
            feature="mfcc",
            ds_name="*",
            framesync=False,
            boundaries_id="gt",
            labels_id=None,
            out_audio=False,
            plot=False,
            n_jobs=4,
            config=None):
    """Main process to segment a file or a collection of files.

    Parameters
    ----------
    in_path: str
        Input path. If a directory, MSAF will function in collection mode.
        If audio file, MSAF will be in single file mode.
    annot_beats: bool
        Whether to use annotated beats or not. Only available in collection
        mode.
    feature: str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    ds_name: str
        Prefix of the dataset to be used (e.g. SALAMI, Isophonics)
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    boundaries_id: str
        Identifier of the boundaries algorithm (use "gt" for groundtruth)
    labels_id: str
        Identifier of the labels algorithm (use None to not compute labels)
    out_audio: bool
        Whether to write an output audio file with the annotated boundaries
        or not (only available in Single File Mode).
    plot: bool
        Whether to plot the boundaries and labels against the ground truth.
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.

    Returns
    -------
    results : list
        List containing tuples of (est_times, est_labels) of estimated
        boundary times and estimated labels.
        If labels_id is None, est_labels will be a list of -1.
    """

    # Seed random to reproduce results
    np.random.seed(123)

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id, algorithms)

    if os.path.isfile(in_path):
        # Single file mode
        audio, features = featextract.compute_features_for_audio_file(in_path)
        config["features"] = features

        # And run the algorithms
        est_times, est_labels = run_algorithms(in_path, boundaries_id,
                                               labels_id, config)

        if out_audio:
            # TODO: Set a nicer output file name?
            #out_file = in_path[:-4] + msaf.out_boundaries_ext
            out_file = "out_boundaries.wav"
            logging.info("Sonifying boundaries in %s..." % out_file)
            fs = 44100
            audio_hq = featextract.read_audio(in_path, fs)
            utils.write_audio_boundaries(
                audio_hq, np.delete(est_times, [1, len(est_times) - 2]),
                out_file, fs)

        if plot:
            plotting.plot_one_track(in_path, est_times, est_labels,
                                    boundaries_id, labels_id)

        return est_times, est_labels
    else:
        # Collection mode
        file_structs = io.get_dataset_files(in_path, ds_name)

        # Call in parallel
        return Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config)
                                       for file_struct in file_structs[:])
Example #8
def process(in_path, annot_beats=False, feature="mfcc", ds_name="*",
            framesync=False, boundaries_id="gt", labels_id=None, n_jobs=4,
            config=None):
    """Sweeps parameters across the specified algorithm."""

    results_file = "results_sweep_boundsE%s_labelsE%s.csv" % (boundaries_id,
                                                              labels_id)

    if labels_id == "cnmf3" or boundaries_id == "cnmf3":
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

        hh = range(15, 33)
        RR = range(15, 40)
        ranks = range(3, 6)
        RR_labels = range(11, 12)
        ranks_labels = range(6, 7)
        all_results = pd.DataFrame()
        for rank in ranks:
            for h in hh:
                for R in RR:
                    for rank_labels in ranks_labels:
                        for R_labels in RR_labels:
                            config["h"] = h
                            config["R"] = R
                            config["rank"] = rank
                            config["rank_labels"] = rank_labels
                            config["R_labels"] = R_labels
                            config["features"] = None

                            # Run process
                            msaf.run.process(in_path, ds_name=ds_name,
                                             n_jobs=n_jobs,
                                             boundaries_id=boundaries_id,
                                             labels_id=labels_id, config=config)

                            # Compute evaluations
                            results = msaf.eval.process(in_path, boundaries_id,
                                                        labels_id, ds_name,
                                                        save=True, n_jobs=n_jobs,
                                                        config=config)

                            # Save avg results
                            new_columns = {"config_h": h, "config_R": R,
                                           "config_rank": rank,
                                           "config_R_labels": R_labels,
                                           "config_rank_labels": rank_labels}
                            results = results.append([new_columns],
                                                     ignore_index=True)
                            all_results = all_results.append(results.mean(),
                                                             ignore_index=True)
                            all_results.to_csv(results_file)

    elif labels_id is None and boundaries_id == "sf":
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

        MM = range(14, 32)
        mm = range(3, 4)
        kk = np.arange(0.03, 0.1, 0.01)
        Mpp = range(16, 32)
        ott = np.arange(0.02, 0.1, 0.01)
        all_results = pd.DataFrame()
        for k in kk:
            for ot in ott:
                for m in mm:
                    for M in MM:
                        for Mp in Mpp:
                            config["M_gaussian"] = M
                            config["m_embedded"] = m
                            config["k_nearest"] = k
                            config["Mp_adaptive"] = Mp
                            config["offset_thres"] = ot
                            config["features"] = None

                            # Run process
                            msaf.run.process(in_path, ds_name=ds_name,
                                             n_jobs=n_jobs,
                                             boundaries_id=boundaries_id,
                                             labels_id=labels_id, config=config)

                            # Compute evaluations
                            results = msaf.eval.process(in_path, boundaries_id,
                                                        labels_id, ds_name,
                                                        save=True, n_jobs=n_jobs,
                                                        config=config)

                            # Save avg results
                            new_columns = {"config_M": M, "config_m": m,
                                           "config_k": k, "config_Mp": Mp,
                                           "config_ot": ot}
                            results = results.append([new_columns],
                                                     ignore_index=True)
                            all_results = all_results.append(results.mean(),
                                                             ignore_index=True)
                            all_results.to_csv(results_file)

    else:
        logging.error("Can't sweep parameters for %s algorithm. "
                      "Implement me! :D")
Example #9
def process(in_path,
            boundaries_id,
            labels_id=None,
            ds_name="*",
            annot_beats=False,
            framesync=False,
            feature="hpcp",
            save=False,
            n_jobs=4,
            config=None):
    """Main process.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    ds_name : str
        Name of the dataset to be evaluated (e.g. SALAMI). * stands for all.
    annot_beats : bool
        Whether to use the annotated beats or not.
    framesync : bool
        Whether to use framesync features or not (default: False -> beatsync)
    feature : str
        String representing the feature to be used (e.g. hpcp, mfcc, tonnetz)
    save : bool
        Whether to save the results into a CSV file.
    n_jobs : int
        Number of processes to run in parallel.
    config : dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id, algorithms)

    # Get out file in case we want to save results
    out_file = get_results_file_name(boundaries_id, labels_id, config, ds_name)

    # If out_file already exists, do not compute new results
    if os.path.exists(out_file):
        logging.info("Results already exists, reading from file %s" % out_file)
        results = pd.read_csv(out_file)
        print_results(results)
        return results

    # Get files
    file_structs = io.get_dataset_files(in_path, ds_name)

    logging.info("Evaluating %d tracks..." % len(file_structs))

    # All evaluations
    results = pd.DataFrame()

    # Evaluate in parallel
    evals = Parallel(n_jobs=n_jobs)(
        delayed(process_track)(file_struct, boundaries_id, labels_id, config)
        for file_struct in file_structs[:])

    # Aggregate evaluations in pandas format
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing average results in %s" % out_file)
        results.to_csv(out_file)

    return results
Example #10
def process(in_path, boundaries_id=msaf.config.default_bound_id,
            labels_id=msaf.config.default_label_id, annot_beats=False,
            framesync=False, feature="pcp", hier=False, save=False,
            out_file=None, n_jobs=4, annotator_id=0, config=None):
    """Main process to evaluate algorithms' results.

    Parameters
    ----------
    in_path : str
        Path to the dataset root folder.
    boundaries_id : str
        Boundaries algorithm identifier (e.g. siplca, cnmf)
    labels_id : str
        Labels algorithm identifier (e.g. siplca, cnmf)
    annot_beats : bool
        Whether to use the annotated beats or not.
    framesync: bool
        Whether to use framesync features or not (default: False -> beatsync)
    feature: str
        String representing the feature to be used (e.g. pcp, mfcc, tonnetz)
    hier : bool
        Whether to compute a hierarchical or flat segmentation.
    save: bool
        Whether to save the results into the `out_file` csv file.
    out_file: str
        Path to the csv file to save the results (if `None` and `save = True`
        it will save the results in the default file name obtained by
        calling `get_results_file_name`).
    n_jobs: int
        Number of processes to run in parallel. Only available in collection
        mode.
    annotator_id : int
        Number identifying the annotator.
    config: dict
        Dictionary containing custom configuration parameters for the
        algorithms.  If None, the default parameters are used.

    Returns
    -------
    results : pd.DataFrame
        DataFrame containing the evaluations for each file.
    """

    # Set up configuration based on algorithms parameters
    if config is None:
        config = io.get_configuration(feature, annot_beats, framesync,
                                      boundaries_id, labels_id)

    # Hierarchical segmentation
    config["hier"] = hier

    # Remove actual features
    config.pop("features", None)

    # Get out file in case we want to save results
    if out_file is None:
        out_file = get_results_file_name(boundaries_id, labels_id, config,
                                         annotator_id)

    # If out_file already exists, read and return them
    if os.path.exists(out_file):
        logging.warning("Results already exists, reading from file %s" %
                        out_file)
        results = pd.read_csv(out_file)
        print_results(results)
        return results

    # Perform actual evaluations
    if os.path.isfile(in_path):
        # Single File mode
        evals = [process_track(in_path, boundaries_id, labels_id, config,
                               annotator_id=annotator_id)]
    else:
        # Collection mode
        # Get files
        file_structs = io.get_dataset_files(in_path)

        # Evaluate in parallel
        logging.info("Evaluating %d tracks..." % len(file_structs))
        evals = Parallel(n_jobs=n_jobs)(delayed(process_track)(
            file_struct, boundaries_id, labels_id, config,
            annotator_id=annotator_id) for file_struct in file_structs[:])

    # Aggregate evaluations in pandas format
    results = pd.DataFrame()
    for e in evals:
        if e != []:
            results = results.append(e, ignore_index=True)
    logging.info("%d tracks analyzed" % len(results))

    # Print results
    print_results(results)

    # Save all results
    if save:
        logging.info("Writing results in %s" % out_file)
        results.to_csv(out_file)

    return results
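
A sketch of the caching behavior in this variant (file names are hypothetical): with save=True and an explicit out_file, a repeated call short-circuits and reads the CSV back instead of re-evaluating.

# First call: evaluates every track and writes the CSV.
results = process("datasets/", save=True, out_file="my_evals.csv")

# Second call: out_file now exists, so cached results are read from
# disk, printed, and returned without recomputing anything.
cached = process("datasets/", save=True, out_file="my_evals.csv")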