def process_track(file_struct, boundaries_id, labels_id, config):
    """Segments a single track and writes the estimations to disk.

    Parameters
    ----------
    file_struct: object
        Provides the `ref_file`, `audio_file`, `features_file` and
        `est_file` paths read by this function.
    boundaries_id: str
        Identifier of the boundaries algorithm (forwarded untouched).
    labels_id: str
        Identifier of the labels algorithm (forwarded untouched).
    config: dict
        Algorithm parameters. Must contain the "annot_beats" key; the whole
        dictionary is also forwarded as keyword arguments when saving.

    Returns
    -------
    est_times, est_labels
        The pair returned by `run_algorithms`, or None when annotated beats
        are requested but the reference file holds no beat data.
    """
    # Tracks without annotated beats are skipped when they are required
    if config["annot_beats"]:
        reference = jams2.load(file_struct.ref_file)
        has_beats = len(reference.beats) > 0 and \
            len(reference.beats[0].data) > 0
        if not has_beats:
            logging.warning("No beat information in file %s" %
                            file_struct.ref_file)
            return

    logging.info("Segmenting %s" % file_struct.audio_file)

    # Features are only computed when the features file is missing
    features_ready = os.path.isfile(file_struct.features_file)
    if not features_ready:
        featextract.compute_all_features(file_struct)

    # Run the boundary and label algorithms
    est_times, est_labels = run_algorithms(
        file_struct.audio_file, boundaries_id, labels_id, config)

    # Store the results as intervals
    logging.info("Writing results in: %s" % file_struct.est_file)
    est_inters = utils.times_to_intervals(est_times)
    io.save_estimations(file_struct.est_file, est_inters, est_labels,
                        boundaries_id, labels_id, **config)

    return est_times, est_labels
def compute_all_features(file_struct, audio_beats=False, overwrite=False):
    """Computes and stores all the features of one audio file.

    When a reference file with beat annotations exists, features
    synchronized to the annotated beats are stored as well.

    Parameters
    ----------
    file_struct: object
        Provides the `features_file`, `audio_file` and `ref_file` paths.
    audio_beats: bool
        Whether to write the audio with the estimated beats marked on it
        to 'beats.wav'.
    overwrite: bool
        Whether to recompute when the features file already exists.
    """
    out_file = file_struct.features_file
    if os.path.isfile(out_file) and not overwrite:
        # Features already on disk and no overwrite requested
        return

    audio, features = compute_features_for_audio_file(file_struct.audio_file)

    # Optionally render the estimated beats as beeps over the audio
    if audio_beats:
        logging.info("Saving Beats as an audio file")
        beeper = ES.AudioOnsetsMarker(onsets=features["beats"], type='beep',
                                      sampleRate=msaf.Anal.sample_rate)
        beeped_audio = beeper(audio)
        beat_writer = ES.MonoWriter(filename='beats.wav',
                                    sampleRate=msaf.Anal.sample_rate)
        beat_writer(beeped_audio)

    # Annotated beats (if any) yield an extra set of beat-sync features
    if os.path.isfile(file_struct.ref_file):
        jam = jams2.load(file_struct.ref_file)
        if jam.beats != []:
            logging.info("Reading beat annotations from JAMS")
            beat_times = [point.time.value for point in jam.beats[0].data]
            annot_beats = essentia.array(np.unique(beat_times).tolist())
            annot_mfcc, annot_hpcp, annot_tonnetz = compute_features(
                audio, annot_beats)

    # Collect everything in a pool and dump it as JSON
    logging.info("Saving the JSON file in %s" % out_file)
    writer = YamlOutput(filename=out_file, format='json')
    pool = essentia.Pool()
    pool.add("beats.times", features["beats"])
    pool.add("beats.confidence", features["beats_conf"])
    pool.set("analysis.sample_rate", msaf.Anal.sample_rate)
    # NOTE(review): the key says frame_rate but the value is frame_size;
    # confirm this is what the readers of the file expect.
    pool.set("analysis.frame_rate", msaf.Anal.frame_size)
    pool.set("analysis.hop_size", msaf.Anal.hop_size)
    pool.set("analysis.window_type", msaf.Anal.window_type)
    pool.set("analysis.mfcc_coeff", msaf.Anal.mfcc_coeff)
    now = datetime.datetime.today()
    pool.set("timestamp", now.strftime("%Y/%m/%d %H:%M:%S"))
    save_features("framesync", pool, features["mfcc"], features["hpcp"],
                  features["tonnetz"])
    save_features("est_beatsync", pool, features["bs_mfcc"],
                  features["bs_hpcp"], features["bs_tonnetz"])
    if os.path.isfile(file_struct.ref_file) and jam.beats != []:
        save_features("ann_beatsync", pool, annot_mfcc, annot_hpcp,
                      annot_tonnetz)
    writer(pool)
def filter_by_artist(file_structs, artist_name="The Beatles"):
    """Keeps only the files whose reference metadata names the given artist.

    Parameters
    ----------
    file_structs: list
        Objects exposing a `ref_file` path to a JAMS reference.
    artist_name: str
        Artist to keep; compared for equality with the JAMS metadata.

    Returns
    -------
    new_file_structs: list
        The matching elements of `file_structs`, in their original order.
    """
    return [
        candidate for candidate in file_structs
        if jams2.load(candidate.ref_file).metadata.artist == artist_name
    ]
def process_track(file_struct, boundaries_id, labels_id, config,
                  annotator_id=0, plot=False):
    """Sets up the inputs, runs the algorithms, and stores the results.

    Parameters
    ----------
    file_struct: Object
        FileStruct containing the paths of the input files (audio file,
        features file, reference file, output estimation file).
    boundaries_id: str
        Identifier of the boundaries algorithm to use
        ("gt" for ground truth).
    labels_id: str
        Identifier of the labels algorithm to use (None for not labeling).
    config: dict
        Dictionary containing the custom parameters of the algorithms to
        use. Must contain the "annot_beats" key.
    annotator_id: int
        Annotator identifier in the ground truth.
    plot: bool
        Whether to plot the result into the 'plots' folder that sits two
        directory levels above the audio file.

    Returns
    -------
    est_times: np.array
        List of estimated times for the segment boundaries.
    est_labels: np.array
        List of all the labels associated segments.
        Nothing (None) is returned when annotated beats are requested but
        the reference file has no beat data.
    """
    # Tracks without annotated beats are skipped when they are required
    if config["annot_beats"]:
        reference = jams2.load(file_struct.ref_file)
        has_beats = len(reference.beats) > 0 and \
            len(reference.beats[0].data) > 0
        if not has_beats:
            logging.warning("No beat information in file %s" %
                            file_struct.ref_file)
            return

    logging.info("Segmenting %s" % file_struct.audio_file)

    # Features are only computed when the features file is missing
    features_ready = os.path.isfile(file_struct.features_file)
    if not features_ready:
        featextract.compute_all_features(file_struct)

    # Run the boundary and label algorithms
    est_times, est_labels = run_algorithms(
        file_struct.audio_file, boundaries_id, labels_id, config,
        annotator_id=annotator_id)

    # Store the results
    logging.info("Writing results in: %s" % file_struct.est_file)
    io.save_estimations(file_struct.est_file, est_times, est_labels,
                        boundaries_id, labels_id, **config)

    if plot:
        # The dataset folder is two levels above the audio file
        ds_dir = dirname(dirname(file_struct.audio_file))
        audio_name = splitext(basename(file_struct.audio_file))[0]
        plot_file = audio_name + '_' + 'mfcc' + '_' + boundaries_id + '.pdf'
        plot_name = join(ds_dir, 'plots', plot_file)
        dataset_name = basename(ds_dir)
        plotting.plot_one_track(plot_name, file_struct, est_times,
                                est_labels, boundaries_id, labels_id,
                                dataset_name)

    return est_times, est_labels
def compute_all_features(file_struct, sonify_beats=False, overwrite=False,
                         out_beats="out_beats.wav"):
    """Computes and stores all the features of one audio file.

    When a reference file with beat annotations exists, features
    synchronized to the annotated beats are added before saving.

    Parameters
    ----------
    file_struct: FileStruct
        Object containing all the set of file paths of the input file.
    sonify_beats: bool
        Whether to sonify the estimated beats.
    overwrite: bool
        Whether to overwrite a previous features JSON file.
    out_beats: str
        Path to the new file containing the sonified beats.
    """
    out_file = file_struct.features_file
    if os.path.isfile(out_file) and not overwrite:
        # Features already on disk and no overwrite requested
        return

    features = compute_features_for_audio_file(file_struct.audio_file)

    # Optionally write the estimated beats as audible clicks
    if sonify_beats:
        logging.info("Sonifying beats...")
        fs = 44100
        audio, sr = librosa.load(file_struct.audio_file, sr=fs)
        msaf.utils.sonify_clicks(audio, features["beats"], out_beats, fs,
                                 offset=0.0)

    # Annotated beats (if any) yield an extra set of beat-sync features
    if os.path.isfile(file_struct.ref_file):
        reference = jams2.load(file_struct.ref_file)
        if reference.beats != []:
            logging.info("Reading beat annotations from JAMS")
            beat_times = [point.time.value
                          for point in reference.beats[0].data]
            annot_beats = np.unique(beat_times)
            annot_beats_idx = librosa.time_to_frames(
                annot_beats, sr=msaf.Anal.sample_rate,
                hop_length=msaf.Anal.hop_size)
            ann_mfcc, ann_hpcp, ann_tonnetz, ann_cqt, ann_gmt = \
                compute_beat_sync_features(features, annot_beats_idx)
            features["ann_mfcc"] = ann_mfcc
            features["ann_hpcp"] = ann_hpcp
            features["ann_tonnetz"] = ann_tonnetz
            features["ann_cqt"] = ann_cqt
            features["ann_gmt"] = ann_gmt

    # Dump everything as JSON
    save_features(out_file, features)
def read_estimations(est_file, boundaries_id, labels_id=None, **params):
    """Reads the estimations (boundaries and/or labels) from a jams file
    containing the estimations of an algorithm.

    Parameters
    ----------
    est_file : str
        Path to the estimated file (JAMS file).
    boundaries_id : str
        Identifier of the algorithm used to compute the boundaries.
    labels_id : str
        Identifier of the algorithm used to compute the labels.
    params : dict
        Additional search parameters. E.g. {"feature" : "hpcp"}.

    Returns
    -------
    boundaries : np.array((N,2))
        Array containing the estimated boundaries in intervals.
    labels : np.array(N)
        Array containing the estimated labels.
        Empty array if labels_id is None.
        Two empty arrays are returned when the file cannot be loaded or no
        matching estimation is found (an error is logged in both cases).
    """
    # Open file and read jams. Only regular errors are turned into an empty
    # result; KeyboardInterrupt and SystemExit must still propagate.
    try:
        jam = jams2.load(est_file)
    except Exception:
        logging.error("Could not open JAMS file %s" % est_file)
        return np.array([]), np.array([])

    # Find the estimation matching the requested algorithms and parameters
    correct_est, _ = find_estimation(jam.sections, boundaries_id, labels_id,
                                     params, est_file)
    if correct_est is None:
        logging.error("Could not find estimation in %s" % est_file)
        return np.array([]), np.array([])

    # Retrieve data
    # TODO: Multiple contexts. Right now MSAF algorithms only estimate one
    # single layer, so it is not really necessary yet.
    boundaries = []
    labels = []
    for segment in correct_est.data:
        boundaries.append([segment.start.value, segment.end.value])
        if labels_id is not None:
            labels.append(segment.label.value)

    return np.asarray(boundaries), np.asarray(labels, dtype=int)
def get_levels():
    """Obtains the set of unique levels contained in the jams sorted by the
    number of segments they contain.

    `jams_file`, `annotation_id` and `exclude_levels` are not parameters:
    they are read from the surrounding scope.

    Returns
    -------
    levels : np.array
        Level identifiers for the entire hierarchy, ordered by increasing
        number of segments.
    """
    jam = jams2.load(jams_file)
    annotation = jam.sections[annotation_id]
    contexts = [segment.label.context for segment in annotation.data
                if segment.label.context not in exclude_levels]

    # Count how many segments each level has
    counts = Counter(contexts)

    # Materialize keys and values as lists: dict views cannot be converted
    # to an indexable array nor sorted by numpy.
    level_names = np.asarray(list(counts.keys()))
    order = np.argsort(list(counts.values()))
    return level_names[order]
def fit_model(X, Y, B, T, n_jobs, annot_beats, ds_path, ds_name):
    """Fits one OLDA model per candidate sigma and keeps the best one.

    Parameters
    ----------
    X, Y:
        Training data handed to `OLDA.fit`.
    B, T:
        Zipped with `X` per track and handed to `score_model`.
    n_jobs: int
        Unused: scoring runs sequentially.
    annot_beats: bool
        Whether to skip the tracks whose reference has no beat annotations.
    ds_path: str
        Path to the dataset.
    ds_name: str
        Name of the dataset, used to list its files.

    Returns
    -------
    model:
        `components_` of the model with the highest mean score, or None if
        no track could be scored.
    """
    # Float base: an integer base cannot be raised to the negative
    # exponents of this range.
    SIGMA = 10.0 ** np.arange(-2, 18)

    best_score = -np.inf
    best_sigma = None
    model = None

    print(len(X))

    # The file listing and the beat-annotation filter do not depend on
    # sigma, so they are resolved once instead of once per sigma.
    # NOTE(review): assumes the dataset files are listed in the same order
    # as the entries of X, B and T -- confirm against the caller.
    files = msaf.io.get_dataset_files(ds_path, ds_name=ds_name)
    tracks = []
    for file_struct, z in zip(files, zip(X, B, T)):
        ref_file = file_struct.ref_file
        if annot_beats:
            jam = jams2.load(ref_file)
            if jam.beats == [] or jam.beats[0].data == []:
                continue
        tracks.append((ref_file, z))

    if not tracks:
        # Nothing to score: no sigma can be selected
        print("No tracks available to score the model")
        return model

    for sig in SIGMA:
        O = OLDA.OLDA(sigma=sig)
        O.fit(X, Y)

        scores = []
        for ref_file, z in tracks:
            print("\t\tProcessing  %s" % (ref_file,))
            scores.append(score_model(O.components_, *z))

        mean_score = np.mean(scores)
        print('Sigma=%.2e, score=%.3f' % (sig, mean_score))

        if mean_score > best_score:
            best_score = mean_score
            best_sigma = sig
            model = O.components_

    # best_sigma stays None when no mean score beat -inf (e.g. all NaN)
    if best_sigma is not None:
        print('Best sigma: %.2e' % best_sigma)
    return model
def fit_model(X, Y, B, T, n_jobs, annot_beats, ds_path, ds_name):
    """Fits one OLDA model per candidate sigma and keeps the best one.

    Parameters
    ----------
    X, Y:
        Training data handed to `OLDA.fit`.
    B, T:
        Zipped with `X` per track and handed to `score_model`.
    n_jobs: int
        Unused: scoring runs sequentially.
    annot_beats: bool
        Whether to skip the tracks whose reference has no beat annotations.
    ds_path: str
        Path to the dataset.
    ds_name: str
        Name of the dataset, used to list its files.

    Returns
    -------
    model:
        `components_` of the model with the highest mean score, or None if
        no track could be scored.
    """
    # Float base: an integer base cannot be raised to the negative
    # exponents of this range.
    SIGMA = 10.0 ** np.arange(-2, 18)

    best_score = -np.inf
    best_sigma = None
    model = None

    print(len(X))

    # The file listing and the beat-annotation filter do not depend on
    # sigma, so they are resolved once instead of once per sigma.
    # NOTE(review): assumes the dataset files are listed in the same order
    # as the entries of X, B and T -- confirm against the caller.
    files = msaf.io.get_dataset_files(ds_path, ds_name=ds_name)
    tracks = []
    for file_struct, z in zip(files, zip(X, B, T)):
        ref_file = file_struct.ref_file
        if annot_beats:
            jam = jams2.load(ref_file)
            if jam.beats == [] or jam.beats[0].data == []:
                continue
        tracks.append((ref_file, z))

    if not tracks:
        # Nothing to score: no sigma can be selected
        print("No tracks available to score the model")
        return model

    for sig in SIGMA:
        O = OLDA.OLDA(sigma=sig)
        O.fit(X, Y)

        scores = []
        for ref_file, z in tracks:
            print("\t\tProcessing  %s" % (ref_file,))
            scores.append(score_model(O.components_, *z))

        mean_score = np.mean(scores)
        print('Sigma=%.2e, score=%.3f' % (sig, mean_score))

        if mean_score > best_score:
            best_score = mean_score
            best_sigma = sig
            model = O.components_

    # best_sigma stays None when no mean score beat -inf (e.g. all NaN)
    if best_sigma is not None:
        print('Best sigma: %.2e' % best_sigma)
    return model
def save_estimations(out_file, times, labels, boundaries_id, labels_id,
                     **params):
    """Saves the segment estimations in a JAMS file.

    Parameters
    ----------
    out_file : str
        Path to the output JAMS file in which to save the estimations.
    times : np.array or list
        Estimated boundary times.
        If `list`, estimated hierarchical boundaries (one entry per level).
    labels : np.array(N, 2)
        Estimated labels (None in case we are only storing boundary
        evaluations, in which case every segment is labeled -1).
    boundaries_id : str
        Boundary algorithm identifier.
    labels_id : str
        Labels algorithm identifier.
    params : dict
        Dictionary with additional parameters for both algorithms.
    """
    # Treat a flat segmentation as a hierarchy with a single level
    if isinstance(times, np.ndarray):
        times = [times]
        labels = [labels]
    elif labels is None:
        labels = [None] * len(times)

    # Convert to intervals and sanity check
    inters = []
    all_labels = []
    for level, level_times in enumerate(times):
        level_inters = utils.times_to_intervals(level_times)
        level_labels = labels[level]
        if level_labels is None:
            # Boundaries only: one placeholder label per interval
            level_labels = np.ones(len(level_inters)) * -1
        assert len(level_inters) == len(level_labels), \
            "Number of boundary intervals (%d) and labels (%d) do not " \
            "match" % (len(level_inters), len(level_labels))
        inters.append(level_inters)
        all_labels.append(level_labels)

    curr_estimation = None
    curr_i = -1

    # Find estimation in file
    if os.path.isfile(out_file):
        jam = jams2.load(out_file)
        all_estimations = jam.sections
        curr_estimation, curr_i = find_estimation(
            all_estimations, boundaries_id, labels_id, params, out_file)
    else:
        # Create new JAMS if it doesn't exist
        jam = jams2.Jams()
        jam.metadata.title = os.path.basename(out_file).replace(
            msaf.Dataset.estimations_ext, "")

    # Create new annotation if needed
    if curr_estimation is None:
        curr_estimation = jam.sections.create_annotation()

    # Save metadata and parameters
    curr_estimation.annotation_metadata.attribute = "sections"
    curr_estimation.annotation_metadata.version = msaf.__version__
    curr_estimation.annotation_metadata.origin = "MSAF"
    sandbox = {}
    sandbox["boundaries_id"] = boundaries_id
    sandbox["labels_id"] = labels_id
    sandbox["timestamp"] = \
        datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S")
    for key in params:
        sandbox[key] = params[key]
    curr_estimation.sandbox = sandbox

    # Save actual data; the label context records the hierarchy level
    curr_estimation.data = []
    for i, (level_inters, level_labels) in enumerate(zip(inters,
                                                         all_labels)):
        for bound_inter, label in zip(level_inters, level_labels):
            segment = curr_estimation.create_datapoint()
            segment.start.value = float(bound_inter[0])
            segment.start.confidence = 0.0
            segment.end.value = float(bound_inter[1])
            segment.end.confidence = 0.0
            segment.label.value = label
            segment.label.confidence = 0.0
            segment.label.context = "level_%d" % i

    # Place estimation in its place
    if curr_i != -1:
        jam.sections[curr_i] = curr_estimation

    # Write file and do not let users interrupt it
    my_thread = Thread(target=safe_write, args=(jam, out_file,))
    my_thread.start()
    my_thread.join()
def read_estimations(est_file, boundaries_id, labels_id=None, **params):
    """Reads the estimations (boundaries and/or labels) from a jams file
    containing the estimations of an algorithm.

    Parameters
    ----------
    est_file : str
        Path to the estimated file (JAMS file).
    boundaries_id : str
        Identifier of the algorithm used to compute the boundaries.
    labels_id : str
        Identifier of the algorithm used to compute the labels.
    params : dict
        Additional search parameters. E.g. {"feature" : "hpcp"}.

    Returns
    -------
    boundaries : np.array((N,2))
        Array containing the estimated boundaries in intervals.
        A list with one such array per level when the estimation holds
        several levels, in the order the levels first appear in the file.
    labels : np.array(N)
        Array containing the estimated labels (a list of arrays for
        several levels). Empty array if labels_id is None.
        Two empty arrays are returned when the file cannot be loaded or no
        matching estimation is found (an error is logged in both cases).
    """
    # Open file and read jams. Only regular errors are turned into an empty
    # result; KeyboardInterrupt and SystemExit must still propagate.
    try:
        jam = jams2.load(est_file)
    except Exception:
        logging.error("Could not open JAMS file %s" % est_file)
        return np.array([]), np.array([])

    # Find the estimation matching the requested algorithms and parameters
    correct_est, _ = find_estimation(jam.sections, boundaries_id, labels_id,
                                     params, est_file)
    if correct_est is None:
        logging.error("Could not find estimation in %s" % est_file)
        return np.array([]), np.array([])

    # Group the segments by level in a single pass. Levels keep the order
    # in which they first appear; a set would shuffle the hierarchy.
    level_order = []
    per_level = {}
    for segment in correct_est.data:
        context = segment.label.context
        if context not in per_level:
            per_level[context] = ([], [])
            level_order.append(context)
        boundaries, labels = per_level[context]
        boundaries.append([segment.start.value, segment.end.value])
        if labels_id is not None:
            labels.append(segment.label.value)

    all_boundaries = [np.asarray(per_level[level][0])
                      for level in level_order]
    all_labels = [np.asarray(per_level[level][1], dtype=int)
                  for level in level_order]

    # If there is only one level, return np.arrays instead of lists
    if len(level_order) == 1:
        all_boundaries = all_boundaries[0]
        all_labels = all_labels[0]

    return all_boundaries, all_labels
def get_features(audio_path, annot_beats=False, framesync=False,
                 pre_features=None):
    """
    Gets the features of an audio file given the audio_path.

    Parameters
    ----------
    audio_path: str
        Path to the audio file.
    annot_beats: bool
        Whether to use annotated beats or not.
    framesync: bool
        Whether to use framesync features or not.
    pre_features: dict
        Pre computed features as a dictionary.
        `None` for reading them from the json file.

    Return
    ------
    C: np.array((N, 12))
        (Beat-sync) Chromagram
    M: np.array((N, 13))
        (Beat-sync) MFCC
    T: np.array((N, 6))
        (Beat-sync) Tonnetz
    cqt: np.array((N, msaf.Anal.cqt_bins))
        (Beat-sync) Constant-Q transform
    G: np.array
        (Beat-sync) Gammatone features
    beats: np.array(T)
        Beats in seconds
    dur: float
        Song duration
    analysis : dict
        Parameters of analysis of track (e.g. sampling rate)

    Raises
    ------
    RuntimeError
        If annotated beats are requested and the references can't be read.
    ValueError
        If annotated beats are requested and the reference has none.
    """
    if pre_features is not None:
        # Pre computed features: beat-sync entries carry the "bs_" prefix
        feat_prefix = "" if framesync else "bs_"
        C = pre_features["%shpcp" % feat_prefix]
        M = pre_features["%smfcc" % feat_prefix]
        T = pre_features["%stonnetz" % feat_prefix]
        cqt = pre_features["%scqt" % feat_prefix]
        G = pre_features["%sgmt" % feat_prefix]
        beats = pre_features["beats"]
        dur = pre_features["anal"]["dur"]
        analysis = pre_features["anal"]
        return C, M, T, cqt, G, beats, dur, analysis

    # Dataset path
    ds_path = os.path.dirname(os.path.dirname(audio_path))
    # NOTE(review): dropping the last 4 characters assumes a three-letter
    # audio extension -- confirm this matches how the files are written.
    track_name = os.path.basename(audio_path)[:-4]

    # Read the features file
    features_path = os.path.join(ds_path, msaf.Dataset.features_dir,
                                 track_name + msaf.Dataset.features_ext)
    with open(features_path, "r") as f:
        feats = json.load(f)

    # Select the set of features and the associated beats
    if framesync:
        feat_str = "framesync"
        beats = None
    elif annot_beats:
        # Read references. The path is built outside the try block so that
        # it is always defined for the error message.
        annotation_path = os.path.join(
            ds_path, msaf.Dataset.references_dir,
            track_name + msaf.Dataset.references_ext)
        try:
            jam = jams2.load(annotation_path)
        except Exception:
            raise RuntimeError("No references found in file %s" %
                               annotation_path)

        feat_str = "ann_beatsync"
        beat_data = jam.beats[0].data
        if beat_data == []:
            raise ValueError("No beat annotations found in file %s" %
                             annotation_path)
        beats = np.unique([data.time.value for data in beat_data])
    else:
        feat_str = "est_beatsync"
        beats = np.asarray(feats["beats"]["times"])

    C = np.asarray(feats[feat_str]["hpcp"])
    M = np.asarray(feats[feat_str]["mfcc"])
    T = np.asarray(feats[feat_str]["tonnetz"])
    cqt = np.asarray(feats[feat_str]["cqt"])
    # Mi: added the Gammatone features
    G = np.asarray(feats[feat_str]["gmt"])
    analysis = feats["analysis"]
    dur = analysis["dur"]

    # Frame times might be shorter than the actual number of features.
    if framesync:
        n_frames = len(utils.get_time_frames(dur, analysis))
        C = C[:n_frames]
        M = M[:n_frames]
        T = T[:n_frames]
        # cqt is truncated too so that every feature has the same length
        cqt = cqt[:n_frames]
        G = G[:n_frames]

    return C, M, T, cqt, G, beats, dur, analysis
def save_estimations(out_file, times, labels, boundaries_id, labels_id,
                     **params):
    """Saves the segment estimations in a JAMS file.

    Parameters
    ----------
    out_file : str
        Path to the output JAMS file in which to save the estimations.
    times : np.array or list
        Estimated boundary times.
        If `list`, estimated hierarchical boundaries (one entry per level).
    labels : np.array(N, 2)
        Estimated labels (None in case we are only storing boundary
        evaluations, in which case every segment is labeled -1).
    boundaries_id : str
        Boundary algorithm identifier.
    labels_id : str
        Labels algorithm identifier.
    params : dict
        Dictionary with additional parameters for both algorithms.
    """
    # Treat a flat segmentation as a hierarchy with a single level
    if isinstance(times, np.ndarray):
        times = [times]
        labels = [labels]
    elif labels is None:
        labels = [None] * len(times)

    # Convert to intervals and sanity check
    inters = []
    all_labels = []
    for level, level_times in enumerate(times):
        level_inters = utils.times_to_intervals(level_times)
        level_labels = labels[level]
        if level_labels is None:
            # Boundaries only: one placeholder label per interval
            level_labels = np.ones(len(level_inters)) * -1
        assert len(level_inters) == len(level_labels), \
            "Number of boundary intervals (%d) and labels (%d) do not " \
            "match" % (len(level_inters), len(level_labels))
        inters.append(level_inters)
        all_labels.append(level_labels)

    curr_estimation = None
    curr_i = -1

    # Find estimation in file
    if os.path.isfile(out_file):
        jam = jams2.load(out_file)
        all_estimations = jam.sections
        curr_estimation, curr_i = find_estimation(
            all_estimations, boundaries_id, labels_id, params, out_file)
    else:
        # Create new JAMS if it doesn't exist
        jam = jams2.Jams()
        jam.metadata.title = os.path.basename(out_file).replace(
            msaf.Dataset.estimations_ext, "")

    # Create new annotation if needed
    if curr_estimation is None:
        curr_estimation = jam.sections.create_annotation()

    # Save metadata and parameters
    curr_estimation.annotation_metadata.attribute = "sections"
    curr_estimation.annotation_metadata.version = msaf.__version__
    curr_estimation.annotation_metadata.origin = "MSAF"
    sandbox = {}
    sandbox["boundaries_id"] = boundaries_id
    sandbox["labels_id"] = labels_id
    sandbox["timestamp"] = \
        datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S")
    for key in params:
        sandbox[key] = params[key]
    curr_estimation.sandbox = sandbox

    # Save actual data; the label context records the hierarchy level
    curr_estimation.data = []
    for i, (level_inters, level_labels) in enumerate(zip(inters,
                                                         all_labels)):
        for bound_inter, label in zip(level_inters, level_labels):
            segment = curr_estimation.create_datapoint()
            segment.start.value = float(bound_inter[0])
            segment.start.confidence = 0.0
            segment.end.value = float(bound_inter[1])
            segment.end.confidence = 0.0
            segment.label.value = label
            segment.label.confidence = 0.0
            segment.label.context = "level_%d" % i

    # Place estimation in its place
    if curr_i != -1:
        jam.sections[curr_i] = curr_estimation

    # Write file and do not let users interrupt it
    my_thread = Thread(target=safe_write, args=(jam, out_file,))
    my_thread.start()
    my_thread.join()
def save_estimations(out_file, boundaries, labels, boundaries_id, labels_id,
                     **params):
    """Saves the segment estimations in a JAMS file.

    Parameters
    ----------
    out_file : str
        Path to the output JAMS file in which to save the estimations.
    boundaries : np.array((N, 2))
        Estimated boundary intervals.
    labels : np.array(N, 2)
        Estimated labels (None in case we are only storing boundary
        evaluations, in which case every segment is labeled -1).
    boundaries_id : str
        Boundary algorithm identifier.
    labels_id : str
        Labels algorithm identifier.
    params : dict
        Dictionary with additional parameters for both algorithms.
    """
    # Boundaries only: one placeholder label per interval. This must happen
    # before the sanity check, which needs the length of the labels.
    if labels is None:
        labels = np.ones(len(boundaries)) * -1

    # Sanity Check
    assert len(boundaries) == len(labels), "Number of boundary intervals " \
        "(%d) and labels (%d) do not match" % (len(boundaries), len(labels))

    curr_estimation = None
    curr_i = -1

    # Find estimation in file
    if os.path.isfile(out_file):
        jam = jams2.load(out_file)
        all_estimations = jam.sections
        curr_estimation, curr_i = find_estimation(
            all_estimations, boundaries_id, labels_id, params, out_file)
    else:
        # Create new JAMS if it doesn't exist
        jam = jams2.Jams()
        jam.metadata.title = os.path.basename(out_file).replace(
            msaf.Dataset.estimations_ext, "")

    # Create new annotation if needed
    if curr_estimation is None:
        curr_estimation = jam.sections.create_annotation()

    # Save metadata and parameters
    curr_estimation.annotation_metadata.attribute = "sections"
    curr_estimation.annotation_metadata.version = msaf.__version__
    curr_estimation.annotation_metadata.origin = "MSAF"
    sandbox = {}
    sandbox["boundaries_id"] = boundaries_id
    sandbox["labels_id"] = labels_id
    sandbox["timestamp"] = \
        datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S")
    for key in params:
        sandbox[key] = params[key]
    curr_estimation.sandbox = sandbox

    # Save actual data
    curr_estimation.data = []
    for bound_inter, label in zip(boundaries, labels):
        segment = curr_estimation.create_datapoint()
        segment.start.value = float(bound_inter[0])
        segment.start.confidence = 0.0
        segment.end.value = float(bound_inter[1])
        segment.end.confidence = 0.0
        segment.label.value = label
        segment.label.confidence = 0.0
        segment.label.context = "msaf"  # TODO: Use multiple contexts

    # Place estimation in its place
    if curr_i != -1:
        jam.sections[curr_i] = curr_estimation

    # Write file and do not let users interrupt it
    my_thread = Thread(target=safe_write, args=(jam, out_file,))
    my_thread.start()
    my_thread.join()
def get_features(audio_path, annot_beats=False, framesync=False):
    """
    Gets the features of an audio file given the audio_path.

    Parameters
    ----------
    audio_path: str
        Path to the audio file.
    annot_beats: bool
        Whether to use annotated beats or not.
    framesync: bool
        Whether to use framesync features or not.

    Return
    ------
    C: np.array((N, 12))
        (Beat-sync) Chromagram
    M: np.array((N, 13))
        (Beat-sync) MFCC
    T: np.array((N, 6))
        (Beat-sync) Tonnetz
    beats: np.array(T)
        Beats in seconds (None for framesync features)
    dur: float
        Song duration
    analysis : dict
        Parameters of analysis of track (e.g. sampling rate)

    Raises
    ------
    RuntimeError
        If annotated beats are requested and the references can't be read.
    ValueError
        If annotated beats are requested and the reference has none.
    """
    # Dataset path
    ds_path = os.path.dirname(os.path.dirname(audio_path))
    # NOTE(review): dropping the last 4 characters assumes a three-letter
    # audio extension -- confirm this matches how the files are written.
    track_name = os.path.basename(audio_path)[:-4]

    # Read the features file
    features_path = os.path.join(ds_path, msaf.Dataset.features_dir,
                                 track_name + msaf.Dataset.features_ext)
    with open(features_path, "r") as f:
        feats = json.load(f)

    # Select the set of features and the associated beats
    if framesync:
        feat_str = "framesync"
        beats = None
    elif annot_beats:
        # Read references. The path is built outside the try block so that
        # it is always defined for the error message.
        annotation_path = os.path.join(
            ds_path, msaf.Dataset.references_dir,
            track_name + msaf.Dataset.references_ext)
        try:
            jam = jams2.load(annotation_path)
        except Exception:
            raise RuntimeError("No references found in file %s" %
                               annotation_path)

        feat_str = "ann_beatsync"
        beat_data = jam.beats[0].data
        if beat_data == []:
            raise ValueError("No beat annotations found in file %s" %
                             annotation_path)
        beats = np.unique([data.time.value for data in beat_data])
    else:
        feat_str = "est_beatsync"
        # The estimated beat times are stored wrapped in an outer list
        beats = np.asarray(feats["beats"]["times"])[0]

    C = np.asarray(feats[feat_str]["hpcp"])
    M = np.asarray(feats[feat_str]["mfcc"])
    T = np.asarray(feats[feat_str]["tonnetz"])
    analysis = feats["analysis"]

    # Duration, derived from the number of framesync frames
    # TODO: Essentia fix!
    feat_frames = np.asarray(feats["framesync"]["hpcp"])
    dur = feat_frames.shape[0] * analysis["hop_size"] / \
        float(analysis["sample_rate"])

    return C, M, T, beats, dur, analysis
def get_features(audio_path, annot_beats=False, framesync=False,
                 pre_features=None):
    """
    Gets the features of an audio file given the audio_path.

    Parameters
    ----------
    audio_path: str
        Path to the audio file.
    annot_beats: bool
        Whether to use annotated beats or not.
    framesync: bool
        Whether to use framesync features or not.
    pre_features: dict
        Pre computed features as a dictionary.
        `None` for reading them from the json file.

    Return
    ------
    C: np.array((N, 12))
        (Beat-sync) Chromagram
    M: np.array((N, 13))
        (Beat-sync) MFCC
    T: np.array((N, 6))
        (Beat-sync) Tonnetz
    cqt: np.array((N, msaf.Anal.cqt_bins))
        (Beat-sync) Constant-Q transform
    G: np.array
        (Beat-sync) Gammatone features
    beats: np.array(T)
        Beats in seconds
    dur: float
        Song duration
    analysis : dict
        Parameters of analysis of track (e.g. sampling rate)

    Raises
    ------
    RuntimeError
        If annotated beats are requested and the references can't be read.
    ValueError
        If annotated beats are requested and the reference has none.
    """
    if pre_features is not None:
        # Pre computed features: beat-sync entries carry the "bs_" prefix
        feat_prefix = "" if framesync else "bs_"
        C = pre_features["%shpcp" % feat_prefix]
        M = pre_features["%smfcc" % feat_prefix]
        T = pre_features["%stonnetz" % feat_prefix]
        cqt = pre_features["%scqt" % feat_prefix]
        G = pre_features["%sgmt" % feat_prefix]
        beats = pre_features["beats"]
        dur = pre_features["anal"]["dur"]
        analysis = pre_features["anal"]
        return C, M, T, cqt, G, beats, dur, analysis

    # Dataset path
    ds_path = os.path.dirname(os.path.dirname(audio_path))
    # NOTE(review): dropping the last 4 characters assumes a three-letter
    # audio extension -- confirm this matches how the files are written.
    track_name = os.path.basename(audio_path)[:-4]

    # Read the features file
    features_path = os.path.join(ds_path, msaf.Dataset.features_dir,
                                 track_name + msaf.Dataset.features_ext)
    with open(features_path, "r") as f:
        feats = json.load(f)

    # Select the set of features and the associated beats
    if framesync:
        feat_str = "framesync"
        beats = None
    elif annot_beats:
        # Read references. The path is built outside the try block so that
        # it is always defined for the error message.
        annotation_path = os.path.join(
            ds_path, msaf.Dataset.references_dir,
            track_name + msaf.Dataset.references_ext)
        try:
            jam = jams2.load(annotation_path)
        except Exception:
            raise RuntimeError("No references found in file %s" %
                               annotation_path)

        feat_str = "ann_beatsync"
        beat_data = jam.beats[0].data
        if beat_data == []:
            raise ValueError("No beat annotations found in file %s" %
                             annotation_path)
        beats = np.unique([data.time.value for data in beat_data])
    else:
        feat_str = "est_beatsync"
        beats = np.asarray(feats["beats"]["times"])

    C = np.asarray(feats[feat_str]["hpcp"])
    M = np.asarray(feats[feat_str]["mfcc"])
    T = np.asarray(feats[feat_str]["tonnetz"])
    cqt = np.asarray(feats[feat_str]["cqt"])
    # Mi: added the Gammatone features
    G = np.asarray(feats[feat_str]["gmt"])
    analysis = feats["analysis"]
    dur = analysis["dur"]

    # Frame times might be shorter than the actual number of features.
    if framesync:
        n_frames = len(utils.get_time_frames(dur, analysis))
        C = C[:n_frames]
        M = M[:n_frames]
        T = T[:n_frames]
        # cqt is truncated too so that every feature has the same length
        cqt = cqt[:n_frames]
        G = G[:n_frames]

    return C, M, T, cqt, G, beats, dur, analysis