def predict(flac_path, title="", model_path="./model", diff_root_only=True, max_num_chord=4): label_path = "chord_labels.txt" # Estimate the bpm of the audio beat_proc = RNNBeatProcessor() tempo_proc = TempoEstimationProcessor(min_bpm=50, max_bpm=180, fps=100) beat_processed = beat_proc(flac_path) tempo_estimation = tempo_proc(beat_processed) BPM = BPM_selector(tempo_estimation) sec_per_beat = 60 / BPM sec_per_frame = 2048 / 16000 # set eighth note as the minimum duration of the chord min_duration = sec_per_beat / 2 # Read chord labels file with open(label_path) as f: with torch.no_grad(): chord_labels = ast.literal_eval(f.read()) # Process raw audio X = cqt_preprocess(flac_path) X = Variable( torch.from_numpy(np.expand_dims(X, axis=0)).float().cpu()) # Load model model = Net(1).cpu() state_dict = torch.load(model_path, map_location="cpu")["state_dict"] new_state_dict = OrderedDict() for k, v in state_dict.items(): name = k[7:] new_state_dict[name] = v model.load_state_dict(new_state_dict) model.eval() # Estimate estimation = np.zeros((22, X.shape[2])) estimation = model(X).data.cpu()[0][0] estimation = to_probability(estimation) # Post-processing estimation = dp_post_processing(estimation) # predict_list_majmin = _predict(estimation, chord_labels[13:], sec_per_frame, min_duration, mapping_majmin) predict_list_seventh = _predict(estimation, chord_labels[13:], sec_per_frame, min_duration, mapping_seventh) text = '' for chord in predict_list_seventh: text += f'{chord[0]}\t{chord[1]}\t{chord[2]}\n' return text
def getMadmomTempo(filename):
    """
    Call madmom's tempo estimation.

    :return: array of tempos, sorted in decreasing order of strength
    """
    from madmom.features.beats import RNNBeatProcessor
    from madmom.features.tempo import TempoEstimationProcessor
    act = RNNBeatProcessor()(filename)
    proc = TempoEstimationProcessor(fps=100)
    res = proc(act)
    return res[:, 0]
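# A minimal usage sketch (the audio path is a placeholder). madmom returns
# (tempo, strength) pairs sorted by strength, and getMadmomTempo() keeps
# only the tempo column, so index 0 is the most likely estimate in BPM.
tempos = getMadmomTempo("song.wav")
print(f"best tempo estimate: {tempos[0]:.1f} BPM")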
@classmethod
def add_tempo_arguments(cls, parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                        act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH):
    """
    Add tempo related arguments to an existing parser.

    :param parser:      existing argparse parser
    :param min_bpm:     minimum tempo [bpm]
    :param max_bpm:     maximum tempo [bpm]
    :param act_smooth:  smooth the activations over N seconds
    :param hist_smooth: smooth the tempo histogram over N bins
    :return:            tempo argument parser group
    """
    # pylint: disable=arguments-differ
    # TODO: import the TempoEstimation here otherwise we have a
    #       loop. This is super ugly, but right now I can't think of a
    #       better solution...
    from madmom.features.tempo import TempoEstimationProcessor as tempo
    return tempo.add_arguments(parser, method=None, min_bpm=min_bpm,
                               max_bpm=max_bpm, act_smooth=act_smooth,
                               hist_smooth=hist_smooth, alpha=None)
def madmom_features(self, fps=100):
    """
    Call madmom's implementation of RNN + DBN beat tracking. madmom's
    results are returned in terms of seconds, but they are rounded and
    converted to be in terms of hop_size so that they line up with the
    features. The novelty function is also computed as a side effect
    (and is the bottleneck of the computation), so it is returned as well.

    Parameters
    ----------
    fps: int
        Frames per second used in processing

    Returns
    -------
    {
        'tempos': ndarray(n_levels, 2)
            An array of tempo estimates in beats per minute,
            along with their confidences
        'onsets': ndarray(n_onsets)
            Array of onsets, where each onset indexes into a
            particular window
        'novfn': ndarray(n_frames)
            Evaluation of the RNN audio novelty function at each audio
            frame, in time increments equal to self.hop_length
        'snovfn': ndarray(n_frames)
            Superflux audio novelty function at each audio frame,
            in time increments equal to self.hop_length
    }
    """
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    from madmom.features.tempo import TempoEstimationProcessor
    from madmom.features.onsets import SpectralOnsetProcessor
    from madmom.audio.filters import LogarithmicFilterbank
    beatproc = DBNBeatTrackingProcessor(fps=fps)
    tempoproc = TempoEstimationProcessor(fps=fps)
    # This step is the computational bottleneck
    novfn = RNNBeatProcessor()(self.audio_file)
    beats = beatproc(novfn)
    tempos = tempoproc(novfn)
    onsets = np.array(np.round(beats * self.fs / float(self.hop_length)),
                      dtype=np.int64)
    # Resample the audio novelty function to correspond to the
    # correct hop length
    nframes = len(self.librosa_noveltyfn())
    novfn = np.interp(np.arange(nframes) * self.hop_length / float(self.fs),
                      np.arange(len(novfn)) / float(fps), novfn)
    # For good measure, also compute and return superflux
    sodf = SpectralOnsetProcessor(onset_method='superflux', fps=fps,
                                  filterbank=LogarithmicFilterbank,
                                  num_bands=24, log=np.log10)
    snovfn = sodf(self.audio_file)
    snovfn = np.interp(np.arange(nframes) * self.hop_length / float(self.fs),
                       np.arange(len(snovfn)) / float(fps), snovfn)
    return {'tempos': tempos, 'onsets': onsets, 'novfn': novfn,
            'snovfn': snovfn}
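# The resampling step above evaluates a curve sampled on madmom's fixed
# fps grid at the hop-based frame times. A standalone sketch of the same
# np.interp pattern; fs, hop_length and the curve are made-up placeholders.
import numpy as np

fps = 100                      # madmom's analysis rate
fs = 22050                     # assumed audio sample rate
hop_length = 512               # assumed feature hop size
novfn = np.random.rand(3000)   # placeholder novelty curve on the fps grid
nframes = int(len(novfn) / fps * fs / hop_length)
resampled = np.interp(np.arange(nframes) * hop_length / float(fs),
                      np.arange(len(novfn)) / float(fps), novfn)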
@classmethod
def add_tempo_arguments(cls, parser, method=TEMPO_METHOD, min_bpm=MIN_BPM,
                        max_bpm=MAX_BPM, act_smooth=ACT_SMOOTH,
                        hist_smooth=HIST_SMOOTH, alpha=ALPHA):
    """
    Add tempo arguments to an existing parser.

    :param parser:      existing argparse parser
    :param method:      tempo estimation method ['comb', 'acf']
    :param min_bpm:     minimum tempo [bpm]
    :param max_bpm:     maximum tempo [bpm]
    :param act_smooth:  smooth the activations over N seconds
    :param hist_smooth: smooth the tempo histogram over N bins
    :param alpha:       scaling factor of the comb filter
    :return:            tempo argument parser group
    """
    # TODO: import the TempoEstimation here otherwise we have a
    #       loop. This is super ugly, but right now I can't think of a
    #       better solution...
    from madmom.features.tempo import TempoEstimationProcessor as Tempo
    return Tempo.add_arguments(parser, method=method, min_bpm=min_bpm,
                               max_bpm=max_bpm, act_smooth=act_smooth,
                               hist_smooth=hist_smooth, alpha=alpha)
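# A minimal sketch of wiring this classmethod into an argparse-based CLI,
# assuming it lives on a processor class such as the BeatTrackingProcessor
# defined further below.
import argparse

parser = argparse.ArgumentParser(description='beat tracking')
BeatTrackingProcessor.add_tempo_arguments(parser)
args = parser.parse_args([])  # tempo defaults from the argument group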
def __init__(self, look_aside=LOOK_ASIDE, look_ahead=LOOK_AHEAD, fps=None,
             **kwargs):
    """
    Track the beats according to the previously determined (local) tempo
    by simply aligning them around the estimated position.

    :param look_aside: look this fraction of a beat interval to each side
                       of the assumed next beat position to look for the
                       most likely position of the next beat
    :param look_ahead: look N seconds in both directions to determine the
                       local tempo and align the beats accordingly

    If `look_ahead` is not set, a constant tempo throughout the whole
    piece is assumed. If `look_ahead` is set, the local tempo (in a range
    of +/- `look_ahead` seconds around the current position) is estimated
    and the next beat is tracked accordingly. This procedure is repeated
    from the new position to the end of the piece.

    "Enhanced Beat Tracking with Context-Aware Neural Networks"
    Sebastian Böck and Markus Schedl
    Proceedings of the 14th International Conference on Digital Audio
    Effects (DAFx), 2011

    Instead of the auto-correlation based method for tempo estimation, a
    comb filter is used by default. The behaviour can be controlled with
    the `tempo_method` parameter.
    """
    # import the TempoEstimation here otherwise we have a loop
    from madmom.features.tempo import TempoEstimationProcessor
    # save variables
    self.look_aside = look_aside
    self.look_ahead = look_ahead
    self.fps = fps
    # tempo estimator
    self.tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs)
import time
import pydub
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
from multiprocessing import Process
from madmom.features.tempo import TempoEstimationProcessor
from madmom.features.beats import RNNBeatProcessor
import stretch_body.robot  # needed for the Robot() call below

robot = stretch_body.robot.Robot()
robot.startup()
robot.stow()

filename = "./audios/forest.wav"

# Estimate the tempo of the audio file
proc = TempoEstimationProcessor(fps=100)
act = RNNBeatProcessor()(filename)
tempo = proc(act)
tempo = tempo[0][0]  # strongest tempo candidate [bpm]
t = 60.0 / tempo * 4  # seconds per four beats at the estimated tempo

# interonsets = np.ediff1d(onsets)
# interonsets = np.add.reduceat(interonsets, np.arange(0, len(interonsets), 8))
# print(interonsets)

xrotate = 3.14
xtilt = 0.5
xpan = 1
xwrist = 1.5
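# A sketch of acting on the estimated period: sleep out whatever remains of
# each bar between gestures. move() is a hypothetical placeholder for a
# robot motion command; only the timing logic is shown.
def play_to_beat(period_s, bars=8):
    for _ in range(bars):
        start = time.monotonic()
        # move()  # hypothetical: one gesture per bar
        time.sleep(max(0.0, period_s - (time.monotonic() - start)))

# play_to_beat(t)  # t = seconds per four beats, computed above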
# From madmom's beats module: this class relies on the module-level helpers
# Processor, smooth_signal and detect_beats, and on numpy imported as np.
class BeatTrackingProcessor(Processor):
    """
    Class for tracking beats with a simple tempo estimation and beat
    aligning.
    """
    LOOK_ASIDE = 0.2
    LOOK_AHEAD = 10
    # tempo defaults
    TEMPO_METHOD = 'comb'
    MIN_BPM = 40
    MAX_BPM = 240
    ACT_SMOOTH = 0.09
    HIST_SMOOTH = 7
    ALPHA = 0.79

    def __init__(self, look_aside=LOOK_ASIDE, look_ahead=LOOK_AHEAD,
                 fps=None, **kwargs):
        """
        Track the beats according to the previously determined (local)
        tempo by simply aligning them around the estimated position.

        :param look_aside: look this fraction of a beat interval to each
                           side of the assumed next beat position to look
                           for the most likely position of the next beat
        :param look_ahead: look N seconds in both directions to determine
                           the local tempo and align the beats accordingly

        If `look_ahead` is not set, a constant tempo throughout the whole
        piece is assumed. If `look_ahead` is set, the local tempo (in a
        range of +/- `look_ahead` seconds around the current position) is
        estimated and the next beat is tracked accordingly. This procedure
        is repeated from the new position to the end of the piece.

        "Enhanced Beat Tracking with Context-Aware Neural Networks"
        Sebastian Böck and Markus Schedl
        Proceedings of the 14th International Conference on Digital Audio
        Effects (DAFx), 2011

        Instead of the auto-correlation based method for tempo estimation,
        a comb filter is used by default. The behaviour can be controlled
        with the `tempo_method` parameter.
        """
        # import the TempoEstimation here otherwise we have a loop
        from madmom.features.tempo import TempoEstimationProcessor
        # save variables
        self.look_aside = look_aside
        self.look_ahead = look_ahead
        self.fps = fps
        # tempo estimator
        self.tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs)

    def process(self, activations):
        """
        Detect the beats in the given activation function.

        :param activations: beat activation function
        :return:            detected beat positions [seconds]
        """
        # smooth activations
        act_smooth = int(self.fps * self.tempo_estimator.act_smooth)
        activations = smooth_signal(activations, act_smooth)
        # TODO: refactor the interval stuff to use TempoEstimation
        # if look_ahead is not defined, assume a global tempo
        if self.look_ahead is None:
            # create an interval histogram
            histogram = self.tempo_estimator.interval_histogram(activations)
            # get the dominant interval
            interval = self.tempo_estimator.dominant_interval(histogram)
            # detect beats based on this interval
            detections = detect_beats(activations, interval, self.look_aside)
        else:
            # allow varying tempo
            look_ahead_frames = int(self.look_ahead * self.fps)
            # detect the beats
            detections = []
            pos = 0
            # TODO: make this _much_ faster!
            while pos < len(activations):
                # look N frames around the actual position
                start = pos - look_ahead_frames
                end = pos + look_ahead_frames
                if start < 0:
                    # pad the beginning with zeros
                    act = np.append(np.zeros(-start), activations[0:end])
                elif end > len(activations):
                    # append zeros accordingly
                    zeros = np.zeros(end - len(activations))
                    act = np.append(activations[start:], zeros)
                else:
                    act = activations[start:end]
                # create an interval histogram
                histogram = self.tempo_estimator.interval_histogram(act)
                # get the dominant interval
                interval = self.tempo_estimator.dominant_interval(histogram)
                # detect beats based on this interval
                positions = detect_beats(act, interval, self.look_aside)
                # correct the beat positions, i.e. add the offset of the
                # window's start position
                positions += start
                # search for the closest beat to the predicted position
                pos = positions[(np.abs(positions - pos)).argmin()]
                # append it to the detected beats
                detections.append(pos)
                pos += interval
        # convert the detected beats to a list of timestamps
        detections = np.array(detections) / float(self.fps)
        # remove beats with negative times and return them
        return detections[np.searchsorted(detections, 0):]
        # only return beats with a bigger inter beat interval than that of
        # the maximum allowed tempo
        # return np.append(detections[0], detections[1:][np.diff(detections) >
        #                                                (60. / max_bpm)])

    @classmethod
    def add_arguments(cls, parser, look_aside=LOOK_ASIDE,
                      look_ahead=LOOK_AHEAD):
        """
        Add beat tracking related arguments to an existing parser.

        :param parser:     existing argparse parser
        :param look_aside: look this fraction of a beat interval to each
                           side of the assumed next beat position to look
                           for the most likely position of the next beat
        :param look_ahead: look N seconds in both directions to determine
                           the local tempo and align the beats accordingly
        :return:           beat argument parser group

        Parameters are included in the group only if they are not 'None'.
        """
        # add beat detection related options to the existing parser
        g = parser.add_argument_group('beat detection arguments')
        # TODO: unify look_aside with CRFBeatDetection's interval_sigma
        if look_aside is not None:
            g.add_argument('--look_aside', action='store', type=float,
                           default=look_aside,
                           help='look this fraction of a beat interval to '
                                'each side of the assumed next beat position '
                                'to look for the most likely position of the '
                                'next beat [default=%(default).2f]')
        if look_ahead is not None:
            g.add_argument('--look_ahead', action='store', type=float,
                           default=look_ahead,
                           help='look this many seconds in both directions '
                                'to determine the local tempo and align the '
                                'beats accordingly [default=%(default).2f]')
        # return the argument group so it can be modified if needed
        return g

    @classmethod
    def add_tempo_arguments(cls, parser, method=TEMPO_METHOD,
                            min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                            act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH,
                            alpha=ALPHA):
        """
        Add tempo arguments to an existing parser.

        :param parser:      existing argparse parser
        :param method:      tempo estimation method ['comb', 'acf']
        :param min_bpm:     minimum tempo [bpm]
        :param max_bpm:     maximum tempo [bpm]
        :param act_smooth:  smooth the activations over N seconds
        :param hist_smooth: smooth the tempo histogram over N bins
        :param alpha:       scaling factor of the comb filter
        :return:            tempo argument parser group
        """
        # TODO: import the TempoEstimation here otherwise we have a
        #       loop. This is super ugly, but right now I can't think of a
        #       better solution...
        from madmom.features.tempo import TempoEstimationProcessor as Tempo
        return Tempo.add_arguments(parser, method=method, min_bpm=min_bpm,
                                   max_bpm=max_bpm, act_smooth=act_smooth,
                                   hist_smooth=hist_smooth, alpha=alpha)
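# A minimal usage sketch of the pipeline this class belongs to, via
# madmom's public API (the audio path is a placeholder): compute the beat
# activation function with the RNN, then align beats to the local tempo.
from madmom.features.beats import RNNBeatProcessor, BeatTrackingProcessor

act = RNNBeatProcessor()('song.wav')
beats = BeatTrackingProcessor(fps=100)(act)  # beat times in seconds
print(beats[:4])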