def __init__(self, onset_prob=0.8, note_prob=0.8, offset_prob=0.15,
             end_prob=0.8, attack_length=0.04, decay_length=0.04,
             release_length=0.02, onset_threshold=None,
             note_threshold=None, complete=True, fps=50., **kwargs):
    """
    Build the ADSR state space, its models and the HMM used for decoding.

    Parameters
    ----------
    onset_prob, note_prob, offset_prob, end_prob : float, optional
        Forwarded unchanged to `ADSRTransitionModel`.
    attack_length, decay_length, release_length : float, optional
        Phase lengths (presumably in seconds -- confirm with callers).
        Each one is multiplied by `fps` and truncated with `int()` before
        it is handed to `ADSRStateSpace`.
    onset_threshold, note_threshold : float, optional
        Stored on the instance without further processing.
    complete : bool, optional
        Stored on the instance without further processing.
    fps : float, optional
        Scaling factor for the lengths above; also stored on the instance.
    **kwargs
        Accepted and ignored.

    """
    # pylint: disable=unused-argument
    # express the three phase lengths as whole numbers (truncating)
    attack_frames = int(attack_length * fps)
    decay_frames = int(decay_length * fps)
    release_frames = int(release_length * fps)
    # state space, shared by both models below
    self.st = state_space = ADSRStateSpace(attack_length=attack_frames,
                                           decay_length=decay_frames,
                                           release_length=release_frames)
    # transition model
    self.tm = transitions = ADSRTransitionModel(state_space,
                                                onset_prob=onset_prob,
                                                note_prob=note_prob,
                                                offset_prob=offset_prob,
                                                end_prob=end_prob)
    # observation model
    self.om = observations = ADSRObservationModel(state_space)
    # the HMM ties both models together (no initial distribution given)
    self.hmm = HiddenMarkovModel(transitions, observations, None)
    # remember the remaining settings
    self.onset_threshold = onset_threshold
    self.note_threshold = note_threshold
    self.complete = complete
    self.fps = fps
class ADSRNoteTrackingProcessor(Processor):
    """
    Track notes by decoding per-pitch activations with an ADSR HMM.

    Every pitch is decoded independently with the Viterbi algorithm; the
    resulting state paths are cut into note segments which are optionally
    filtered by completeness and by note / onset thresholds.

    """
    # class level constants (same values as the `__init__` defaults)
    ONSET_PROB = 0.8
    NOTE_PROB = 0.8
    OFFSET_PROB = 0.15

    # value added to the pitch index (second axis of the activations) to
    # obtain the reported pitch
    pitch_offset = 21

    def __init__(self, onset_prob=0.8, note_prob=0.8, offset_prob=0.15,
                 end_prob=0.8, attack_length=0.04, decay_length=0.04,
                 release_length=0.02, onset_threshold=None,
                 note_threshold=None, complete=True, fps=50., **kwargs):
        """
        Build the state space, the models and the HMM.

        Parameters
        ----------
        onset_prob, note_prob, offset_prob, end_prob : float, optional
            Forwarded unchanged to `ADSRTransitionModel`.
        attack_length, decay_length, release_length : float, optional
            Phase lengths (presumably in seconds -- confirm with callers);
            multiplied by `fps` and truncated with `int()` before being
            passed to `ADSRStateSpace`.
        onset_threshold : float, optional
            Segments whose maximum onset activation is below this value
            are discarded; `None` disables the check.
        note_threshold : float, optional
            Segments whose maximum note activation is below this value
            are discarded; `None` disables the check.
        complete : bool, optional
            Only keep segments which contain every state of the note path.
        fps : float, optional
            Frames per second, used to convert between frames and seconds.
        **kwargs
            Accepted and ignored.

        """
        # pylint: disable=unused-argument
        # state space
        self.st = ADSRStateSpace(attack_length=int(attack_length * fps),
                                 decay_length=int(decay_length * fps),
                                 release_length=int(release_length * fps))
        # transition model
        self.tm = ADSRTransitionModel(self.st, onset_prob=onset_prob,
                                      note_prob=note_prob,
                                      offset_prob=offset_prob,
                                      end_prob=end_prob)
        # observation model
        self.om = ADSRObservationModel(self.st)
        # instantiate a HMM
        self.hmm = HiddenMarkovModel(self.tm, self.om, None)
        # save variables
        self.onset_threshold = onset_threshold
        self.note_threshold = note_threshold
        self.complete = complete
        self.fps = fps

    def process(self, activations, **kwargs):
        """
        Detect the notes in the given activation function.

        Parameters
        ----------
        activations : numpy array
            Note activation function, indexed as [frame, pitch, channel];
            channel 0 is used as the note (sounding) activation and
            channel 1 as the onset activation.
        **kwargs
            Accepted and ignored.

        Returns
        -------
        notes : numpy array
            Detected notes, one row per note:
            [onset time in seconds, pitch, duration in seconds].
        paths : numpy array
            The decoded HMM state path of every pitch.

        """
        # pylint: disable=unused-argument
        notes = []
        paths = []
        # all states a complete note has to pass through
        # NOTE(review): `.any()` below tests for non-zero values, so a
        # missing state with index 0 would go unnoticed -- confirm that
        # `self.st.attack` is never 0
        note_path = np.arange(self.st.attack, self.st.release)
        # process each pitch individually
        for pitch in range(activations.shape[1]):
            # decode activations for this pitch with HMM
            path, _ = self.hmm.viterbi(activations[:, pitch, :])
            paths.append(path)
            # extract HMM note segments
            segments = np.logical_and(path > self.st.attack,
                                      path < self.st.release)
            # extract start and end positions (transition points)
            # Note: use the builtin `int`, the `np.int` alias has been
            #       removed from NumPy
            idx = np.nonzero(np.diff(segments.astype(int)))[0]
            # add end if needed
            if len(idx) % 2 != 0:
                idx = np.append(idx, [len(activations)])
            # all sounding frames
            frames = activations[:, pitch, 0]
            # all frames with onset activations
            onsets = activations[:, pitch, 1]
            # iterate over all segments to decide which to keep
            for onset, offset in idx.reshape((-1, 2)):
                # extract note segment
                segment = path[onset:offset]
                # discard segments which do not contain the complete note
                # path
                if self.complete and np.setdiff1d(note_path, segment).any():
                    continue
                # discard segments without a real note (a threshold of
                # `None` disables the check; comparing against `None`
                # raises a TypeError on Python 3)
                if (self.note_threshold is not None and
                        frames[onset:offset].max() < self.note_threshold):
                    continue
                # discard segments without a real onset
                if (self.onset_threshold is not None and
                        onsets[onset:offset].max() < self.onset_threshold):
                    continue
                # append segment as note
                notes.append([onset / self.fps,
                              pitch + self.pitch_offset,
                              (offset - onset) / self.fps])
        # sort the notes (by onset time first) and return them together
        # with the decoded paths
        notes = np.array(sorted(notes), ndmin=2)
        return notes, np.array(paths)
def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
             num_tempo_states=NUM_TEMPO_STATES,
             transition_lambda=TRANSITION_LAMBDA,
             norm_observations=NORM_OBSERVATIONS, downbeats=False,
             fps=None, **kwargs):
    """
    Track the beats and downbeats with a Dynamic Bayesian Network (DBN)
    approximated by a Hidden Markov Model (HMM).

    :param pattern_files:     list of files with the patterns (including
                              the fitted GMMs and information about the
                              number of beats)

    Parameters for the transition model:

    Each of the following arguments expects a list with as many items as
    rhythmic patterns.

    :param min_bpm:           list with minimum tempi used for tracking
    :param max_bpm:           list with maximum tempi used for tracking
    :param num_tempo_states:  list with number of tempo states (if set,
                              limit the number of states and use a log
                              spacing, otherwise a linear spacing). If a
                              single value is given, the same value is
                              assumed for all patterns.
    :param transition_lambda: (list with) lambda(s) for the exponential
                              tempo change distribution (higher values
                              prefer a constant tempo over a tempo change
                              from one beat to the next one). If a single
                              value is given, the same value is assumed
                              for all patterns.

    Parameters for the observation model:

    :param norm_observations: normalise the observations

    Other parameters:

    :param downbeats:         report only the downbeats (default: beats
                              and the respective position)
    :param fps:               frames per second of the activations; used
                              to convert the tempi to frame intervals
    :param kwargs:            accepted and ignored

    :raises ValueError:       if the per-pattern lists do not have as many
                              items as `pattern_files`, or if no pattern
                              is given

    "Rhythmic Pattern Modeling for Beat and Downbeat Tracking in Musical
     Audio"
    Florian Krebs, Sebastian Böck and Gerhard Widmer
    Proceedings of the 14th International Society for Music Information
    Retrieval Conference (ISMIR), 2013

    Instead of the originally proposed state space and transition model
    for the DBN, the following is used:

    "An Efficient State Space Model for Joint Tempo and Meter Tracking"
    Florian Krebs, Sebastian Böck and Gerhard Widmer
    Proceedings of the 16th International Society for Music Information
    Retrieval Conference (ISMIR), 2015.

    """
    # pylint: disable=unused-argument
    # pylint: disable=no-name-in-module
    from madmom.ml.hmm import HiddenMarkovModel as Hmm
    from .beats_hmm import (DownBeatTrackingStateSpace as St,
                            DownBeatTrackingTransitionModel as Tm,
                            GMMDownBeatTrackingObservationModel as Om)
    # `cPickle` exists only on Python 2, Python 3 merged it into `pickle`
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    # expand num_tempo_states and transition_lambda to lists if needed
    # Note: the number of patterns determines the length, a single value
    #       has no length itself
    num_patterns = len(pattern_files)
    if not isinstance(num_tempo_states, list):
        num_tempo_states = [num_tempo_states] * num_patterns
    if not isinstance(transition_lambda, list):
        transition_lambda = [transition_lambda] * num_patterns
    # check if all lists have the same length
    if not (len(min_bpm) == len(max_bpm) == len(num_tempo_states) ==
            len(transition_lambda) == num_patterns):
        raise ValueError('`min_bpm`, `max_bpm`, `num_tempo_states` and '
                         '`transition_lambda` must have the same length '
                         'as number of patterns.')
    # load the patterns
    # Note: pickles are binary data, thus open the files in binary mode
    # SECURITY: unpickling executes arbitrary code, only load pattern
    #           files from trusted sources
    patterns = []
    for pattern_file in pattern_files:
        with open(pattern_file, 'rb') as f:
            patterns.append(pickle.load(f))
    if len(patterns) == 0:
        raise ValueError('at least one rhythmical pattern must be given.')
    # extract the GMMs and number of beats
    gmms = [p['gmms'] for p in patterns]
    self.num_beats = [p['num_beats'] for p in patterns]
    # save additional variables
    self.downbeats = downbeats
    self.fps = fps
    # convert timing information to construct state space
    # Note: since we model a complete bar, we must multiply the intervals
    #       by the number of beats in that pattern
    min_interval = 60. * self.fps / np.asarray(max_bpm) * self.num_beats
    max_interval = 60. * self.fps / np.asarray(min_bpm) * self.num_beats
    # state space
    self.st = St(min_interval, max_interval, num_tempo_states)
    # transition model
    self.tm = Tm(self.st, transition_lambda)
    # observation model
    self.om = Om(gmms, self.st, norm_observations)
    # instantiate a HMM
    self.hmm = Hmm(self.tm, self.om, None)
class DownbeatTrackingProcessor(Processor):
    """
    Beat and downbeat tracking with a dynamic Bayesian network (DBN).

    """
    # TODO: this should not be lists (lists are mutable!)
    MIN_BPM = [55, 60]
    MAX_BPM = [205, 225]
    NUM_TEMPO_STATES = [None, None]
    TRANSITION_LAMBDA = [100, 100]
    NORM_OBSERVATIONS = False

    def __init__(self, pattern_files, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                 num_tempo_states=NUM_TEMPO_STATES,
                 transition_lambda=TRANSITION_LAMBDA,
                 norm_observations=NORM_OBSERVATIONS, downbeats=False,
                 fps=None, **kwargs):
        """
        Track the beats and downbeats with a Dynamic Bayesian Network (DBN)
        approximated by a Hidden Markov Model (HMM).

        :param pattern_files:     list of files with the patterns
                                  (including the fitted GMMs and
                                  information about the number of beats)

        Parameters for the transition model:

        Each of the following arguments expects a list with as many items
        as rhythmic patterns.

        :param min_bpm:           list with minimum tempi used for tracking
        :param max_bpm:           list with maximum tempi used for tracking
        :param num_tempo_states:  list with number of tempo states (if set,
                                  limit the number of states and use a log
                                  spacing, otherwise a linear spacing). If
                                  a single value is given, the same value
                                  is assumed for all patterns.
        :param transition_lambda: (list with) lambda(s) for the exponential
                                  tempo change distribution (higher values
                                  prefer a constant tempo over a tempo
                                  change from one beat to the next one). If
                                  a single value is given, the same value
                                  is assumed for all patterns.

        Parameters for the observation model:

        :param norm_observations: normalise the observations

        Other parameters:

        :param downbeats:         report only the downbeats (default: beats
                                  and the respective position)
        :param fps:               frames per second of the activations;
                                  used to convert the tempi to frame
                                  intervals
        :param kwargs:            accepted and ignored

        :raises ValueError:       if the per-pattern lists do not have as
                                  many items as `pattern_files`, or if no
                                  pattern is given

        "Rhythmic Pattern Modeling for Beat and Downbeat Tracking in Musical
         Audio"
        Florian Krebs, Sebastian Böck and Gerhard Widmer
        Proceedings of the 14th International Society for Music Information
        Retrieval Conference (ISMIR), 2013

        Instead of the originally proposed state space and transition model
        for the DBN, the following is used:

        "An Efficient State Space Model for Joint Tempo and Meter Tracking"
        Florian Krebs, Sebastian Böck and Gerhard Widmer
        Proceedings of the 16th International Society for Music Information
        Retrieval Conference (ISMIR), 2015.

        """
        # pylint: disable=unused-argument
        # pylint: disable=no-name-in-module
        from madmom.ml.hmm import HiddenMarkovModel as Hmm
        from .beats_hmm import (DownBeatTrackingStateSpace as St,
                                DownBeatTrackingTransitionModel as Tm,
                                GMMDownBeatTrackingObservationModel as Om)
        # `cPickle` exists only on Python 2, Python 3 merged it into
        # `pickle`
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        # expand num_tempo_states and transition_lambda to lists if needed
        # Note: the number of patterns determines the length, a single
        #       value has no length itself
        num_patterns = len(pattern_files)
        if not isinstance(num_tempo_states, list):
            num_tempo_states = [num_tempo_states] * num_patterns
        if not isinstance(transition_lambda, list):
            transition_lambda = [transition_lambda] * num_patterns
        # check if all lists have the same length
        if not (len(min_bpm) == len(max_bpm) == len(num_tempo_states) ==
                len(transition_lambda) == num_patterns):
            raise ValueError('`min_bpm`, `max_bpm`, `num_tempo_states` and '
                             '`transition_lambda` must have the same length '
                             'as number of patterns.')
        # load the patterns
        # Note: pickles are binary data, thus open the files in binary mode
        # SECURITY: unpickling executes arbitrary code, only load pattern
        #           files from trusted sources
        patterns = []
        for pattern_file in pattern_files:
            with open(pattern_file, 'rb') as f:
                patterns.append(pickle.load(f))
        if len(patterns) == 0:
            raise ValueError('at least one rhythmical pattern must be given.')
        # extract the GMMs and number of beats
        gmms = [p['gmms'] for p in patterns]
        self.num_beats = [p['num_beats'] for p in patterns]
        # save additional variables
        self.downbeats = downbeats
        self.fps = fps
        # convert timing information to construct state space
        # Note: since we model a complete bar, we must multiply the
        #       intervals by the number of beats in that pattern
        min_interval = 60. * self.fps / np.asarray(max_bpm) * self.num_beats
        max_interval = 60. * self.fps / np.asarray(min_bpm) * self.num_beats
        # state space
        self.st = St(min_interval, max_interval, num_tempo_states)
        # transition model
        self.tm = Tm(self.st, transition_lambda)
        # observation model
        self.om = Om(gmms, self.st, norm_observations)
        # instantiate a HMM
        self.hmm = Hmm(self.tm, self.om, None)

    def process(self, activations):
        """
        Detect the beats in the given activation function.

        :param activations: beat activation function
        :return:            if `downbeats` is set, an array with the
                            downbeat positions [seconds]; otherwise a list
                            of (beat position [seconds], beat number)
                            tuples

        """
        # get the best state path by calling the viterbi algorithm
        path, _ = self.hmm.viterbi(activations)
        # get the corresponding pattern (use only the first state, since it
        # doesn't change throughout the sequence)
        pattern = self.st.pattern(path[0])
        # the position inside the pattern (0..1)
        position = self.st.position(path)
        # beat position (= weighted by number of beats in bar)
        beat_counter = (position * self.num_beats[pattern]).astype(int)
        # transitions are the points where the beat counters change
        # FIXME: we might miss the first or last beat!
        #        we could calculate the interval towards the beginning/end
        #        to decide whether to include these points
        beat_positions = np.nonzero(np.diff(beat_counter))[0] + 1
        # the beat numbers are the counters + 1 at the transition points
        beat_numbers = beat_counter[beat_positions] + 1
        # convert the detected beats to a list of timestamps
        beats = np.asarray(beat_positions) / float(self.fps)
        # return the downbeats or beats and their beat number
        if self.downbeats:
            return beats[beat_numbers == 1]
        # Note: materialise the pairs, `zip` is a one-shot iterator on
        #       Python 3
        return list(zip(beats, beat_numbers))

    @classmethod
    def add_arguments(cls, parser, pattern_files=None, min_bpm=MIN_BPM,
                      max_bpm=MAX_BPM, num_tempo_states=NUM_TEMPO_STATES,
                      transition_lambda=TRANSITION_LAMBDA,
                      norm_observations=NORM_OBSERVATIONS):
        """
        Add HMM related arguments to an existing parser.

        :param parser:            existing argparse parser

        Parameters for the patterns (i.e. fitted GMMs):

        :param pattern_files:     load the patterns from these files; the
                                  option is only added if this is not None

        Parameters for the transition model:

        Each of the following arguments expects a list with as many items
        as rhythmic patterns.

        :param min_bpm:           list with minimum tempi used for tracking
        :param max_bpm:           list with maximum tempi used for tracking
        :param num_tempo_states:  list with number of tempo states (if set,
                                  limit the number of states and use a log
                                  spacing, otherwise a linear spacing)
        :param transition_lambda: list with lambdas for the exponential
                                  tempo change distribution (higher values
                                  prefer a constant tempo over a tempo
                                  change from one bar to the next one)

        Parameters for the observation model:

        :param norm_observations: normalize the observations

        :return:                  the argument group added last, i.e. the
                                  'output arguments' group holding the
                                  `--downbeats` switch

        """
        from madmom.utils import OverrideDefaultListAction
        # add GMM options
        if pattern_files is not None:
            g = parser.add_argument_group('GMM arguments')
            g.add_argument('--pattern_files',
                           action=OverrideDefaultListAction,
                           default=pattern_files,
                           help='load the patterns (with the fitted GMMs) '
                                'from these files (comma separated list)')
        # add HMM parser group
        g = parser.add_argument_group('dynamic Bayesian Network arguments')
        g.add_argument('--min_bpm', action=OverrideDefaultListAction,
                       default=min_bpm, type=float, sep=',',
                       help='minimum tempo (comma separated list with one '
                            'value per pattern) [bpm, default=%(default)s]')
        g.add_argument('--max_bpm', action=OverrideDefaultListAction,
                       default=max_bpm, type=float, sep=',',
                       help='maximum tempo (comma separated list with one '
                            'value per pattern) [bpm, default=%(default)s]')
        g.add_argument('--num_tempo_states',
                       action=OverrideDefaultListAction,
                       default=num_tempo_states, type=int, sep=',',
                       help='limit the number of tempo states; if set, align '
                            'them with a log spacing, otherwise linearly '
                            '(comma separated list with one value per pattern)'
                            ' [default=%(default)s]')
        g.add_argument('--transition_lambda',
                       action=OverrideDefaultListAction,
                       default=transition_lambda, type=float, sep=',',
                       help='lambda of the tempo transition distribution; '
                            'higher values prefer a constant tempo over a '
                            'tempo change from one bar to the next one (comma '
                            'separated list with one value per pattern) '
                            '[default=%(default)s]')
        # observation model stuff
        # Note: only the switch which toggles the default is offered
        if norm_observations:
            g.add_argument('--no_norm_obs', dest='norm_observations',
                           action='store_false', default=norm_observations,
                           help='do not normalize the observations of the HMM')
        else:
            g.add_argument('--norm_obs', dest='norm_observations',
                           action='store_true', default=norm_observations,
                           help='normalize the observations of the HMM')
        # add output format stuff
        g = parser.add_argument_group('output arguments')
        g.add_argument('--downbeats', action='store_true', default=False,
                       help='output only the downbeats')
        # return the argument group so it can be modified if needed
        return g
def __init__(self, correct=CORRECT, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
             num_tempo_states=NUM_TEMPO_STATES,
             transition_lambda=TRANSITION_LAMBDA,
             observation_lambda=OBSERVATION_LAMBDA,
             norm_observations=NORM_OBSERVATIONS, fps=None, **kwargs):
    """
    Set up beat tracking with a dynamic Bayesian network (DBN) which is
    approximated by a Hidden Markov Model (HMM).

    :param correct:            correct the beats (i.e. align them to the
                               nearest peak of the beat activation
                               function)

    Parameters for the transition model:

    :param min_bpm:            minimum tempo used for beat tracking
    :param max_bpm:            maximum tempo used for beat tracking
    :param num_tempo_states:   number of tempo states (if set, limit the
                               number of states and use a log spacing,
                               otherwise a linear spacing)
    :param transition_lambda:  lambda for the exponential tempo change
                               distribution (higher values prefer a
                               constant tempo over a tempo change from
                               one beat to the next one)

    Parameters for the observation model:

    :param observation_lambda: split one beat period into N parts, the
                               first representing beat states and the
                               remaining non-beat states
    :param norm_observations:  normalize the observations

    Other parameters:

    :param fps:                frames per second; used to convert the
                               tempi to intervals
    :param kwargs:             accepted and ignored

    "A Multi-Model Approach to Beat Tracking Considering Heterogeneous
     Music Styles"
    Sebastian Böck, Florian Krebs and Gerhard Widmer
    Proceedings of the 15th International Society for Music Information
    Retrieval Conference (ISMIR), 2014

    Instead of the originally proposed state space and transition model
    for the DBN, the following is used:

    "An Efficient State Space Model for Joint Tempo and Meter Tracking"
    Florian Krebs, Sebastian Böck and Gerhard Widmer
    Proceedings of the 16th International Society for Music Information
    Retrieval Conference (ISMIR), 2015.

    """
    # pylint: disable=unused-argument
    # pylint: disable=no-name-in-module
    from madmom.ml.hmm import HiddenMarkovModel
    from .beats_hmm import (BeatTrackingStateSpace,
                            BeatTrackingTransitionModel,
                            BeatTrackingObservationModel)
    # the fastest tempo yields the shortest beat interval and vice versa
    shortest_interval = 60. * fps / max_bpm
    longest_interval = 60. * fps / min_bpm
    # state space spanning all intervals in between
    self.st = state_space = BeatTrackingStateSpace(shortest_interval,
                                                   longest_interval,
                                                   num_tempo_states)
    # transition model
    self.tm = transitions = BeatTrackingTransitionModel(state_space,
                                                        transition_lambda)
    # observation model
    self.om = observations = BeatTrackingObservationModel(
        state_space, observation_lambda, norm_observations)
    # the HMM ties both models together (no initial distribution given)
    self.hmm = HiddenMarkovModel(transitions, observations, None)
    # remember the remaining settings
    self.fps = fps
    self.correct = correct
class DBNBeatTrackingProcessor(Processor):
    """
    Beat tracking with RNNs and a dynamic Bayesian network (DBN).

    """
    CORRECT = True
    NUM_TEMPO_STATES = None
    TRANSITION_LAMBDA = 100
    OBSERVATION_LAMBDA = 16
    NORM_OBSERVATIONS = False
    MIN_BPM = 55
    MAX_BPM = 215

    def __init__(self, correct=CORRECT, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                 num_tempo_states=NUM_TEMPO_STATES,
                 transition_lambda=TRANSITION_LAMBDA,
                 observation_lambda=OBSERVATION_LAMBDA,
                 norm_observations=NORM_OBSERVATIONS, fps=None, **kwargs):
        """
        Track the beats with a dynamic Bayesian network (DBN) approximated
        by a Hidden Markov Model (HMM).

        :param correct:            correct the beats (i.e. align them to
                                   the nearest peak of the beat activation
                                   function)

        Parameters for the transition model:

        :param min_bpm:            minimum tempo used for beat tracking
        :param max_bpm:            maximum tempo used for beat tracking
        :param num_tempo_states:   number of tempo states (if set, limit
                                   the number of states and use a log
                                   spacing, otherwise a linear spacing)
        :param transition_lambda:  lambda for the exponential tempo change
                                   distribution (higher values prefer a
                                   constant tempo over a tempo change from
                                   one beat to the next one)

        Parameters for the observation model:

        :param observation_lambda: split one beat period into N parts, the
                                   first representing beat states and the
                                   remaining non-beat states
        :param norm_observations:  normalize the observations

        Other parameters:

        :param fps:                frames per second of the activations;
                                   used to convert the tempi to frame
                                   intervals and the beats to seconds
        :param kwargs:             accepted and ignored

        "A Multi-Model Approach to Beat Tracking Considering Heterogeneous
         Music Styles"
        Sebastian Böck, Florian Krebs and Gerhard Widmer
        Proceedings of the 15th International Society for Music Information
        Retrieval Conference (ISMIR), 2014

        Instead of the originally proposed state space and transition model
        for the DBN, the following is used:

        "An Efficient State Space Model for Joint Tempo and Meter Tracking"
        Florian Krebs, Sebastian Böck and Gerhard Widmer
        Proceedings of the 16th International Society for Music Information
        Retrieval Conference (ISMIR), 2015.

        """
        # pylint: disable=unused-argument
        # pylint: disable=no-name-in-module
        from madmom.ml.hmm import HiddenMarkovModel as Hmm
        from .beats_hmm import (BeatTrackingStateSpace as St,
                                BeatTrackingTransitionModel as Tm,
                                BeatTrackingObservationModel as Om)
        # convert timing information to construct state space
        min_interval = 60. * fps / max_bpm
        max_interval = 60. * fps / min_bpm
        self.st = St(min_interval, max_interval, num_tempo_states)
        # transition model
        self.tm = Tm(self.st, transition_lambda)
        # observation model
        self.om = Om(self.st, observation_lambda, norm_observations)
        # instantiate a HMM
        self.hmm = Hmm(self.tm, self.om, None)
        # save variables
        self.fps = fps
        self.correct = correct

    def process(self, activations):
        """
        Detect the beats in the given activation function.

        :param activations: beat activation function
        :return:            detected beat positions [seconds]; an empty
                            array if the activation function is empty

        """
        # an empty activation function cannot contain any beats; return
        # early instead of failing when indexing the first / last frame
        if len(activations) == 0:
            return np.empty(0)
        # get the best state path by calling the viterbi algorithm
        path, _ = self.hmm.viterbi(activations)
        # correct the beat positions if needed
        if self.correct:
            beats = []
            # for each detection determine the "beat range", i.e. states
            # where the pointers of the observation model are 0
            beat_range = self.om.pointers[path]
            # get all change points between True and False
            idx = np.nonzero(np.diff(beat_range))[0] + 1
            # if the first frame is in the beat range (pointer == 0), add
            # a change at frame 0
            if not beat_range[0]:
                idx = np.r_[0, idx]
            # if the last frame is in the beat range, append the length of
            # the array
            if not beat_range[-1]:
                idx = np.r_[idx, beat_range.size]
            # iterate over all regions; the change points now come in
            # (start, end) pairs of the beat ranges
            for left, right in idx.reshape((-1, 2)):
                # pick the frame with the highest activations value
                beats.append(np.argmax(activations[left:right]) + left)
            beats = np.asarray(beats)
        else:
            # just take the frames with the smallest beat state values
            from scipy.signal import argrelmin
            beats = argrelmin(self.st.position(path), mode='wrap')[0]
            # recheck if they are within the "beat range", i.e. the
            # pointers of the observation model for that state must be 0
            # Note: interpolation and alignment of the beats to be at
            #       state 0 does not improve results over this simple
            #       method
            beats = beats[self.om.pointers[path[beats]] == 0]
        # convert the detected beats to seconds
        return beats / float(self.fps)

    @classmethod
    def add_arguments(cls, parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                      num_tempo_states=NUM_TEMPO_STATES,
                      transition_lambda=TRANSITION_LAMBDA,
                      observation_lambda=OBSERVATION_LAMBDA,
                      norm_observations=NORM_OBSERVATIONS, correct=CORRECT):
        """
        Add HMM related arguments to an existing parser object.

        :param parser:             existing argparse parser object

        Parameters for the transition model:

        :param min_bpm:            minimum tempo used for beat tracking
        :param max_bpm:            maximum tempo used for beat tracking
        :param num_tempo_states:   number of tempo states (if set, limit
                                   the number of states and use a log
                                   spacing, otherwise a linear spacing)
        :param transition_lambda:  lambda for the exponential tempo change
                                   distribution (higher values prefer a
                                   constant tempo over a tempo change from
                                   one beat to the next one)

        Parameters for the observation model:

        :param observation_lambda: split one beat period into N parts, the
                                   first representing beat states and the
                                   remaining non-beat states
        :param norm_observations:  normalize the observations

        Post-processing parameters:

        :param correct:            correct the beat positions

        :return:                   beat argument parser group

        """
        # pylint: disable=arguments-differ
        # add DBN parser group
        g = parser.add_argument_group('dynamic Bayesian Network arguments')
        # Note: only the switch which toggles the default is offered
        if correct:
            g.add_argument('--no_correct', dest='correct',
                           action='store_false', default=correct,
                           help='do not correct the beat positions')
        else:
            g.add_argument('--correct', dest='correct',
                           action='store_true', default=correct,
                           help='correct the beat positions')
        # add the transition parameters
        g.add_argument('--min_bpm', action='store', type=float,
                       default=min_bpm,
                       help='minimum tempo [bpm, default=%(default).2f]')
        g.add_argument('--max_bpm', action='store', type=float,
                       default=max_bpm,
                       help='maximum tempo [bpm, default=%(default).2f]')
        g.add_argument('--num_tempo_states', action='store', type=int,
                       default=num_tempo_states,
                       help='limit the number of tempo states; if set, align '
                            'them with a log spacing, otherwise linearly')
        g.add_argument('--transition_lambda', action='store', type=float,
                       default=transition_lambda,
                       help='lambda of the tempo transition distribution; '
                            'higher values prefer a constant tempo over a '
                            'tempo change from one beat to the next one '
                            '[default=%(default).1f]')
        # observation model stuff
        g.add_argument('--observation_lambda', action='store', type=int,
                       default=observation_lambda,
                       help='split one beat period into N parts, the first '
                            'representing beat states and the remaining '
                            'non-beat states [default=%(default)i]')
        if norm_observations:
            g.add_argument('--no_norm_obs', dest='norm_observations',
                           action='store_false', default=norm_observations,
                           help='do not normalize the observations of the DBN')
        else:
            g.add_argument('--norm_obs', dest='norm_observations',
                           action='store_true', default=norm_observations,
                           help='normalize the observations of the DBN')
        # return the argument group so it can be modified if needed
        return g