def CreateProcesser(fps=100):
    """Build the multi-resolution spectrogram pre-processing chain.

    Parameters
    ----------
    fps : int, optional
        Frame rate (frames per second) used for all three STFT resolutions.

    Returns
    -------
    SequentialProcessor
        Processor mapping an audio signal to horizontally stacked
        log-filtered spectrograms and their positive differences.
    """
    # define pre-processing chain: mono signal at 44.1 kHz
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # process the multi-resolution spec & diff in parallel
    multi = ParallelProcessor([])
    frame_sizes = [1024, 2048, 4096]
    # FIX: renamed from ``num_bands`` — the original shadowed this list with
    # its own loop variable, which only worked because zip() evaluates its
    # arguments once before the first rebinding.
    band_counts = [3, 6, 12]
    for frame_size, num_bands in zip(frame_sizes, band_counts):
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=num_bands, fmin=30,
                                            fmax=17000, norm_filters=True)
        spec = LogarithmicSpectrogramProcessor(mul=1, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor((frames, stft, filt, spec, diff)))
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    return pre_processor
def __init__(self, **kwargs):
    """Build the piano-note pre-processing chain and hand it to the base
    class.

    Extra keyword arguments are accepted but not used.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor,
                                          SpectrogramDifferenceProcessor)
    from madmom.processors import SequentialProcessor, ParallelProcessor

    # mono signal at 44.1 kHz
    signal_proc = SignalProcessor(num_channels=1, sample_rate=44100)
    # a single 4096-sample resolution, kept inside a ParallelProcessor so
    # further resolutions can be appended easily
    parallel = ParallelProcessor([])
    for size in [4096]:
        framed = FramedSignalProcessor(frame_size=size, fps=100)
        # Hamming-windowed STFT (window cached per frame size)
        stft_proc = ShortTimeFourierTransformProcessor(
            window=np.hamming(size))
        filtered = FilteredSpectrogramProcessor(num_bands=12, fmin=30,
                                                fmax=16000,
                                                norm_filters=True)
        log_spec = LogarithmicSpectrogramProcessor(mul=5, add=1)
        parallel.append(
            SequentialProcessor((framed, stft_proc, filtered, log_spec)))
    # stack the parallel outputs and run everything in sequence
    pre_processor = SequentialProcessor((signal_proc, parallel, np.hstack))
    super(PianoNoteProcessor, self).__init__(pre_processor)
def __init__(self, fs, hopsize_t):
    """Three-resolution mel log-spectrogram front end, stacked in depth.

    Parameters
    ----------
    fs : int
        Sample rate of the input audio in Hz.
    hopsize_t : float
        NOTE(review): currently unused — the frame rate is hard-coded to
        100 fps below; confirm this is intended.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.filters import MelFilterbank
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)

    # mono signal at the caller-supplied sample rate
    signal_proc = SignalProcessor(num_channels=1, sample_rate=fs)
    # one branch per STFT resolution, evaluated in parallel
    resolutions = ParallelProcessor([])
    for size in [2048, 1024, 4096]:
        framed = FramedSignalProcessor(frame_size=size, fps=100)
        stft_proc = ShortTimeFourierTransformProcessor()  # caching FFT window
        mel_filt = FilteredSpectrogramProcessor(
            filterbank=MelFilterbank, num_bands=80, fmin=27.5, fmax=16000,
            norm_filters=True, unique_filters=False)
        log_spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
        resolutions.append(
            SequentialProcessor([framed, stft_proc, mel_filt, log_spec]))
    # stack the per-resolution spectrograms in depth and run everything
    # sequentially
    pre_processor = SequentialProcessor([signal_proc, resolutions, np.dstack])
    # instantiate a SequentialProcessor
    super(MadmomMelbank3ChannelsProcessor, self).__init__([pre_processor])
def __init__(self, fs, hopsize_t):
    """Single-resolution mel log-spectrogram front end.

    Parameters
    ----------
    fs : int
        Sample rate of the input audio in Hz.
    hopsize_t : float
        Hop size in seconds; converted to samples as ``int(fs * hopsize_t)``.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.filters import MelFilterbank
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)

    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=fs)
    # BUG FIX: madmom's FramedSignalProcessor takes ``hop_size`` — the
    # original passed ``hopsize``, which was silently swallowed by **kwargs,
    # leaving the default hop size in effect and ignoring ``hopsize_t``.
    frames = FramedSignalProcessor(frame_size=2048,
                                   hop_size=int(fs * hopsize_t))
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(filterbank=MelFilterbank,
                                        num_bands=80, fmin=27.5, fmax=16000,
                                        norm_filters=True,
                                        unique_filters=False)
    spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
    # frames, STFT, filtering and log scaling run sequentially
    single = SequentialProcessor([frames, stft, filt, spec])
    # pre-process everything sequentially
    pre_processor = SequentialProcessor([sig, single])
    super(MadmomMelbankProcessor, self).__init__([pre_processor])
def __init__(self, fs, hopsize_t):
    """Single-resolution mel log-spectrogram front end.

    Parameters
    ----------
    fs : int
        Sample rate of the input audio in Hz.
    hopsize_t : float
        Hop size in seconds; converted to samples as ``int(fs * hopsize_t)``.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.filters import MelFilterbank
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)

    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=fs)
    # BUG FIX: the correct keyword is ``hop_size``; the original ``hopsize``
    # was absorbed by FramedSignalProcessor's **kwargs and ignored, so the
    # requested hop derived from ``hopsize_t`` never took effect.
    frames = FramedSignalProcessor(frame_size=2048,
                                   hop_size=int(fs * hopsize_t))
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(filterbank=MelFilterbank,
                                        num_bands=80, fmin=27.5, fmax=16000,
                                        norm_filters=True,
                                        unique_filters=False)
    spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
    single = SequentialProcessor([frames, stft, filt, spec])
    pre_processor = SequentialProcessor([sig, single])
    super(MadmomMelbankProcessor, self).__init__([pre_processor])
def spec_from_midi(midi_file):
    """Return the log-filtered spectrogram for ``midi_file``.

    Renders the MIDI file to audio via ``render_audio`` if no ``.wav``
    exists yet, computes the transposed spectrogram, and caches it next to
    the MIDI file as ``.spec.npy`` so repeated calls load from disk.
    """
    # build the extraction chain from the module-level spec_params
    signal_stage = SignalProcessor(num_channels=1,
                                   sample_rate=spec_params["sample_rate"])
    framing_stage = FramedSignalProcessor(
        frame_size=spec_params["frame_size"], fps=spec_params["fps"])
    filter_stage = FilteredSpectrogramProcessor(
        filterbank=LogarithmicFilterbank, num_bands=12, fmin=60, fmax=6000,
        norm_filters=True, unique_filters=False)
    log_stage = LogarithmicSpectrogramProcessor()
    processor = SequentialProcessor(
        [signal_stage, framing_stage, filter_stage, log_stage])

    # derived sibling-file paths
    audio_path = midi_file.replace('.mid', '.wav')
    cache_path = midi_file.replace('.mid', '.spec.npy')

    # render audio from MIDI only when it is missing
    if not os.path.isfile(audio_path):
        render_audio(midi_file, sound_font=SOUND_FONT_PATH)

    # compute the spectrogram once and cache it; otherwise load the cache
    if os.path.isfile(cache_path):
        spec = np.load(cache_path)
    else:
        spec = processor.process(audio_path).T
        np.save(midi_file.replace('.mid', '.spec'), spec)
    return spec
def create_feature_extraction_pipeline(sr=44100, frame_sizes=None,
                                       fps_hz=100.):
    """Build a two-stage pipeline: audio loading/normalisation followed by a
    multi-resolution log-spectrogram + spectral-difference extractor.

    Parameters
    ----------
    sr : int, optional
        Target sample rate for loading and framing.
    frame_sizes : list of int, optional
        STFT frame sizes; defaults to ``[1024, 2048, 4096]``.
        (FIX: a ``None`` sentinel replaces the original mutable-list
        default argument.)
    fps_hz : float, optional
        Frame rate in frames per second.

    Returns
    -------
    Pipeline
        Pipeline mapping an audio file to horizontally stacked features.
    """
    if frame_sizes is None:
        frame_sizes = [1024, 2048, 4096]

    audio_loading = Pipeline([
        ("load_audio", FeatureExtractor(librosa.load, sr=sr, mono=True)),
        ("normalize", FeatureExtractor(librosa.util.normalize, norm=np.inf))
    ])
    sig = SignalProcessor(num_channels=1, sample_rate=sr)
    multi = ParallelProcessor([])
    for frame_size in frame_sizes:
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps_hz)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank,
                                            num_bands=12, fmin=30, fmax=17000,
                                            norm_filters=True,
                                            unique_filters=True)
        spec = LogarithmicSpectrogramProcessor(log=np.log10, mul=5, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor([frames, stft, filt, spec, diff]))
    feature_extractor = FeatureExtractor(
        SequentialProcessor([sig, multi, np.hstack]))
    feature_extraction_pipeline = Pipeline(
        [("audio_loading", audio_loading),
         ("feature_extractor", feature_extractor)])
    return feature_extraction_pipeline
def spectrogram_processor(spec_params):
    """Helper function for our spectrogram extraction.

    Builds a signal -> framing -> filtered spectrogram -> log chain from
    the 'sample_rate', 'frame_size' and 'fps' entries of ``spec_params``.
    """
    signal_stage = SignalProcessor(num_channels=1,
                                   sample_rate=spec_params['sample_rate'])
    framing_stage = FramedSignalProcessor(
        frame_size=spec_params['frame_size'], fps=spec_params['fps'])
    filter_stage = FilteredSpectrogramProcessor(
        filterbank=LogarithmicFilterbank, num_bands=12, fmin=60, fmax=6000,
        norm_filters=True, unique_filters=False)
    log_stage = LogarithmicSpectrogramProcessor()
    return SequentialProcessor(
        [signal_stage, framing_stage, filter_stage, log_stage])
def build_cnn(madmom_processor_filename):
    """Build a processor chaining the spectrogram front end with a neural
    network ensemble loaded from ``madmom_processor_filename``.

    Returns
    -------
    SequentialProcessor
        Pre-processing chain followed by the NN ensemble.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    from madmom.ml.nn import NeuralNetworkEnsemble
    # CONSISTENCY FIX: every other madmom name is imported locally, but
    # SequentialProcessor was taken once from outer scope and once as
    # ``madmom.processors.SequentialProcessor`` — import it here and use it
    # uniformly so the function is self-contained.
    from madmom.processors import SequentialProcessor

    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # 4096-sample frames with a hop of 882 samples
    frames = FramedSignalProcessor(frame_size=4096, hop_size=441 * 2)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    # this is the money param! it was not whitelisted in
    # 'canonicalize_audio_options'!
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-process everything sequentially, padding for the CNN context
    pre_processor = SequentialProcessor([
        sig, frames, stft, filt, spec, _cnn_pad
    ])
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load([madmom_processor_filename])
    return SequentialProcessor([pre_processor, nn])
def __init__(self, sr=44100, **kwargs):
    """Build the pre-processing chain + NN ensemble, scaled for ``sr``.

    Parameters
    ----------
    sr : int, optional
        Input sample rate in Hz; frame size and frame rate are scaled
        relative to the 44.1 kHz reference.

    NOTE(review): extra keyword arguments are accepted but ignored.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    from madmom.ml.nn import NeuralNetworkEnsemble

    # scale frame size / frame rate relative to the 44.1 kHz reference;
    # true division makes this a float (e.g. 2.0 for sr=22050)
    sr_ratio = 44100 / sr
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=sr)
    # FIX: ``4096 // sr_ratio`` yields a float under true division; cast to
    # int so the frame size is integral (e.g. 2048 for sr=22050).
    frames = FramedSignalProcessor(frame_size=int(4096 // sr_ratio),
                                   fps=50 // sr_ratio)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-processes everything sequentially
    pre_processor = SequentialProcessor(
        (sig, frames, stft, filt, spec, _cnn_pad))
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load(VIENNA_MODEL_PATH)
    # instantiate a SequentialProcessor
    super().__init__((pre_processor, nn))
    self.adsr = ADSRMaestro()
from madmom.audio.signal import SignalProcessor, FramedSignalProcessor from madmom.audio.filters import LogarithmicFilterbank from madmom.audio.spectrogram import FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor from madmom.processors import SequentialProcessor # init signal processing SAMPLE_RATE = 22050 FRAME_SIZE = 2048 FPS = 20 sig_proc = SignalProcessor(num_channels=1, sample_rate=SAMPLE_RATE) fsig_proc = FramedSignalProcessor(frame_size=FRAME_SIZE, fps=FPS, origin='future') spec_proc = FilteredSpectrogramProcessor( LogarithmicFilterbank, num_bands=16, fmin=30, fmax=6000) # num_bands=24, fmin=30, fmax=8000 log_spec_proc = LogarithmicSpectrogramProcessor() processor = SequentialProcessor( [sig_proc, fsig_proc, spec_proc, log_spec_proc]) colors = ['c', 'm', 'y'] def notes_to_onsets(notes, dt): """ Convert sequence of keys to onset frames """ onsets = [] for n in notes: onset = int(np.ceil(n[0] / dt)) onsets.append(onset)