def __init__(self, **kwargs):
    """Build the spectral pre-processing chain for piano note processing.

    Chain: mono 44.1 kHz signal -> framed signal (4096 samples, 100 fps)
    -> STFT with an explicit Hamming window -> filtered spectrogram
    (12 bands, 30-16000 Hz) -> logarithmic spectrogram -> features
    stacked with ``np.hstack``.  Extra keyword arguments are accepted
    but ignored.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    from madmom.processors import SequentialProcessor, ParallelProcessor
    # define pre-processing chain: mono signal at a fixed sample rate
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # a ParallelProcessor keeps the structure extensible to more frame
    # sizes, even though only one resolution is used here
    multi = ParallelProcessor([])
    for frame_size in [4096]:
        frames = FramedSignalProcessor(frame_size=frame_size, fps=100)
        # explicit Hamming window; also caches the FFT window
        stft = ShortTimeFourierTransformProcessor(
            window=np.hamming(frame_size))
        filt = FilteredSpectrogramProcessor(num_bands=12, fmin=30,
                                            fmax=16000, norm_filters=True)
        spec = LogarithmicSpectrogramProcessor(mul=5, add=1)
        # process each frame size sequentially (no difference stage)
        multi.append(SequentialProcessor((frames, stft, filt, spec)))
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    super(PianoNoteProcessor, self).__init__(pre_processor)
def __init__(self, fs, hopsize_t):
    """Build a 3-channel Mel log-spectrogram pre-processor.

    Computes 80-band Mel log-spectrograms (27.5-16000 Hz) at 100 fps for
    frame sizes 2048, 1024 and 4096 in parallel and stacks them in depth
    (``np.dstack``), yielding a 3-channel feature for CNN input.

    Parameters
    ----------
    fs : int
        Sample rate the input signal is processed at.
    hopsize_t : float
        Hop size in seconds; currently unused (frame rate is fixed at
        100 fps) -- kept for interface compatibility.
    """
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.filters import MelFilterbank
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    # imported locally so the method is self-contained; the original
    # relied on module-level imports for these two names
    from madmom.processors import SequentialProcessor, ParallelProcessor
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=fs)
    # process the multi-resolution spectrograms in parallel
    multi = ParallelProcessor([])
    for frame_size in [2048, 1024, 4096]:
        frames = FramedSignalProcessor(frame_size=frame_size, fps=100)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(
            filterbank=MelFilterbank, num_bands=80, fmin=27.5,
            fmax=16000, norm_filters=True, unique_filters=False)
        spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
        # process each frame size sequentially
        multi.append(SequentialProcessor([frames, stft, filt, spec]))
    # stack the per-resolution features in depth (-> 3 "channels")
    stack = np.dstack
    # pre-process everything sequentially
    pre_processor = SequentialProcessor([sig, multi, stack])
    # instantiate a SequentialProcessor
    super(MadmomMelbank3ChannelsProcessor, self).__init__([pre_processor])
def CreateProcesser(fps=100):
    """Create a multi-resolution log-spectrogram + difference processor.

    Builds a madmom chain: mono 44.1 kHz signal -> for each frame size
    (1024/2048/4096 samples, paired with 3/6/12 filterbank bands) a
    log-filtered spectrogram plus its positive first-order difference,
    computed in parallel -> all features stacked with ``np.hstack``.

    Parameters
    ----------
    fps : int, optional
        Frame rate (frames per second) of the framed signal.

    Returns
    -------
    SequentialProcessor
        The assembled pre-processing chain.
    """
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # process the multi-resolution spec & diff in parallel
    multi = ParallelProcessor([])
    frame_sizes = [1024, 2048, 4096]
    # distinct name for the list avoids the original's shadowing, where
    # the `num_bands` list was rebound by the loop variable of the same name
    band_counts = [3, 6, 12]
    for frame_size, num_bands in zip(frame_sizes, band_counts):
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=num_bands, fmin=30,
                                            fmax=17000, norm_filters=True)
        spec = LogarithmicSpectrogramProcessor(mul=1, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor((frames, stft, filt, spec, diff)))
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    return pre_processor
def create_feature_extraction_pipeline(sr=44100,
                                       frame_sizes=(1024, 2048, 4096),
                                       fps_hz=100.):
    """Build an sklearn Pipeline that loads audio and extracts features.

    The pipeline loads and peak-normalizes audio with librosa, then
    computes multi-resolution log-filtered spectrograms and their
    positive differences with madmom, stacked with ``np.hstack``.

    Parameters
    ----------
    sr : int, optional
        Target sample rate for loading and signal processing.
    frame_sizes : iterable of int, optional
        Frame sizes (in samples) processed in parallel.  Default is a
        tuple rather than a list to avoid the shared mutable default
        argument pitfall.
    fps_hz : float, optional
        Frame rate in frames per second.

    Returns
    -------
    Pipeline
        Pipeline with "audio_loading" and "feature_extractor" steps.
    """
    audio_loading = Pipeline([
        ("load_audio", FeatureExtractor(librosa.load, sr=sr, mono=True)),
        ("normalize", FeatureExtractor(librosa.util.normalize, norm=np.inf))
    ])
    sig = SignalProcessor(num_channels=1, sample_rate=sr)
    multi = ParallelProcessor([])
    for frame_size in frame_sizes:
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps_hz)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank,
                                            num_bands=12, fmin=30,
                                            fmax=17000, norm_filters=True,
                                            unique_filters=True)
        spec = LogarithmicSpectrogramProcessor(log=np.log10, mul=5, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor([frames, stft, filt, spec, diff]))
    feature_extractor = FeatureExtractor(
        SequentialProcessor([sig, multi, np.hstack]))
    feature_extraction_pipeline = Pipeline(
        [("audio_loading", audio_loading),
         ("feature_extractor", feature_extractor)])
    return feature_extraction_pipeline
def __init__(self, **kwargs):
    # pylint: disable=unused-argument
    """Chain a multi-resolution spectral pre-processor with the note BRNN."""
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.spectrogram import (
        FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor,
        SpectrogramDifferenceProcessor)
    from ..models import NOTES_BRNN
    from ..ml.nn import NeuralNetwork

    def _chain(size):
        # log-filtered spectrogram + positive diff for one frame size
        return SequentialProcessor((
            FramedSignalProcessor(frame_size=size, fps=100),
            FilteredSpectrogramProcessor(
                num_bands=12, fmin=30, fmax=17000, norm_filters=True),
            LogarithmicSpectrogramProcessor(mul=5, add=1),
            SpectrogramDifferenceProcessor(
                diff_ratio=0.5, positive_diffs=True,
                stack_diffs=np.hstack)))

    # mono signal at 44.1 kHz
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # all three resolutions computed in parallel
    multi = ParallelProcessor([])
    for size in (1024, 2048, 4096):
        multi.append(_chain(size))
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    # BRNN mapping the stacked features to note activations
    nn = NeuralNetwork.load(NOTES_BRNN[0])
    # instantiate a SequentialProcessor
    super(RNNPianoNoteProcessor, self).__init__((pre_processor, nn))
def __init__(self, **kwargs):
    # pylint: disable=unused-argument
    """Pre-process audio at three resolutions and feed the note BRNN."""
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.stft import ShortTimeFourierTransformProcessor
    from ..audio.spectrogram import (FilteredSpectrogramProcessor,
                                     LogarithmicSpectrogramProcessor,
                                     SpectrogramDifferenceProcessor)
    from ..models import NOTES_BRNN
    from ..ml.nn import NeuralNetwork
    # mono signal at 44.1 kHz
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # one spec+diff chain per frame size, computed in parallel
    multi = ParallelProcessor([])
    for size in (1024, 2048, 4096):
        sub_chain = SequentialProcessor((
            FramedSignalProcessor(frame_size=size, fps=100),
            ShortTimeFourierTransformProcessor(),  # caching FFT window
            FilteredSpectrogramProcessor(num_bands=12, fmin=30,
                                         fmax=17000, norm_filters=True),
            LogarithmicSpectrogramProcessor(mul=5, add=1),
            SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                           positive_diffs=True,
                                           stack_diffs=np.hstack)))
        multi.append(sub_chain)
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    # BRNN mapping the stacked features to note activations
    nn = NeuralNetwork.load(NOTES_BRNN[0])
    # instantiate a SequentialProcessor
    super(RNNPianoNoteProcessor, self).__init__((pre_processor, nn))
def __init__(self, **kwargs):
    # pylint: disable=unused-argument
    """Chain the 3-channel Mel pre-processor with the onset-detection CNN."""
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.filters import MelFilterbank
    from ..audio.spectrogram import (FilteredSpectrogramProcessor,
                                     LogarithmicSpectrogramProcessor)
    from ..models import ONSETS_CNN
    from ..ml.nn import NeuralNetwork
    # mono signal at 44.1 kHz
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # one Mel log-spectrogram chain per frame size, computed in parallel
    multi = ParallelProcessor([])
    for size in (2048, 1024, 4096):
        framer = FramedSignalProcessor(frame_size=size, fps=100)
        mel = FilteredSpectrogramProcessor(
            filterbank=MelFilterbank, num_bands=80, fmin=27.5,
            fmax=16000, norm_filters=True, unique_filters=False)
        log_spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
        multi.append(SequentialProcessor((framer, mel, log_spec)))
    # stack the features in depth (3 channels), then pad at both ends
    pre_processor = SequentialProcessor(
        (sig, multi, np.dstack, _cnn_onset_processor_pad))
    # CNN mapping the stacked features to onset activations
    nn = NeuralNetwork.load(ONSETS_CNN[0])
    # instantiate a SequentialProcessor
    super(CNNOnsetProcessor, self).__init__((pre_processor, nn))
def __init__(self, online=False, **kwargs):
    # pylint: disable=unused-argument
    """Pre-process audio and run the onset RNN ensemble (online/offline)."""
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.stft import ShortTimeFourierTransformProcessor
    from ..audio.spectrogram import (FilteredSpectrogramProcessor,
                                     LogarithmicSpectrogramProcessor,
                                     SpectrogramDifferenceProcessor)
    from ..models import ONSETS_RNN, ONSETS_BRNN
    from ..ml.nn import NeuralNetworkEnsemble
    # model files, frame origin and frame sizes depend on the mode
    if online:
        origin, nn_files = 'online', ONSETS_RNN
        frame_sizes = [512, 1024, 2048]
    else:
        origin, nn_files = 'offline', ONSETS_BRNN
        frame_sizes = [1024, 2048, 4096]
    # mono signal at 44.1 kHz
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # one spec+diff chain per frame size, computed in parallel
    multi = ParallelProcessor([])
    for size in frame_sizes:
        sub_chain = SequentialProcessor((
            FramedSignalProcessor(frame_size=size, fps=100, origin=origin),
            ShortTimeFourierTransformProcessor(),  # caching FFT window
            FilteredSpectrogramProcessor(num_bands=6, fmin=30, fmax=17000,
                                         norm_filters=True),
            LogarithmicSpectrogramProcessor(mul=5, add=1),
            SpectrogramDifferenceProcessor(diff_ratio=0.25,
                                           positive_diffs=True,
                                           stack_diffs=np.hstack)))
        multi.append(sub_chain)
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    # ensemble of networks mapping features to onset activations
    nn = NeuralNetworkEnsemble.load(nn_files, **kwargs)
    # instantiate a SequentialProcessor
    super(RNNOnsetProcessor, self).__init__((pre_processor, nn))
def process_beats_and_chords(youtube_id):
    """Run the beat and chord processors in parallel on a downloaded file.

    The audio file path is built from the module-level OUT_FILE_PATH /
    OUT_FILE_EXT constants and the given *youtube_id*.
    """
    print('START PROCESS >> ', str(datetime.now()))
    from madmom.processors import ParallelProcessor
    # two worker threads: one per sub-processor
    pipeline = ParallelProcessor([], num_threads=2)
    pipeline.append(get_beat_processor())
    pipeline.append(get_chords_processor())
    print('BEAT PROCESS >> ', str(datetime.now()))
    audio_path = OUT_FILE_PATH + youtube_id + '.' + OUT_FILE_EXT
    return pipeline.process(audio_path)
def __init__(self, online=False, **kwargs):
    # pylint: disable=unused-argument
    """Pre-process audio and run the onset network ensemble.

    Online mode uses smaller frame sizes and 'online' frame origin;
    offline mode uses the bidirectional models.
    """
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.spectrogram import (
        FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor,
        SpectrogramDifferenceProcessor)
    from ..models import ONSETS_RNN, ONSETS_BRNN
    from ..ml.nn import NeuralNetworkEnsemble

    def _chain(size, origin):
        # spec + positive diff chain for a single frame size
        return SequentialProcessor((
            FramedSignalProcessor(frame_size=size, fps=100, origin=origin),
            FilteredSpectrogramProcessor(
                num_bands=6, fmin=30, fmax=17000, norm_filters=True),
            LogarithmicSpectrogramProcessor(mul=5, add=1),
            SpectrogramDifferenceProcessor(
                diff_ratio=0.25, positive_diffs=True,
                stack_diffs=np.hstack)))

    # model files, frame origin and frame sizes depend on the mode
    if online:
        origin, nn_files = 'online', ONSETS_RNN
        frame_sizes = [512, 1024, 2048]
    else:
        origin, nn_files = 'offline', ONSETS_BRNN
        frame_sizes = [1024, 2048, 4096]
    # mono signal at 44.1 kHz
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # all resolutions computed in parallel
    multi = ParallelProcessor([])
    for size in frame_sizes:
        multi.append(_chain(size, origin))
    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    # ensemble of networks mapping features to onset activations
    nn = NeuralNetworkEnsemble.load(nn_files, **kwargs)
    # instantiate a SequentialProcessor
    super(RNNOnsetProcessor, self).__init__((pre_processor, nn))
def extract(yt_id):
    """Extract beats and chords from ``tmp/<yt_id>.wav``.

    Beat tracking and chord recognition run in parallel; the combined
    result is passed through ``arrange``, with ``printTime`` steps
    interleaved for timing diagnostics.
    """
    beat_tracker = SequentialProcessor(
        [RNNBeatProcessor(), DBNBeatTrackingProcessor(fps=100)])
    chord_recognizer = SequentialProcessor(
        [CNNChordFeatureProcessor(), CRFChordRecognitionProcessor()])
    # run both analyses in parallel over the same input
    combined = ParallelProcessor([])
    combined.append(beat_tracker)
    combined.append(chord_recognizer)
    pipeline = SequentialProcessor(
        [printTime, combined, printTime, arrange, printTime])
    return pipeline('tmp/' + yt_id + '.wav')