Example #1
import numpy as np
from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
from madmom.audio.stft import ShortTimeFourierTransformProcessor
from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                      LogarithmicSpectrogramProcessor,
                                      SpectrogramDifferenceProcessor)
from madmom.processors import SequentialProcessor, ParallelProcessor


def CreateProcesser(fps=100):
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # process the multi-resolution spec & diff in parallel
    multi = ParallelProcessor([])
    frame_sizes = [1024, 2048, 4096]
    num_bands_list = [3, 6, 12]
    for frame_size, num_bands in zip(frame_sizes, num_bands_list):
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=num_bands,
                                            fmin=30,
                                            fmax=17000,
                                            norm_filters=True)
        spec = LogarithmicSpectrogramProcessor(mul=1, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor((frames, stft, filt, spec, diff)))

    # stack the features and process everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    return pre_processor
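madmom processors are callable, so the chain returned above can be applied directly to an audio file. A minimal usage sketch; 'audio.wav' is a placeholder path, not part of the original example:

# usage sketch: apply the pre-processing chain to a (placeholder) audio file
pre_processor = CreateProcesser(fps=100)
features = pre_processor('audio.wav')
# one row per frame; columns are the stacked spectrogram + difference
# features of all three frame sizes
print(features.shape)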
Example #2
    def __init__(self, **kwargs):
        import numpy as np
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor,
                                              SpectrogramDifferenceProcessor)
        from madmom.processors import SequentialProcessor, ParallelProcessor

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=44100)
        # process the multi-resolution spec & diff in parallel
        multi = ParallelProcessor([])
        for frame_size in [4096]:
            frames = FramedSignalProcessor(frame_size=frame_size, fps=100)
            stft = ShortTimeFourierTransformProcessor(
                window=np.hamming(frame_size))  # caching FFT window
            filt = FilteredSpectrogramProcessor(num_bands=12,
                                                fmin=30,
                                                fmax=16000,
                                                norm_filters=True)
            spec = LogarithmicSpectrogramProcessor(mul=5, add=1)
            #diff = SpectrogramDifferenceProcessor(diff_ratio=0.5, positive_diffs=True, stack_diffs=np.hstack)
            # process each frame size with spec sequentially (diff left disabled above)
            multi.append(SequentialProcessor((frames, stft, filt, spec)))
            #multi.append(SequentialProcessor((frames, stft, filt)))

        # stack the features and process everything sequentially
        pre_processor = SequentialProcessor((sig, multi, np.hstack))
        super(PianoNoteProcessor, self).__init__(pre_processor)
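A short usage sketch for the class this __init__ belongs to (PianoNoteProcessor, per the super() call); the audio path is a placeholder:

# usage sketch, assuming PianoNoteProcessor behaves as a SequentialProcessor
proc = PianoNoteProcessor()
log_spec = proc('audio.wav')  # placeholder path; one row per frame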
Example #3
    def __init__(self, fs, hopsize_t):
        import numpy as np
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.filters import MelFilterbank
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        from madmom.processors import SequentialProcessor, ParallelProcessor
        # from madmom.features.onsets import _cnn_onset_processor_pad

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=fs)
        # process the multi-resolution spec in parallel
        multi = ParallelProcessor([])
        for frame_size in [2048, 1024, 4096]:
            frames = FramedSignalProcessor(frame_size=frame_size, fps=100)
            stft = ShortTimeFourierTransformProcessor()  # caching FFT window
            filt = FilteredSpectrogramProcessor(
                filterbank=MelFilterbank, num_bands=80, fmin=27.5, fmax=16000,
                norm_filters=True, unique_filters=False)
            # EPSILON is assumed to be a small module-level constant (e.g. np.spacing(1))
            spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
            # process each frame size with spec sequentially
            multi.append(SequentialProcessor([frames, stft, filt, spec]))
        # stack the features (in depth); padding is left disabled below
        stack = np.dstack
        # pad = _cnn_onset_processor_pad
        # pre-processes everything sequentially
        pre_processor = SequentialProcessor([sig, multi, stack])
        # instantiate a SequentialProcessor
        super(MadmomMelbank3ChannelsProcessor, self).__init__([pre_processor])
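Because the per-resolution Mel spectrograms are stacked with np.dstack, the result gains a trailing channel axis. A hedged usage sketch (it assumes, as the super() call suggests, that MadmomMelbank3ChannelsProcessor is a SequentialProcessor subclass; the path is a placeholder):

# usage sketch with a placeholder audio path
proc = MadmomMelbank3ChannelsProcessor(fs=44100, hopsize_t=0.01)
log_mel = proc('audio.wav')
# expected shape: (num_frames, 80, 3) -- 80 Mel bands, 3 frame sizes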
Example #4
    def __init__(self, fs, hopsize_t):
        import numpy as np
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.filters import MelFilterbank
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        from madmom.processors import SequentialProcessor
        # from madmom.features.onsets import _cnn_onset_processor_pad

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=fs)
        # single-resolution Mel spectrogram
        frames = FramedSignalProcessor(frame_size=2048,
                                       hop_size=int(fs * hopsize_t))
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=MelFilterbank,
                                            num_bands=80,
                                            fmin=27.5,
                                            fmax=16000,
                                            norm_filters=True,
                                            unique_filters=False)
        spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)

        # chain frames, STFT, filterbank and log spectrogram sequentially
        single = SequentialProcessor([frames, stft, filt, spec])

        # pre-processes everything sequentially
        pre_processor = SequentialProcessor([sig, single])

        # instantiate a SequentialProcessor
        super(MadmomMelbankProcessor, self).__init__([pre_processor])
Example #5
    def __init__(self, fs, hopsize_t):
        import numpy as np
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.filters import MelFilterbank
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        from madmom.processors import SequentialProcessor

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=fs)
        frames = FramedSignalProcessor(frame_size=2048,
                                       hop_size=int(fs * hopsize_t))
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=MelFilterbank,
                                            num_bands=80,
                                            fmin=27.5,
                                            fmax=16000,
                                            norm_filters=True,
                                            unique_filters=False)
        spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)

        single = SequentialProcessor([frames, stft, filt, spec])

        pre_processor = SequentialProcessor([sig, single])

        super(MadmomMelbankProcessor, self).__init__([pre_processor])
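A usage sketch for this single-resolution variant, under the same assumption that the surrounding class behaves as a SequentialProcessor; the hop size and audio path are placeholders:

# usage sketch; 10 ms hop at 44.1 kHz, placeholder audio path
proc = MadmomMelbankProcessor(fs=44100, hopsize_t=0.01)
log_mel = proc('audio.wav')  # (num_frames, 80) log-Mel spectrogram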
Example #6
    def spec_from_midi(midi_file):

        sig_proc = SignalProcessor(num_channels=1, sample_rate=spec_params["sample_rate"])
        fsig_proc = FramedSignalProcessor(frame_size=spec_params["frame_size"], fps=spec_params["fps"])
        spec_proc = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank, num_bands=12, fmin=60, fmax=6000,
                                                 norm_filters=True, unique_filters=False)
        log_proc = LogarithmicSpectrogramProcessor()
        processor = SequentialProcessor([sig_proc, fsig_proc, spec_proc, log_proc])

        # print(midi_file)
        if not os.path.isfile(midi_file.replace('.mid', '.wav')):
            # render audio file from midi
            render_audio(midi_file, sound_font=SOUND_FONT_PATH)

        # compute spectrogram
        audio_path = midi_file.replace('.mid', '.wav')

        # if the spectrogram doesn't exist it will be computed and stored
        if not os.path.isfile(midi_file.replace('.mid', '.spec.npy')):
            spec = processor.process(audio_path).T
            np.save(midi_file.replace('.mid', '.spec'), spec)
        else:
            spec = np.load(midi_file.replace('.mid', '.spec.npy'))

        return spec
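spec_from_midi depends on a module-level spec_params dict as well as on render_audio and SOUND_FONT_PATH, none of which are shown above. The exact values are not part of the source; an illustrative configuration might look like this:

# illustrative assumption only -- the original spec_params is not shown
spec_params = {
    "sample_rate": 22050,  # Hz
    "frame_size": 2048,    # samples per frame
    "fps": 20,             # frames per second
}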
Example #7
import numpy as np
import librosa
from sklearn.pipeline import Pipeline  # assumed source of Pipeline
from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
from madmom.audio.stft import ShortTimeFourierTransformProcessor
from madmom.audio.filters import LogarithmicFilterbank
from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                      LogarithmicSpectrogramProcessor,
                                      SpectrogramDifferenceProcessor)
from madmom.processors import SequentialProcessor, ParallelProcessor
# FeatureExtractor is assumed to be a project-specific, scikit-learn style
# transformer wrapping a callable; it is not imported here.


def create_feature_extraction_pipeline(sr=44100,
                                       frame_sizes=(1024, 2048, 4096),
                                       fps_hz=100.):
    audio_loading = Pipeline([
        ("load_audio", FeatureExtractor(librosa.load, sr=sr, mono=True)),
        ("normalize", FeatureExtractor(librosa.util.normalize, norm=np.inf))
    ])

    sig = SignalProcessor(num_channels=1, sample_rate=sr)
    multi = ParallelProcessor([])
    for frame_size in frame_sizes:
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps_hz)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank,
                                            num_bands=12,
                                            fmin=30,
                                            fmax=17000,
                                            norm_filters=True,
                                            unique_filters=True)
        spec = LogarithmicSpectrogramProcessor(log=np.log10, mul=5, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor([frames, stft, filt, spec, diff]))
    feature_extractor = FeatureExtractor(
        SequentialProcessor([sig, multi, np.hstack]))

    feature_extraction_pipeline = Pipeline([("audio_loading", audio_loading),
                                            ("feature_extractor",
                                             feature_extractor)])
    return feature_extraction_pipeline
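Assuming FeatureExtractor follows the scikit-learn transformer API (which the Pipeline construction suggests), the whole pipeline can be applied to a list of audio file paths; a sketch with a placeholder path:

# usage sketch; 'audio.wav' is a placeholder path
pipeline = create_feature_extraction_pipeline(sr=44100, fps_hz=100.)
features = pipeline.fit_transform(["audio.wav"])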
Example #8
from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
from madmom.audio.filters import LogarithmicFilterbank
from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                      LogarithmicSpectrogramProcessor)
from madmom.processors import SequentialProcessor


def spectrogram_processor(spec_params):
    """Helper function for our spectrogram extraction."""
    sig_proc = SignalProcessor(num_channels=1,
                               sample_rate=spec_params['sample_rate'])
    fsig_proc = FramedSignalProcessor(frame_size=spec_params['frame_size'],
                                      fps=spec_params['fps'])

    spec_proc = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank,
                                             num_bands=12,
                                             fmin=60,
                                             fmax=6000,
                                             norm_filters=True,
                                             unique_filters=False)
    log_proc = LogarithmicSpectrogramProcessor()

    processor = SequentialProcessor([sig_proc, fsig_proc, spec_proc, log_proc])

    return processor
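The helper returns a plain SequentialProcessor, so it can be configured from a dict and applied to a file. A short sketch; the parameter values and the path are placeholders:

# usage sketch with placeholder parameters and a placeholder path
spec_params = {'sample_rate': 22050, 'frame_size': 2048, 'fps': 100}
processor = spectrogram_processor(spec_params)
log_spec = processor('audio.wav')  # log-filtered spectrogram, one row per frame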
Example #9
def build_cnn(madmom_processor_filename):
    import madmom.processors
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    from madmom.processors import SequentialProcessor
    from madmom.ml.nn import NeuralNetworkEnsemble
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    frames = FramedSignalProcessor(frame_size=4096, hop_size=441 * 2)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)

    # this is the money param! it was not whitelisted in 'canonicalize_audio_options'!
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-processes everything sequentially
    # _cnn_pad is assumed to be a padding helper defined elsewhere in the module
    pre_processor = SequentialProcessor([
        sig, frames, stft, filt, spec, _cnn_pad
    ])
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load([madmom_processor_filename])
    return madmom.processors.SequentialProcessor([pre_processor, nn])
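The returned processor chains the pre-processing with the neural-network ensemble, so one call on an audio file yields the network activations. A hedged sketch; both file names are placeholders and _cnn_pad must exist at module level:

# usage sketch with placeholder file names
cnn = build_cnn('onset_model.pkl')
activations = cnn('audio.wav')  # one activation (vector) per frame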
Example #10
    def __init__(self, sr=44100, **kwargs):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        from madmom.processors import SequentialProcessor
        from madmom.ml.nn import NeuralNetworkEnsemble
        sr_ratio = 44100 / sr
        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=sr)
        frames = FramedSignalProcessor(frame_size=4096 // sr_ratio,
                                       fps=50 // sr_ratio)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
        spec = LogarithmicSpectrogramProcessor(add=1)
        # pre-processes everything sequentially
        pre_processor = SequentialProcessor(
            (sig, frames, stft, filt, spec, _cnn_pad))
        # process the pre-processed signal with a NN
        nn = NeuralNetworkEnsemble.load(VIENNA_MODEL_PATH)
        # instantiate a SequentialProcessor
        super().__init__((pre_processor, nn))

        self.adsr = ADSRMaestro()
Example #11
import numpy as np

from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
from madmom.audio.filters import LogarithmicFilterbank
from madmom.audio.spectrogram import FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor
from madmom.processors import SequentialProcessor

# init signal processing
SAMPLE_RATE = 22050
FRAME_SIZE = 2048
FPS = 20

sig_proc = SignalProcessor(num_channels=1, sample_rate=SAMPLE_RATE)
fsig_proc = FramedSignalProcessor(frame_size=FRAME_SIZE,
                                  fps=FPS,
                                  origin='future')
spec_proc = FilteredSpectrogramProcessor(
    LogarithmicFilterbank, num_bands=16, fmin=30,
    fmax=6000)  # num_bands=24, fmin=30, fmax=8000
log_spec_proc = LogarithmicSpectrogramProcessor()
processor = SequentialProcessor(
    [sig_proc, fsig_proc, spec_proc, log_spec_proc])
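As in the earlier examples, this module-level processor can be called directly on an audio file; since the frame rate is FPS, the dt passed to notes_to_onsets below would typically be 1.0 / FPS (a hedged note, not stated in the original):

# usage sketch; 'audio.wav' is a placeholder path
log_spec = processor('audio.wav')  # (num_frames, num_bands)
dt = 1.0 / FPS                     # frame period in seconds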

colors = ['c', 'm', 'y']


def notes_to_onsets(notes, dt):
    """ Convert sequence of keys to onset frames """

    onsets = []
    for n in notes:
        onset = int(np.ceil(n[0] / dt))
        onsets.append(onset)