Example #1
import essentia.standard as std
import numpy as np

# assumed: eps is the float32 machine epsilon, used to avoid exact zeros
eps = np.finfo(np.float32).eps


def get_sines_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Perform a framewise sinusoidal model analysis of an audio signal
    :param audio: Audio, either mono or stereo. Stereo input is downmixed to mono
    :param sr: Sample rate of the audio
    :param onlyfrecuencies: If True, return only the frequencies
    :param nsines: Maximum number of sinusoids to track per frame
    :return: N x 2 x nsines. N is the number of resulting frames; the 2 x nsines
        slices are the frequencies and magnitudes respectively.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    # Count the frames first so that the output array can be preallocated
    len_arrays = 0
    for i, _ in enumerate(
            std.FrameGenerator(audio, frameSize=4096, hopSize=2048)):
        len_arrays = i

    fft_algo = std.FFT()
    sine_anal = std.SineModelAnal(maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = np.zeros([len_arrays + 1, 2, nsines], dtype=np.float32) + eps
    for i, frame in enumerate(
            std.FrameGenerator(audio, frameSize=4096, hopSize=2048)):
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        sines[i, :] = [freqs, mags]
    if onlyfrecuencies:
        return sines[:, 0, :]
    else:
        return sines[:, 0, :], sines[:, 1, :]
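
A minimal usage sketch for the function above; 'input.wav' is a hypothetical path:

import essentia.standard as std

# MonoLoader resamples and downmixes to mono while loading
audio = std.MonoLoader(filename='input.wav', sampleRate=44100)()
freqs, mags = get_sines_per_frame(audio, sr=44100, nsines=20)
print(freqs.shape, mags.shape)  # both (n_frames, 20)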
Example #2
import essentia.standard as std
import numpy as np


def get_hpeaks_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Get the harmonic peaks of an audio signal, frame by frame
    :param audio: Audio, either mono or stereo. Stereo input is downmixed to mono
    :param sr: Sample rate of the audio
    :param onlyfrecuencies: If True, return only the frequencies
    :param nsines: Maximum number of sinusoids to track per frame
    :return: N x 2 x M. N is the number of resulting frames; the 2 x M slices
        are the harmonic frequencies and magnitudes respectively.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    pyin = std.PitchYin()
    hpeaks = std.HarmonicPeaks()
    sine_anal = std.SineModelAnal(maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = []
    for i, frame in enumerate(
            std.FrameGenerator(audio, frameSize=4096, hopSize=2048)):
        pitch, _ = pyin(frame)
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        # Drop the zero-padded slots before estimating the harmonic peaks
        non_zero_freqs = np.where(freqs != 0)
        freqs = freqs[non_zero_freqs]
        mags = mags[non_zero_freqs]
        freqs, mags = hpeaks(freqs, mags, pitch)
        sines.append([freqs, mags])
    sines = np.array(sines)
    if onlyfrecuencies:
        return sines[:, 0, :]
    else:
        return sines[:, 0, :], sines[:, 1, :]
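
A minimal usage sketch, again with a hypothetical 'input.wav':

import essentia.standard as std

audio = std.MonoLoader(filename='input.wav', sampleRate=44100)()
# onlyfrecuencies=True returns just the harmonic frequencies per frame
hfreqs = get_hpeaks_per_frame(audio, onlyfrecuencies=True)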
Example #3
import essentia.standard as es


def mix_to_mono(audio):
    """
    Mix an audio file down to mono

    :param audio: (np.ndarray) audio samples, shaped (samples, channels)
    :return: (np.ndarray) mono audio samples
    """

    # MonoMixer takes the sample buffer and its number of channels
    mono_mix = es.MonoMixer()
    samples = mono_mix(audio, audio.shape[1])
    return samples
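
A minimal usage sketch; unlike MonoLoader, AudioLoader keeps the channel layout, so its output fits mix_to_mono. 'stereo.wav' is a hypothetical path:

import essentia.standard as es

# In recent Essentia versions AudioLoader returns
# (audio, sampleRate, numberChannels, md5, bitRate, codec)
audio, sr, channels, _, _, _ = es.AudioLoader(filename='stereo.wav')()
mono = mix_to_mono(audio)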
Example #4
import essentia.standard as std
import ffmpeg


def mix(audio1, audio2, sr):
    """
    Mix two audio signals with normalised loudness
    :param audio1: First audio vector to normalise
    :param audio2: Second audio vector to normalise
    :param sr: Sample rate of the final mix
    :return: Audio vector of the normalised mix
    """
    if audio1.ndim > 1:
        audio1 = std.MonoMixer()(audio1, audio1.shape[1])
    if audio2.ndim > 1:
        audio2 = std.MonoMixer()(audio2, audio2.shape[1])
    # Write both signals to temporary files so ffmpeg can process them
    std.MonoWriter(filename='temporal1.wav', sampleRate=sr)(audio1)
    std.MonoWriter(filename='temporal2.wav', sampleRate=sr)(audio2)

    # Loudness-normalise each file with ffmpeg's loudnorm filter, then mix
    stream1 = ffmpeg.input('temporal1.wav').filter('loudnorm')
    stream2 = ffmpeg.input('temporal2.wav').filter('loudnorm')
    merged_audio = ffmpeg.filter([stream1, stream2], 'amix')
    ffmpeg.output(merged_audio, 'temporal_o.wav').overwrite_output().run()

    audio_numpy = std.MonoLoader(filename='temporal_o.wav')()
    return audio_numpy
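
A minimal usage sketch with hypothetical input paths; note that the function writes its temporary files into the current working directory:

import essentia.standard as std

a = std.MonoLoader(filename='a.wav', sampleRate=44100)()
b = std.MonoLoader(filename='b.wav', sampleRate=44100)()
mixed = mix(a, b, 44100)
std.MonoWriter(filename='mix.wav', sampleRate=44100)(mixed)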
Example #5
            # Compute beat positions and BPM
            rhythm_extractor = esst.RhythmExtractor2013(method="multifeature")
            bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(
                audio)

            # print("BPM:", bpm)
            # print("Beat positions (sec.):", beats)
            print("Beat estimation confidence:", beats_confidence)

            for i in range(len(beats)):
                # Take a short slice around each beat: 10 ms before to 150 ms after
                trim = esst.Trimmer(startTime=beats[i] - 0.01,
                                    endTime=beats[i] + 0.15)(audio)

                if len(mixed_sample):
                    # Overlay the new slice on the running mix: pad/crop it to the
                    # same length, interleave to stereo, then fold back to mono
                    trim = np.resize(trim, mixed_sample.size)
                    stereo_mix = esst.StereoMuxer()(mixed_sample, trim)
                    mixed_sample = esst.MonoMixer()(stereo_mix, 2)
                else:
                    mixed_sample = trim
            output_array = np.concatenate([output_array, mixed_sample])

esst.MonoWriter(filename=f"../samples/mix_beat{i}.mp3")(output_array)

# Mark beat positions on the audio and write it to a file
# Let's use beeps instead of white noise to mark them, as it's more distinctive
# marker = AudioOnsetsMarker(onsets=beats, type='beep')
# marked_audio = marker(audio)
# MonoWriter(filename='../samples/dubstep_beats.flac')(marked_audio)
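
The commented-out marking step above can be run on its own; a minimal self-contained sketch, with 'input.wav' and the output path as hypothetical assumptions:

import essentia.standard as esst

audio = esst.MonoLoader(filename='input.wav')()
bpm, beats, beats_confidence, _, _ = esst.RhythmExtractor2013(
    method="multifeature")(audio)

# Overlay a beep at every detected beat position and write the result
marker = esst.AudioOnsetsMarker(onsets=beats, type='beep')
esst.MonoWriter(filename='beats_marked.flac')(marker(audio))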