示例#1
0
def cheap_eq(seg, focus_freq, bandwidth=100, mode="peak", gain_dB=0, order=2):
    """Apply a rough single-band EQ by overlaying a filtered copy of ``seg``.

    A copy of the segment is filtered around ``focus_freq``, level-shifted and
    mixed back over the input with ``overlay``; the mix is then handed to
    ``peak_limiter``.

    Args:
        seg: Segment to process. It must support ``overlay`` and dB arithmetic
            with ``+``/``-`` (presumably a pydub ``AudioSegment`` -- confirm).
        focus_freq: Centre of the band in ``"peak"`` mode, cutoff for shelves.
        bandwidth: Width of the band around ``focus_freq``; only read in
            ``"peak"`` mode.
        mode: ``"peak"``, ``"low_shelf"`` or ``"high_shelf"``.
        gain_dB: Requested gain. ``>= 0`` takes the boost path, ``< 0`` the
            cut path.
        order: Filter order forwarded to the filter helpers.

    Returns:
        The output of ``peak_limiter``, or ``None`` when ``mode`` is not one
        of the three recognised names or ``gain_dB`` compares as neither
        ``>= 0`` nor ``< 0`` (e.g. NaN).

    Original author's notes: silence = -120 dBFS; I2/I1 = 2 => 3 dB SPL gain
    (presumably the origin of the 3 dB offset used below).
    """
    boost = gain_dB >= 0
    if not boost and not gain_dB < 0:
        # Neither comparison holds, so no branch applies.
        return None
    if mode not in ("peak", "low_shelf", "high_shelf"):
        return None

    if boost:
        # Boost: isolate the target region and mix it back on top.
        if mode == "peak":
            half_width = bandwidth / 2
            band = band_pass_filter(seg,
                                    focus_freq - half_width,
                                    focus_freq + half_width,
                                    order=order)
        elif mode == "low_shelf":
            band = low_pass_filter(seg, focus_freq, order=order)
        else:
            band = high_pass_filter(seg, focus_freq, order=order)
        return peak_limiter(seg.overlay(band - (3 - gain_dB)))

    # Cut: mix in the complementary region(s), then lower the whole mix by
    # gain_dB after every stage.
    if mode == "peak":
        stages = ((high_pass_filter, focus_freq - bandwidth / 2),
                  (low_pass_filter, focus_freq + bandwidth / 2))
    elif mode == "low_shelf":
        stages = ((high_pass_filter, focus_freq),)
    else:
        stages = ((low_pass_filter, focus_freq),)

    trim = 3 + gain_dB
    for apply_filter, cutoff in stages:
        band = apply_filter(seg, cutoff, order=order)
        seg = seg.overlay(band - trim) + gain_dB
    return peak_limiter(seg)
示例#2
0
def AudioStandarize(audio_file,
                    sr=32000,
                    device=None,
                    high_pass=0,
                    ultrasonic=False):
    """Load an audio file and convert it to normalised, mono, 16-bit samples.

    Args:
        audio_file: Path of the file to load (``str`` or ``os.PathLike``).
            The format is chosen from the file extension; accepted
            extensions are wav, mp3, wma, m4a, ogg, mp4 and aac.
        sr: Target sample rate in Hz. Audio recorded at a higher rate is
            low-pass filtered at ``sr / 2`` before being resampled.
        device: Torch device for the returned tensor. When falsy, ``'cuda:0'``
            is used if CUDA is available, otherwise ``'cpu'``.
        high_pass: Cutoff passed to ``sound.high_pass_filter``; ``0`` (or any
            falsy value) skips the filter.
        ultrasonic: When true, recordings with a frame rate above 100000 Hz
            are passed through ``speed_change(sound, 1 / 12)``; any other
            recording makes the function return ``False``.

    Returns:
        ``(frame_rate, audiodata, duration_ms, sound, original_metadata)``
        on success, where ``audiodata`` is a float tensor of the samples and
        ``original_metadata`` describes the file before conversion.
        ``None`` if the extension is not supported, ``False`` if
        ``ultrasonic`` was requested for a recording at or below 100000 Hz.
    """
    import os  # local import: the module-level import block is not visible here

    if not device:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Use the real extension rather than the last three characters, so names
    # such as "clipmp3" are not mistaken for mp3 and path-like objects work.
    filext = os.path.splitext(audio_file)[1][1:].lower()
    if filext == "mp3":
        sound = AudioSegment.from_mp3(audio_file)
    elif filext == "ogg":
        sound = AudioSegment.from_ogg(audio_file)
    elif filext == "wav":
        sound = AudioSegment.from_wav(audio_file)
    elif filext in ("wma", "m4a", "mp4", "aac"):
        sound = AudioSegment.from_file(audio_file, filext)
    else:
        print(
            'Sorry, this file type is not permitted. The legal extensions are: wav, mp3, wma, m4a, ogg, mp4, aac.'
        )
        return None

    # Snapshot of the input before any conversion, returned to the caller.
    original_metadata = {
        'channel': sound.channels,
        'sample_rate': sound.frame_rate,
        'sample_size': len(sound.get_array_of_samples()),
        'duration': sound.duration_seconds
    }
    print(
        'Origional audio: channel = %s, sample_rate = %s Hz, sample_size = %s, duration = %s s'
        % (original_metadata['channel'], original_metadata['sample_rate'],
           original_metadata['sample_size'], original_metadata['duration']))

    if ultrasonic:
        if sound.frame_rate > 100000:  # UltraSonic
            sound = speed_change(sound, 1 / 12)
        else:
            return False

    if sound.frame_rate > sr:
        # Filter at the target Nyquist frequency before downsampling.
        sound = scipy_effects.low_pass_filter(sound, sr / 2)
    if sound.frame_rate != sr:
        sound = sound.set_frame_rate(sr)
    if sound.channels > 1:
        # Keep only the first channel.
        sound = sound.split_to_mono()[0]
    if sound.sample_width != 2:
        sound = sound.set_sample_width(2)  # 2 bytes per sample
    if high_pass:
        sound = sound.high_pass_filter(high_pass)
    sound = effects.normalize(sound)  # normalize max-amplitude to 0 dB

    songdata = np.array(sound.get_array_of_samples())
    duration = round(songdata.shape[0] / sound.frame_rate * 1000)  # ms
    audiodata = torch.tensor(songdata, device=device).float()
    print(
        'Standarized audio: channel = %s, sample_rate = %s Hz, sample_size = %s, duration = %s s'
        % (sound.channels, sound.frame_rate, songdata.shape[0],
           sound.duration_seconds))
    return sound.frame_rate, audiodata, duration, sound, original_metadata
示例#3
0
def _plot_spectrum(segment, label):
    """Plot the FFT magnitude of ``segment`` on the current matplotlib figure."""
    # set_channels returns a new segment rather than modifying in place; the
    # mono copy is used so the FFT is not taken over interleaved channels.
    mono = segment.set_channels(1)
    samples = mono.get_array_of_samples()
    spectrum = np.fft.rfft(samples)
    # Use the segment's own frame rate instead of relying on a global.
    freqs = np.fft.rfftfreq(len(samples), d=1 / mono.frame_rate)
    plt.plot(freqs, np.abs(spectrum), label=label)


def blah_custom_eq(sample,
              low_db=0, demud_db=0, intel_db=0, air_db=0):
    """Split ``sample`` into eight bands, apply per-band gain, re-mix and plot.

    The bands are: below 100, 100-250, 250-300, 300-2500, 2500-3000,
    3000-10000, 10000-16000 and above 16000 (cutoffs as passed to
    ``scipy_effects``; presumably Hz). Four of them receive an adjustable
    gain; the others are mixed back unchanged.

    Args:
        sample: Segment to equalise (presumably a pydub ``AudioSegment``).
        low_db: Gain in dB added to the band below 100.
        demud_db: Gain in dB added to the 250-300 band.
        intel_db: Gain in dB added to the 2500-3000 band.
        air_db: Gain in dB added to the 10000-16000 band.

    Returns:
        The overlay of all eight bands. The channel layout of ``sample`` is
        left as-is; mono downmixes are used for plotting only.

    Side effects:
        Prints a progress message and opens a matplotlib figure with the
        spectra of the input, of band 1 and of the result. ``plt.show()``
        is called before returning.
    """
    print("eq-ing ...")
    bands = [
        scipy_effects.low_pass_filter(sample, 100) + low_db,
        scipy_effects.band_pass_filter(sample, 100, 250),
        scipy_effects.band_pass_filter(sample, 250, 300) + demud_db,
        scipy_effects.band_pass_filter(sample, 300, 2500),
        scipy_effects.band_pass_filter(sample, 2500, 3000) + intel_db,
        scipy_effects.band_pass_filter(sample, 3000, 10000),
        scipy_effects.band_pass_filter(sample, 10000, 16000) + air_db,
        scipy_effects.high_pass_filter(sample, 16000),
    ]

    # Re-mix: start from the lowest band and overlay the rest on top.
    result = bands[0]
    for band in bands[1:]:
        result = result.overlay(band)

    plt.figure()
    _plot_spectrum(sample, "sample")
    # Only band 1 is plotted for inspection; start=1 keeps the label in sync
    # with the band's index in ``bands``.
    for index, band in enumerate(bands[1:2], start=1):
        _plot_spectrum(band, "band %d" % index)
    _plot_spectrum(result, "eq result")

    plt.legend()
    plt.show()

    return result
示例#4
0
def lofi_filter(audioclip: AudioSegment, cutoff=500) -> AudioSegment:
    """Run ``audioclip`` through a low-pass and then a high-pass filter.

    Both stages use the same ``cutoff`` value. (Original author's note: this
    is just a re-wrap of pydub's own filters.)

    Args:
        audioclip: Segment to filter.
        cutoff: Cutoff passed to both ``scipy_effects`` filters.

    Returns:
        The segment after both filter passes.
    """
    low_passed = scipy_effects.low_pass_filter(audioclip, cutoff)
    return scipy_effects.high_pass_filter(low_passed, cutoff)
示例#5
0
def cheap_eq(seg, focus_freq, bandwidth=100, mode="peak", gain_dB=0, order=5):
    """Cheap EQ in PyDub: boost a region by overlaying a filtered copy.

    A filtered copy of ``seg`` is attenuated by ``6 - gain_dB`` dB and
    overlaid on the input.

    Args:
        seg: Segment to process (presumably a pydub ``AudioSegment``).
        focus_freq: Centre of the band in ``"peak"`` mode, cutoff for shelves.
        bandwidth: Width of the band around ``focus_freq``; only read in
            ``"peak"`` mode.
        mode: ``"peak"``, ``"low_shelf"`` or ``"high_shelf"``.
        gain_dB: Requested boost. Negative values are not implemented and
            leave ``seg`` untouched.
        order: Filter order forwarded to the filter helpers.

    Returns:
        The boosted segment, or ``seg`` itself when ``gain_dB`` is negative.

    Raises:
        ValueError: If ``gain_dB >= 0`` and ``mode`` is not a known mode
            (this previously surfaced as an ``UnboundLocalError``).
    """
    if not gain_dB >= 0:
        # Only boosting is implemented; cuts pass the input through.
        return seg

    if mode == "peak":
        sec = band_pass_filter(seg,
                               focus_freq - bandwidth / 2,
                               focus_freq + bandwidth / 2,
                               order=order)
    elif mode == "low_shelf":
        sec = low_pass_filter(seg, focus_freq, order=order)
    elif mode == "high_shelf":
        # A high shelf needs the region above the cutoff. Imported locally
        # because this name may not be in the module's existing imports.
        from pydub.scipy_effects import high_pass_filter
        sec = high_pass_filter(seg, focus_freq, order=order)
    else:
        raise ValueError(
            "mode must be 'peak', 'low_shelf' or 'high_shelf', got %r" % (mode,))

    return seg.overlay(sec - (6 - gain_dB))
示例#6
0
def _to_16k_mono_wav(src, dst):
    """Convert ``src`` to a 16-bit PCM, mono, 16 kHz WAV at ``dst`` via ffmpeg.

    Returns ffmpeg's exit code. Failures are not raised, as before.
    """
    # An argument list (no shell) passes paths through verbatim, so spaces or
    # shell metacharacters in a path cannot split or alter the command.
    return subprocess.call([
        "ffmpeg", "-i", src,
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        dst,
    ])


def _mp3_sibling(wav_path):
    """Return ``wav_path`` with its extension swapped for ``.mp3``."""
    # Only the extension is replaced; a "wav" elsewhere in the path is kept.
    return os.path.splitext(wav_path)[0] + ".mp3"


HP_FILE = os.path.join(OUT_PATH, f"hp_{FILE_DENOMINATOR}_16k_mono.wav")
LP_FILE = os.path.join(OUT_PATH, f"lp_{FILE_DENOMINATOR}_16k_mono.wav")
BP_FILE = os.path.join(OUT_PATH, f"bp_{FILE_DENOMINATOR}_16k_mono.wav")

# Open MP3 file
audio_file = AudioSegment.from_mp3(MP3_FILE)
print("> successfully opened mp3 file")

# use ffmpeg for conversion to 16 bit & mono channel wav
# required format for transcription api
_to_16k_mono_wav(MP3_FILE, WAV_FILE)

# Use pydub low pass filter and export
# cuts off all frequencies above 3500 Hz
lp_audio = scipy_effects.low_pass_filter(audio_file, 3500)
LP_MP3 = _mp3_sibling(LP_FILE)
lp_audio.export(LP_MP3, format="mp3")
_to_16k_mono_wav(LP_MP3, LP_FILE)
print("> successfully saved and converted low pass filter")

# Use pydub high pass filter and export
# cuts off all frequencies below 70 Hz
# reference: Human hearing starts at 20 Hz, lowest average human speech starts at 85 Hz
hp_audio = scipy_effects.high_pass_filter(audio_file, 70)
HP_MP3 = _mp3_sibling(HP_FILE)
hp_audio.export(HP_MP3, format="mp3")
_to_16k_mono_wav(HP_MP3, HP_FILE)
print("> successfully saved and converted high pass filter")

# Use pydub bandpass filter and export
示例#7
0
'''
DynaMIX Sample Processing:-
Time Domain Gain Analysis: Using Relative Dominance
Relative Stereo Separation
'''
#Channel Processing
# For every chunk index, compare the peak levels (max_dBFS) of the left,
# right, mid and both side chunks, then rebalance them.
for x in range(len(left_chunk)):
    l = left_chunk[x].max_dBFS
    r = right_chunk[x].max_dBFS
    m = mid_chunk[x].max_dBFS
    sl = side_left_chunk[x].max_dBFS
    sr = side_right_chunk[x].max_dBFS
    if m == l == r == sl == sr:
        # All five peaks are equal: attenuate the sides by 6 dB and low-pass
        # them at 7000. The filtered result is stored back; the filter call's
        # return value was previously discarded, which left the chunks
        # unfiltered (assumes low_pass_filter returns a new segment, as
        # pydub's does -- confirm).
        side_left_chunk[x] = low_pass_filter(side_left_chunk[x] - 6,
                                             7000,
                                             order=2)
        side_right_chunk[x] = low_pass_filter(side_right_chunk[x] - 6,
                                              7000,
                                              order=2)
        mid_chunk[x] = mid_chunk[x] - 3
        # Separate once and use both outputs, so the right chunk is derived
        # from the original left chunk rather than the already-replaced one.
        separated = stereo_sepration_from_mono(6, left_chunk[x],
                                               right_chunk[x])
        left_chunk[x], right_chunk[x] = separated[0], separated[1]
    elif m == max(m, l, r, sl, sr):
        # The mid chunk has the highest peak: pull mid down by 3 dB and the
        # sides by 6 dB, with a smaller separation amount.
        mid_chunk[x] = mid_chunk[x] - 3
        side_left_chunk[x] = side_left_chunk[x] - 6
        side_right_chunk[x] = side_right_chunk[x] - 6
        separated = stereo_sepration_from_mono(3, left_chunk[x],
                                               right_chunk[x])
        left_chunk[x], right_chunk[x] = separated[0], separated[1]
示例#8
0
# Per-channel EQ. side_channel[5] is processed twice: first a high-shelf
# change of -18 dB at 8993, then (below) +15 dB at 11314.
side_channel[5] = cheap_eq(side_channel[5],
                           8993,
                           mode="high_shelf",
                           gain_dB=-18,
                           order=2)
# side_channel[4] and side_channel[5] both get a +15 dB high shelf at 11314.
side_channel[4] = cheap_eq(side_channel[4],
                           11314,
                           mode="high_shelf",
                           gain_dB=15,
                           order=2)
side_channel[5] = cheap_eq(side_channel[5],
                           11314,
                           mode="high_shelf",
                           gain_dB=15,
                           order=2)
# Both rear channels are low-passed at 7592 with a second-order filter.
rear_channel[0] = low_pass_filter(rear_channel[0], 7592, order=2)
rear_channel[1] = low_pass_filter(rear_channel[1], 7592, order=2)
#Combination
# Level offset (dB) added to the layers mixed in as reflections below.
reflect_level = -6
# Overlay offsets for the front / side / rear layers.
# NOTE(review): these look like distance / 0.343 (speed of sound in m/ms),
# i.e. delays in milliseconds -- confirm. r_amb_fact is not used in the lines
# visible here.
f_amb_fact = (2 / 0.343)
s_amb_fact = (2.085 / 0.343)
r_amb_fact = 40 + (np.sqrt(2**2 + 7**2) / 0.343)
# Build channel[0]: start from the mid channel lowered by 5.75 dB, then
# overlay the extra and side layers one by one at increasing offsets.
# (Presumably pydub AudioSegments, where `position` is in ms -- confirm.)
channel[0] = (mid_channel - 5.75)
channel[0] = channel[0].overlay(ex_channel[0] - 12.5,
                                position=f_amb_fact + 0.25)
channel[0] = channel[0].overlay(ex_channel[2] - 9.5,
                                position=f_amb_fact + 0.4999)
channel[0] = channel[0].overlay(ex_channel[4], position=s_amb_fact + 0.6874)
# Same side offset plus an extra 40, with reflect_level applied.
channel[0] = channel[0].overlay(side_channel[4] + reflect_level,
                                position=40 + s_amb_fact + 0.6874)
channel[0] = channel[0].overlay(side_channel[2] - 9.5 + reflect_level,