def cheap_eq(seg, focus_freq, bandwidth=100, mode="peak", gain_dB=0, order=2):
    """Cheap EQ in PyDub: boost or cut one band/shelf, then peak-limit.

    Author's notes: Silence=-120dBFS; I2/I1=2 => 3dB SPL gain.

    Boost (``gain_dB >= 0``): a filtered copy of ``seg`` is overlaid on
    ``seg`` after being lowered by ``3 - gain_dB``.

    Cut (``gain_dB < 0``): the complementary filter output is overlaid after
    being lowered by ``3 + gain_dB``, and ``gain_dB`` is then added to the mix.
    In "peak" mode this is done twice: first with a high-pass at the lower
    band edge, then with a low-pass at the upper band edge.

    Args:
        seg: Audio segment; must support ``overlay`` and ``+``/``-`` with
            numbers (presumably a pydub ``AudioSegment`` -- confirm).
        focus_freq: Centre frequency ("peak") or corner frequency (shelves).
        bandwidth: Width of the band; only used in "peak" mode.
        mode: "peak", "low_shelf" or "high_shelf".
        gain_dB: Requested gain.
        order: Filter order forwarded to the filter helpers.

    Returns:
        The result of ``peak_limiter`` on the processed segment, or ``None``
        when ``mode`` is not one of the three recognised values.
    """
    if gain_dB >= 0:
        if mode == "peak":
            band = band_pass_filter(
                seg,
                focus_freq - bandwidth / 2,
                focus_freq + bandwidth / 2,
                order=order,
            )
        elif mode == "low_shelf":
            band = low_pass_filter(seg, focus_freq, order=order)
        elif mode == "high_shelf":
            band = high_pass_filter(seg, focus_freq, order=order)
        else:
            return None
        return peak_limiter(seg.overlay(band - (3 - gain_dB)))

    if gain_dB < 0:

        def blend(signal, filtered):
            # Overlay the filtered copy, then apply the (negative) gain to the mix.
            return signal.overlay(filtered - (3 + gain_dB)) + gain_dB

        if mode == "peak":
            seg = blend(
                seg, high_pass_filter(seg, focus_freq - bandwidth / 2, order=order)
            )
            seg = blend(
                seg, low_pass_filter(seg, focus_freq + bandwidth / 2, order=order)
            )
        elif mode == "low_shelf":
            seg = blend(seg, high_pass_filter(seg, focus_freq, order=order))
        elif mode == "high_shelf":
            seg = blend(seg, low_pass_filter(seg, focus_freq, order=order))
        else:
            return None
        return peak_limiter(seg)

    # Reached only when gain_dB compares as neither >= 0 nor < 0 (e.g. NaN).
    return None
def AudioStandarize(audio_file, sr=32000, device=None, high_pass=0, ultrasonic=False):
    """Load an audio file and convert it to normalized mono 16-bit audio at ``sr`` Hz.

    Args:
        audio_file: Path to the audio file; the format is chosen from the
            last three characters of the path (case-insensitive).
        sr: Target sample rate in Hz.
        device: Torch device for the returned tensor. When falsy, 'cuda:0'
            is used if CUDA is available, otherwise 'cpu'.
        high_pass: When truthy, cutoff passed to ``sound.high_pass_filter``.
        ultrasonic: When true, the recording must have a frame rate above
            100000 Hz and is passed through ``speed_change(sound, 1 / 12)``
            before resampling.

    Returns:
        ``(frame_rate, audiodata, duration, sound, original_metadata)`` where
        ``audiodata`` is a float tensor of the samples on ``device``,
        ``duration`` is in milliseconds, ``sound`` is the processed segment
        and ``original_metadata`` describes the file as loaded.
        Returns ``None`` for an unsupported extension, and ``False`` when
        ``ultrasonic`` is set but the frame rate is not above 100000 Hz.
    """
    if not device:
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Pick the decoder from the three-character extension.
    ext = audio_file[-3:].lower()
    if ext == "mp3":
        sound = AudioSegment.from_mp3(audio_file)
    elif ext == "ogg":
        sound = AudioSegment.from_ogg(audio_file)
    elif ext == "wav":
        sound = AudioSegment.from_wav(audio_file)
    elif ext in ("wma", "m4a", "mp4", "aac"):
        sound = AudioSegment.from_file(audio_file, ext)
    else:
        print(
            'Sorry, this file type is not permitted. The legal extensions are: wav, mp3, wma, m4a, ogg.'
        )
        return None

    original_metadata = {
        'channel': sound.channels,
        'sample_rate': sound.frame_rate,
        'sample_size': len(sound.get_array_of_samples()),
        'duration': sound.duration_seconds,
    }
    print(
        'Origional audio: channel = %s, sample_rate = %s Hz, sample_size = %s, duration = %s s'
        % tuple(
            original_metadata[key]
            for key in ('channel', 'sample_rate', 'sample_size', 'duration')
        )
    )

    if ultrasonic:
        if sound.frame_rate <= 100000:
            # Ultrasonic handling was requested but the recording is not ultrasonic.
            return False
        sound = speed_change(sound, 1 / 12)

    # Low-pass at the new Nyquist frequency before downsampling.
    if sound.frame_rate > sr:
        sound = scipy_effects.low_pass_filter(sound, sr / 2)
    if sound.frame_rate != sr:
        sound = sound.set_frame_rate(sr)

    # Keep only the first channel of multi-channel audio.
    if sound.channels > 1:
        sound = sound.split_to_mono()[0]
    if sound.sample_width != 2:
        sound = sound.set_sample_width(2)

    if high_pass:
        sound = sound.high_pass_filter(high_pass)

    sound = effects.normalize(sound)  # normalize max-amplitude to 0 dB

    songdata = np.array(sound.get_array_of_samples())
    n_samples = songdata.shape[0]
    duration = round(n_samples / sound.frame_rate * 1000)  # ms
    audiodata = torch.tensor(songdata, device=device).float()
    print(
        'Standarized audio: channel = %s, sample_rate = %s Hz, sample_size = %s, duration = %s s'
        % (sound.channels, sound.frame_rate, n_samples, sound.duration_seconds)
    )
    return sound.frame_rate, audiodata, duration, sound, original_metadata
def _plot_spectrum(segment, label):
    """Plot the magnitude spectrum of ``segment`` on the current figure."""
    # AudioSegment objects are immutable: set_channels() returns a new
    # segment, so its result has to be captured. Down-mixing keeps the FFT
    # from running over interleaved multi-channel samples.
    mono = segment.set_channels(1)
    raw = mono.get_array_of_samples()
    spectrum = np.fft.rfft(raw)
    # NOTE(review): sample_rate is not defined in this function; presumably
    # a module-level global -- confirm it matches the segment's frame rate.
    freq = np.fft.rfftfreq(len(raw), d=1/sample_rate)
    plt.plot(freq, np.abs(spectrum), label=label)


def blah_custom_eq(sample, low_db=0, demud_db=0, intel_db=0, air_db=0):
    """Apply a fixed eight-band EQ to ``sample`` and plot before/after spectra.

    The input is split with ``scipy_effects`` filters into the bands
    <100, 100-250, 250-300, 300-2500, 2500-3000, 3000-10000, 10000-16000 and
    >16000 Hz. Four of the bands receive a gain offset, then all bands are
    overlaid back together.

    Args:
        sample: Input segment (pydub ``AudioSegment``-like).
        low_db: Gain added to the band below 100 Hz.
        demud_db: Gain added to the 250-300 Hz band.
        intel_db: Gain added to the 2500-3000 Hz band.
        air_db: Gain added to the 10000-16000 Hz band.

    Returns:
        The overlay of all eight bands.

    Side effects:
        Prints a progress message, opens a matplotlib figure showing the
        spectra of the input, the 100-250 Hz band and the result, and calls
        ``plt.show()``.
    """
    print("eq-ing ...")

    bands = [
        scipy_effects.low_pass_filter(sample, 100) + low_db,
        scipy_effects.band_pass_filter(sample, 100, 250),
        scipy_effects.band_pass_filter(sample, 250, 300) + demud_db,
        scipy_effects.band_pass_filter(sample, 300, 2500),
        scipy_effects.band_pass_filter(sample, 2500, 3000) + intel_db,
        scipy_effects.band_pass_filter(sample, 3000, 10000),
        scipy_effects.band_pass_filter(sample, 10000, 16000) + air_db,
        scipy_effects.high_pass_filter(sample, 16000),
    ]

    # Sum the bands back together.
    result = bands[0]
    for band in bands[1:]:
        result = result.overlay(band)

    plt.figure()
    _plot_spectrum(sample, "sample")
    # Only the second band (100-250 Hz) is plotted; the label index counts
    # from 0 within that slice.
    for i, band in enumerate(bands[1:2]):
        _plot_spectrum(band, "band %d" % i)
    _plot_spectrum(result, "eq result")
    plt.legend()
    plt.show()

    return result
def lofi_filter(audioclip: AudioSegment, cutoff=500) -> AudioSegment:
    """Low-pass then high-pass ``audioclip`` at the same ``cutoff`` frequency.

    Chains ``scipy_effects.low_pass_filter`` and
    ``scipy_effects.high_pass_filter`` (in that order), which the original
    author describes as a re-wrap of pydub's ``band_pass_filter``.
    """
    return scipy_effects.high_pass_filter(
        scipy_effects.low_pass_filter(audioclip, cutoff),
        cutoff,
    )
def cheap_eq(seg, focus_freq, bandwidth=100, mode="peak", gain_dB=0, order=5):
    """Cheap EQ in PyDub: boost one band or shelf by overlaying a filtered copy.

    For ``gain_dB >= 0`` a filtered copy of ``seg`` is lowered by
    ``6 - gain_dB`` and overlaid on ``seg``. Negative gains are not
    implemented in this variant: ``seg`` is returned unchanged.

    Args:
        seg: Audio segment; must support ``overlay`` (presumably a pydub
            ``AudioSegment`` -- confirm).
        focus_freq: Centre frequency ("peak") or corner frequency (shelves).
        bandwidth: Width of the band; only used in "peak" mode.
        mode: "peak", "low_shelf" or "high_shelf".
        gain_dB: Requested boost.
        order: Filter order forwarded to the filter helpers.

    Returns:
        The boosted segment, or ``seg`` itself when ``gain_dB`` is negative.

    Raises:
        ValueError: If ``gain_dB >= 0`` and ``mode`` is not recognised.
    """
    if gain_dB >= 0:
        if mode == "peak":
            sec = band_pass_filter(
                seg,
                focus_freq - bandwidth / 2,
                focus_freq + bandwidth / 2,
                order=order,
            )
        elif mode == "low_shelf":
            sec = low_pass_filter(seg, focus_freq, order=order)
        elif mode == "high_shelf":
            # A high shelf boosts content above the corner frequency, so it
            # needs the high-pass output (this branch used to reuse the
            # low-pass filter, making it identical to "low_shelf").
            sec = high_pass_filter(seg, focus_freq, order=order)
        else:
            # Previously fell through to an unbound ``sec``.
            raise ValueError(
                "unknown mode %r; expected 'peak', 'low_shelf' or 'high_shelf'"
                % (mode,)
            )
        seg = seg.overlay(sec - (6 - gain_dB))
    return seg
HP_FILE = os.path.join(OUT_PATH, f"hp_{FILE_DENOMINATOR}_16k_mono.wav")
LP_FILE = os.path.join(OUT_PATH, f"lp_{FILE_DENOMINATOR}_16k_mono.wav")
BP_FILE = os.path.join(OUT_PATH, f"bp_{FILE_DENOMINATOR}_16k_mono.wav")


def _ffmpeg_to_16k_mono_wav(src, dst):
    """Convert ``src`` to a 16-bit, mono, 16 kHz WAV at ``dst`` using ffmpeg.

    The command is passed as an argument list (no shell), so paths containing
    spaces or shell metacharacters are handled correctly. Returns ffmpeg's
    exit code; as before, callers do not check it.
    """
    return subprocess.call(
        ["ffmpeg", "-i", src, "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000", dst]
    )


# Open MP3 file
audio_file = AudioSegment.from_mp3(MP3_FILE)
print("> successfully opened mp3 file")

# use ffmpeg for conversion to 16 bit & mono channel wav
# required format for transcription api
_ffmpeg_to_16k_mono_wav(MP3_FILE, WAV_FILE)

# Use pydub low pass filter and export
# cuts off all frequencies above 3500 Hz
lp_audio = scipy_effects.low_pass_filter(audio_file, 3500)
# Swap only the extension; str.replace("wav", "mp3") would also rewrite any
# other "wav" occurring in the directory or file name.
LP_MP3 = os.path.splitext(LP_FILE)[0] + ".mp3"
lp_audio.export(LP_MP3, format="mp3")
_ffmpeg_to_16k_mono_wav(LP_MP3, LP_FILE)
print("> successfully saved and converted low pass filter")

# Use pydub high pass filter and export
# cuts off all frequencies below 70 Hz
# reference: Human hearing starts at 20 Hz, lowest average human speech starts at 85 Hz
hp_audio = scipy_effects.high_pass_filter(audio_file, 70)
HP_MP3 = os.path.splitext(HP_FILE)[0] + ".mp3"
hp_audio.export(HP_MP3, format="mp3")
_ffmpeg_to_16k_mono_wav(HP_MP3, HP_FILE)
print("> successfully saved and converted high pass filter")

# Use pydub bandpass filter and export
'''
DynaMIX Sample Processing:-
Time Domain Gain Analysis:
    Using Relative Dominance
Relative Stereo Separation
'''
# Channel Processing
# For every chunk index, compare the peak level (max_dBFS) of the left, right,
# mid and side chunks and rebalance them depending on which one dominates.
for x in range(len(left_chunk)):
    l = left_chunk[x].max_dBFS
    r = right_chunk[x].max_dBFS
    m = mid_chunk[x].max_dBFS
    sl = side_left_chunk[x].max_dBFS
    sr = side_right_chunk[x].max_dBFS
    if m == l == r == sl == sr:
        # All five peaks are equal: lower the sides and low-pass them at 7 kHz.
        side_left_chunk[x] = side_left_chunk[x] - 6
        side_right_chunk[x] = side_right_chunk[x] - 6
        # The filter returns a new segment; the result was previously
        # discarded, so the low-pass never took effect.
        side_right_chunk[x] = low_pass_filter(side_right_chunk[x], 7000, order=2)
        side_left_chunk[x] = low_pass_filter(side_left_chunk[x], 7000, order=2)
        mid_chunk[x] = mid_chunk[x] - 3
        # Separate once from the original pair. Calling the helper a second
        # time after overwriting left_chunk[x] fed the already-processed left
        # chunk into the computation of the right chunk.
        # NOTE(review): assumes stereo_sepration_from_mono returns
        # (left, right) for the given pair -- confirm against its definition.
        separated = stereo_sepration_from_mono(6, left_chunk[x], right_chunk[x])
        left_chunk[x] = separated[0]
        right_chunk[x] = separated[1]
    elif m == max(m, l, r, sl, sr):
        # Mid is the loudest: lower mid and sides, apply a milder separation.
        mid_chunk[x] = mid_chunk[x] - 3
        side_left_chunk[x] = side_left_chunk[x] - 6
        side_right_chunk[x] = side_right_chunk[x] - 6
        separated = stereo_sepration_from_mono(3, left_chunk[x], right_chunk[x])
        left_chunk[x] = separated[0]
        right_chunk[x] = separated[1]
side_channel[5] = cheap_eq(side_channel[5], 8993, mode="high_shelf", gain_dB=-18, order=2) side_channel[4] = cheap_eq(side_channel[4], 11314, mode="high_shelf", gain_dB=15, order=2) side_channel[5] = cheap_eq(side_channel[5], 11314, mode="high_shelf", gain_dB=15, order=2) rear_channel[0] = low_pass_filter(rear_channel[0], 7592, order=2) rear_channel[1] = low_pass_filter(rear_channel[1], 7592, order=2) #Combination reflect_level = -6 f_amb_fact = (2 / 0.343) s_amb_fact = (2.085 / 0.343) r_amb_fact = 40 + (np.sqrt(2**2 + 7**2) / 0.343) channel[0] = (mid_channel - 5.75) channel[0] = channel[0].overlay(ex_channel[0] - 12.5, position=f_amb_fact + 0.25) channel[0] = channel[0].overlay(ex_channel[2] - 9.5, position=f_amb_fact + 0.4999) channel[0] = channel[0].overlay(ex_channel[4], position=s_amb_fact + 0.6874) channel[0] = channel[0].overlay(side_channel[4] + reflect_level, position=40 + s_amb_fact + 0.6874) channel[0] = channel[0].overlay(side_channel[2] - 9.5 + reflect_level,