def get_lufs_loudness(path_to_wav):
    data, rate = sf.read(path_to_wav)  # load audio (with shape (samples, channels))
    meter = pyln.Meter(rate, block_size=0.100)  # create BS.1770 meter
    loudness = meter.integrated_loudness(data)  # measure loudness
    return loudness

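# A minimal driver sketch for get_lufs_loudness above: these are the module-level
# imports the snippet assumes (sf = soundfile, pyln = pyloudnorm throughout this
# listing), and "example.wav" is a placeholder path, not a file from the source.
import soundfile as sf
import pyloudnorm as pyln

if __name__ == "__main__":
    lufs = get_lufs_loudness("example.wav")
    print("Integrated loudness: %.2f LUFS" % lufs)
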
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--sampling-rate', '-r', default=48000, type=int)
    args = parser.parse_args()
    rate = args.sampling_rate

    block_size = 0.4
    frames_per_buffer = int(block_size * rate)

    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paFloat32, channels=2, rate=rate,
                    input=True, frames_per_buffer=frames_per_buffer)
    meter = pyln.Meter(rate, block_size=0.4)

    try:
        while True:
            data = np.frombuffer(stream.read(frames_per_buffer), dtype=np.float32)
            dataL = data[0::2]
            dataR = data[1::2]
            valL = meter.integrated_loudness(dataL)  # LUFS
            valR = meter.integrated_loudness(dataR)
            #if args.bars:
            #    lString = "#"*int(-valL)+"-"*int(bars+valL)
            #    rString = "#"*int(-valR)+"-"*int(bars+valR)
            #    print("L=[%s]\tR=[%s]"%(lString, rString))
            print("L:%+6.2f R:%+6.2f" % (valL, valR))
    except KeyboardInterrupt:
        print()

def process_wav(wav_path, out_path, cfg):
    meter = pyln.Meter(cfg["sr"])
    wav, _ = librosa.load(wav_path.with_suffix(".wav"), sr=cfg["sr"])
    loudness = meter.integrated_loudness(wav)
    wav = pyln.normalize.loudness(wav, loudness, -24)
    peak = np.abs(wav).max()
    if peak >= 1:
        wav = wav / peak * 0.999

    logmel = melspectrogram(
        wav,
        sr=cfg["sr"],
        hop_length=cfg["hop_length"],
        win_length=cfg["win_length"],
        n_fft=cfg["n_fft"],
        n_mels=cfg["n_mels"],
        fmin=cfg["fmin"],
        preemph=cfg["preemph"],
        top_db=cfg["top_db"],
    )
    wav = mu_compress(
        wav,
        hop_length=cfg["hop_length"],
        frame_length=cfg["win_length"],
        bits=cfg["mulaw"]["bits"],
    )
    np.save(out_path.with_suffix(".mel.npy"), logmel)
    np.save(out_path.with_suffix(".wav.npy"), wav)
    return out_path, logmel.shape[-1]

def process_wav(wav_path, out_path, cfg):
    meter = pyln.Meter(cfg.sr)
    wav, _ = librosa.load(wav_path.with_suffix(".wav"), sr=cfg.sr)
    loudness = meter.integrated_loudness(wav)
    wav = pyln.normalize.loudness(wav, loudness, -24)
    peak = np.abs(wav).max()
    if peak >= 1:
        wav = wav / peak * 0.999

    logmel = melspectrogram(
        wav,
        sr=cfg.sr,
        hop_length=cfg.hop_length,
        win_length=cfg.win_length,
        n_fft=cfg.n_fft,
        n_mels=cfg.n_mels,
        fmin=cfg.fmin,
        preemph=cfg.preemph,
        top_db=cfg.top_db,
    )
    wav = mu_compress(
        wav,
        hop_length=cfg.hop_length,
        frame_length=cfg.win_length,
        bits=cfg.mulaw.bits,
    )
    np.save(out_path.with_suffix(".mel.npy"), logmel)
    np.save(out_path.with_suffix(".wav.npy"), wav)
    return out_path, logmel.shape[-1]

def compress_signals(params_list, files, path, prefix, size, sr):
    if not os.path.exists(path):
        os.mkdir(path)
    for i in range(len(params_list)):
        file_params = params_list[i]
        new_file_name = prefix + files[i]
        full_file_path = list(file_params.keys())[0]
        dur = sndinfo(full_file_path)[1]
        filename = os.path.join(path, new_file_name)

        s = Server(audio='offline').boot()
        s.recordOptions(dur=dur, filename=filename)

        for file, params in params_list[i].items():
            out = SfPlayer(full_file_path)
            out = Compress(out, thresh=params[0], ratio=params[1],
                           risetime=params[2], falltime=params[3], knee=0.4).out()
            s.start()

            outp, rate = sf.read(filename)
            inp, _ = sf.read(file)
            meter = pyln.Meter(rate)
            out_l = meter.integrated_loudness(outp)
            inp_l = meter.integrated_loudness(inp)
            makeup_gain = inp_l - out_l

            compressed_signal = AudioSegment.from_wav(file)
            compressed_signal = compressed_signal + makeup_gain
            compressed_signal.export(filename, format="wav")

        s.shutdown()

def test_integrated_loudness():
    data, rate = sf.read("tests/data/sine_1000.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)

    assert loudness == -3.0523438444331137

def break_video_files(input_video, input_data, output_path):
    global path
    num = 0
    loudness_dict = {}
    df = pd.read_csv(input_data)
    for ind in df.index:
        with VideoFileClip(input_video) as clips:
            # start and end times are provided dynamically by the input_data file
            clip = clips.subclip(df['start time'][ind], df['end time'][ind])
            # create .mp4 files from subclips
            clip.write_videofile(
                os.path.join(output_path, "output_%s.mp4" % str(num).zfill(3)))
            # create .wav files from subclips
            clip.audio.write_audiofile(
                os.path.join(output_path, "output_%s.wav" % str(num).zfill(3)))
            # keep count of the number of files
            num += 1

    for file_name in os.listdir(output_path):
        if file_name.endswith(".wav"):
            path = os.path.join(output_path, file_name)
            data, rate = sf.read(path)  # load audio (with shape (samples, channels))
            meter = pyln.Meter(rate)  # create BS.1770 meter
            loudness = meter.integrated_loudness(data)  # measure loudness
            file_name = file_name.split(".")[0] + ".mp4"
            loudness_dict[file_name] = loudness
    return loudness_dict, path

def calculate_loudness(audio_subclip, fps):
    CompositeAudioClip([audio_subclip]).write_audiofile(AUDIO_SUBCLIP_NAME, fps=fps)
    data, rate = sf.read(AUDIO_SUBCLIP_NAME)  # load audio
    meter = pyln.Meter(rate)  # create BS.1770 meter
    loudness = meter.integrated_loudness(data)  # measure loudness
    return loudness

def set_loudness(sources_list):
    loudness_list = []
    meter = pyln.Meter(RATE)
    target_loudness_list = []
    sources_list_norm = []

    for srcs in sources_list:
        src_list_norm = []
        trg_loudness_list = []
        loudness = []
        for i in range(len(srcs)):
            # Initialize loudness
            loudness.append(meter.integrated_loudness(srcs[i]))
            # Pick a random loudness
            target_loudness = random.uniform(MIN_LOUDNESS, MAX_LOUDNESS)
            # Normalize source to target loudness
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                src = pyln.normalize.loudness(srcs[i], loudness[i], target_loudness)
            if np.max(np.abs(src)) >= 1:
                src = srcs[i] * MAX_AMP / np.max(np.abs(srcs[i]))
                target_loudness = meter.integrated_loudness(src)
            # Save tmp results
            src_list_norm.append(src)
            trg_loudness_list.append(target_loudness)
        # Save final results
        sources_list_norm.append(src_list_norm)
        target_loudness_list.append(trg_loudness_list)
        loudness_list.append(loudness)
    return loudness_list, target_loudness_list, sources_list_norm

def pictureMake():
    data, rate = sf.read("input.wav")  # load audio (with shape (samples, channels))
    meter = pyln.Meter(rate)  # create BS.1770 meter
    loudness = meter.integrated_loudness(data)  # measure loudness
    loudness = int(loudness)
    if loudness < 0:
        loudness = -loudness
    print(loudness)

    random_number = random.randint(0, 16777215)
    hex_number = str(hex(random_number))
    hex_number = '#' + hex_number[2:]

    canvas = Image.new("RGB", (300, 300), hex_number)
    width, height = canvas.size
    pixels = canvas.load()
    for x in range(height):
        randomCol1 = random.randint(0, 255)
        randomCol2 = random.randint(0, 255)
        randomCol3 = random.randint(0, 255)
        for y in range(width):
            canvas.putpixel((x, y), (randomCol1, randomCol2, randomCol3))
        canvas.putpixel((x, x), (randomCol1, randomCol2, randomCol3))
    canvas.save("output.png", "PNG")

def __init__(self, input_sr, output_sr=None, melspec_buckets=80, hop_length=256,
             n_fft=1024, cut_silence=False):
    """
    The parameters are by default set up to do well on a 16kHz signal.
    A different frequency may require a different hop_length and n_fft
    (e.g. doubling the frequency --> doubling hop_length and doubling n_fft)
    """
    self.cut_silence = cut_silence
    self.sr = input_sr
    self.new_sr = output_sr
    self.hop_length = hop_length
    self.n_fft = n_fft
    self.mel_buckets = melspec_buckets
    self.vad = VoiceActivityDetection(
        sample_rate=input_sr
    )  # This needs heavy tweaking, depending on the data
    self.mu_encode = MuLawEncoding()
    self.mu_decode = MuLawDecoding()
    self.meter = pyln.Meter(input_sr)
    self.final_sr = input_sr
    if output_sr is not None and output_sr != input_sr:
        self.resample = Resample(orig_freq=input_sr, new_freq=output_sr)
        self.final_sr = output_sr
    else:
        self.resample = lambda x: x

def test_abs_gate_test():
    data, rate = sf.read("tests/data/1770-2_Comp_AbsGateTest.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)

    targetLoudness = -69.5
    assert targetLoudness - 0.1 <= loudness <= targetLoudness + 0.1

def test_24LKFS_2000Hz_2ch():
    data, rate = sf.read("tests/data/1770-2_Comp_24LKFS_2000Hz_2ch.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)

    targetLoudness = -24.0
    assert targetLoudness - 0.1 <= loudness <= targetLoudness + 0.1

def test_conf_monovoice_music_23LKFS():
    data, rate = sf.read("tests/data/1770-2_Conf_Mono_Voice+Music-23LKFS.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)

    targetLoudness = -23.0
    assert targetLoudness - 0.1 <= loudness <= targetLoudness + 0.1

def test_conf_stereo_vinL_R_23LKFS():
    data, rate = sf.read("tests/data/1770-2_Conf_Stereo_VinL+R-23LKFS.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)

    targetLoudness = -23.0
    assert targetLoudness - 0.1 <= loudness <= targetLoudness + 0.1

def test_18LKFS_frequency_sweep():
    data, rate = sf.read("tests/data/1770-2_Comp_18LKFS_FrequencySweep.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)

    targetLoudness = -18.0
    assert targetLoudness - 0.1 <= loudness <= targetLoudness + 0.1

def test_loudness_normalize():
    data, rate = sf.read("tests/data/sine_1000.wav")
    meter = pyln.Meter(rate)
    loudness = meter.integrated_loudness(data)
    norm = pyln.normalize.loudness(data, loudness, -6.0)
    loudness = meter.integrated_loudness(norm)

    assert loudness == -6.0

def get_perceptual_loudness(pydub_audio_segment):
    loudness_meter = pyloudnorm.Meter(pydub_audio_segment.frame_rate, block_size=0.2)
    sound_float_array = pydub_audiosegment_to_float_array(
        pydub_audio_segment,
        pydub_audio_segment.frame_rate,
        pydub_audio_segment.sample_width)
    return loudness_meter.integrated_loudness(sound_float_array)

def lufs_normalize(x, sr, lufs):
    # measure the loudness first
    meter = pyloudnorm.Meter(sr)  # create BS.1770 meter
    loudness = meter.integrated_loudness(x)

    # loudness normalize audio to the target level given by `lufs`
    loudness_normalized_audio = pyloudnorm.normalize.loudness(x, loudness, lufs)

    return loudness_normalized_audio

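# A hedged usage sketch for lufs_normalize above: read a file with soundfile,
# normalize it to a -14 LUFS target, and write the result. "in.wav" / "out.wav"
# are placeholder paths; pyloudnorm may warn about possible clipping if the
# required gain pushes samples past full scale, since loudness normalization
# applies a plain gain and does not peak-limit.
import soundfile as sf

x, sr = sf.read("in.wav")
y = lufs_normalize(x, sr, -14.0)
sf.write("out.wav", y, sr)
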
def reduce_noise(
    phrase_file,
    noise_file=None,
    sampling_rate=44100,
    lufs=-14.0,
    bitrate=128,
):
    """
    Uses the noisereduce library to produce WAV files, reducing the noise
    and normalising the volume to -14 LUFS
    """
    noise_file = noise_file or path.join(CURRENT_DIR, 'noise.wav')
    if phrase_file[-3:] != 'wav':
        phrase_file = convert_audio(phrase_file, 'wav', sampling_rate=sampling_rate)
    with SuppressWarnings(['librosa', 'audioread']):
        noise, _ = librosa.load(noise_file, sr=sampling_rate)
        phrase, _ = librosa.load(phrase_file, sr=sampling_rate)

    create_tmp_dir()

    log.info('Reducing noise...')
    reduced_noise = noisereduce.reduce_noise(
        audio_clip=phrase,
        noise_clip=noise,
        verbose=False,
    )

    log.info('Normalising loudness...')
    meter = pyloudnorm.Meter(sampling_rate)
    loudness = meter.integrated_loudness(reduced_noise)
    with SuppressWarnings(['pyloudnorm']):
        normalised_audio = pyloudnorm.normalize.loudness(
            reduced_noise, loudness, lufs)

    def _assign_ext(fpath, extension):
        return fpath[:len(fpath) - 4] + '.' + extension

    tmp_file = path.join(TMP_DIR, path.basename(phrase_file))
    tmp_mp3 = _assign_ext(tmp_file, 'mp3')
    tmp_wav = _assign_ext(tmp_file, 'wav')

    wavfile.write(tmp_wav, sampling_rate, normalised_audio)
    if os.path.exists(tmp_mp3):
        os.remove(tmp_mp3)
    convert_audio(tmp_wav, 'mp3', sampling_rate, bitrate)
    os.remove(tmp_wav)
    return tmp_mp3

def resample_and_norm(signal, orig, target, lvl):
    if orig != target:
        signal = resample_poly(signal, target, orig)

    #fx = (AudioEffectsChain().custom("norm {}".format(lvl)))
    #signal = fx(signal)

    meter = pyloudnorm.Meter(target, block_size=0.1)
    loudness = meter.integrated_loudness(signal)
    signal = pyloudnorm.normalize.loudness(signal, loudness, lvl)

    return signal

def get_integrated_lufs(audio_array, samplerate, min_duration=0.5,
                        filter_class='K-weighting', block_size=0.400):
    """
    Returns the integrated LUFS for a numpy array containing audio samples.

    For files shorter than 400 ms pyloudnorm throws an error. To avoid this,
    files shorter than min_duration (by default 500 ms) are self-concatenated
    until min_duration is reached and the LUFS value is computed for the
    concatenated file.

    Parameters
    ----------
    audio_array : np.ndarray
        numpy array containing samples or path to audio file for computing LUFS
    samplerate : int
        Sample rate of audio, for computing duration
    min_duration : float
        Minimum required duration for computing LUFS value. Files shorter than
        this are self-concatenated until their duration reaches this value for
        the purpose of computing the integrated LUFS. Caution: if you set
        min_duration < 0.4, a constant LUFS value of -70.0 will be returned for
        all files shorter than 400 ms.
    filter_class : str
        Class of weighting filter used.
        - 'K-weighting' (default)
        - 'Fenton/Lee 1'
        - 'Fenton/Lee 2'
        - 'Dash et al.'
    block_size : float
        Gating block size in seconds. Defaults to 0.400.

    Returns
    -------
    loudness
        Loudness in terms of LUFS
    """
    duration = audio_array.shape[0] / float(samplerate)
    if duration < min_duration:
        ntiles = int(np.ceil(min_duration / duration))
        audio_array = np.tile(audio_array, (ntiles, 1))
    meter = pyloudnorm.Meter(samplerate, filter_class=filter_class,
                             block_size=block_size)
    loudness = meter.integrated_loudness(audio_array)
    # silent audio gives -inf, so need to put a lower bound.
    loudness = max(loudness, -70)
    return loudness

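# Illustrative call of get_integrated_lufs above on a clip shorter than one 400 ms
# gating block; the sine below is made up for the example. With min_duration=0.5 the
# array is self-concatenated (tiled along the sample axis) before metering. Note that
# the function tiles with np.tile(audio_array, (ntiles, 1)), so it expects a 2-D
# (samples, channels) array such as the one soundfile returns.
import numpy as np

sr = 44100
t = np.arange(int(0.2 * sr)) / sr                  # 200 ms of audio
short_clip = 0.5 * np.sin(2 * np.pi * 1000.0 * t)
short_clip = short_clip.reshape(-1, 1)             # (samples, 1), mono

print(get_integrated_lufs(short_clip, sr))         # tiled to >= 0.5 s internally
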
def get_loudness(waveform, sample_rate):
    """Compute the loudness of waveform using the pyloudnorm package.

    See https://github.com/csteinmetz1/pyloudnorm for more details on
    potential arguments to the functions below.

    Args:
        waveform (np.array, [T, ]): waveform to compute loudness on
        sample_rate (int > 0): sampling rate of waveform

    Returns:
        float: the loudness of waveform
    """
    meter = pyln.Meter(sample_rate)
    return meter.integrated_loudness(waveform)

def loudness_featurize(audiofile):
    '''
    from the docs
    https://github.com/danilobellini/audiolazy/blob/master/examples/formants.py
    '''
    data, rate = sf.read(audiofile)  # load audio (with shape (samples, channels))
    meter = pyln.Meter(rate)  # create BS.1770 meter
    loudness = meter.integrated_loudness(data)  # measure loudness, units in dB

    features = [loudness]
    labels = ['Loudness']

    print(dict(zip(labels, features)))

    return features, labels

def random_noise(y, sr, noise_signals, min_snr=6, max_snr=30, prob=1.):
    if np.random.uniform(0, 1) < prob:
        meter = pyln.Meter(sr)
        snr = np.random.uniform(min_snr, max_snr)
        noise_signal = np.random.choice(noise_signals)
        if len(noise_signal) < len(y):
            raise Exception("length of the background noise signal is too short")
        noise_start = int(np.random.uniform(0, len(noise_signal) - len(y)))
        noise = noise_signal[noise_start:noise_start + len(y)]

        sig_loudness = meter.integrated_loudness(y)
        noise_loudness = meter.integrated_loudness(noise)

        # Compute and adjust SNR by setting the noise loudness relative to the signal
        loudness_normalized_noise = pyln.normalize.loudness(
            noise, noise_loudness, sig_loudness - snr)

        combined_sig = y + loudness_normalized_noise
        return combined_sig
    else:
        return y

def check_for_cliping(mixture_max, sources_list_norm):
    """Check the mixture (mode max) for clipping and re-normalize if needed."""
    # Initialize renormalized sources and loudness
    renormalize_loudness = []
    clip = False
    # Recreate the meter
    meter = pyln.Meter(RATE)
    # Check for clipping in mixtures
    if np.max(np.abs(mixture_max)) > MAX_AMP:
        clip = True
        weight = MAX_AMP / np.max(np.abs(mixture_max))
    else:
        weight = 1
    # Renormalize
    for i in range(len(sources_list_norm)):
        new_loudness = meter.integrated_loudness(sources_list_norm[i] * weight)
        renormalize_loudness.append(new_loudness)
    return renormalize_loudness, clip

def process_file(data, fs, log):
    """
    data - audio data to be processed \n
    fs - frame rate
    """
    if np.issubdtype(data.dtype, np.integer):
        type_info = np.iinfo(data.dtype)
    else:
        type_info = np.finfo(data.dtype)
    max_amp = float(type_info.max)
    data = data / max_amp

    meter = pyln.Meter(fs)  # create BS.1770 meter
    loudness = meter.integrated_loudness(data)
    return {
        'value': loudness,
    }

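# A hypothetical driver for process_file above: scipy.io.wavfile returns integer PCM
# (e.g. int16) for typical WAV files, and the function rescales it to roughly [-1, 1]
# before metering. "speech.wav" and the logger are placeholders, not from the source.
import logging
from scipy.io import wavfile

logging.basicConfig(level=logging.INFO)
fs, data = wavfile.read("speech.wav")  # data.dtype is e.g. int16
result = process_file(data, fs, logging.getLogger(__name__))
print("Integrated loudness: %.2f LUFS" % result['value'])
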
def check_for_clipping(mixtures, sources_list_norm):
    renormalize_loudness = []
    clips = []
    for mixs, srcs in zip(mixtures, sources_list_norm):
        renorm_loudness = []
        clip = False
        meter = pyln.Meter(RATE)
        # Check for clipping in mixtures
        if np.max(np.abs(mixs)) > MAX_AMP:
            clip = True
            weight = MAX_AMP / np.max(np.abs(mixs))
        else:
            weight = 1
        # Renormalize
        for i in range(len(srcs)):
            new_loudness = meter.integrated_loudness(srcs[i] * weight)
            renorm_loudness.append(new_loudness)
        renormalize_loudness.append(renorm_loudness)
        clips.append(clip)
    return renormalize_loudness, clips

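# The mixing helpers above (set_loudness, check_for_cliping, check_for_clipping)
# rely on module-level constants that are not shown in this listing. The values and
# the two synthetic sources below are illustrative guesses, not the originals.
import numpy as np

RATE = 16000
MAX_AMP = 0.9
MIN_LOUDNESS = -33
MAX_LOUDNESS = -25

rng = np.random.default_rng(0)
sources_list = [[0.1 * rng.standard_normal(3 * RATE) for _ in range(2)]]  # one mixture, two sources

_, target_loudness, sources_norm = set_loudness(sources_list)
mixtures = [sum(srcs) for srcs in sources_norm]         # sum each group of normalized sources
renorm_loudness, clips = check_for_clipping(mixtures, sources_norm)
print(clips, renorm_loudness)
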
def write_sound(sound, filename):
    peak_normalized_audio = pyln.normalize.peak(sound, -1.0)

    # measure the loudness first
    meter = pyln.Meter(44100)  # create BS.1770 meter
    loudness = meter.integrated_loudness(sound)

    # loudness normalize audio to -12 dB LUFS
    loudness_normalized_audio = pyln.normalize.loudness(sound, loudness, -12.0)

    # note: the normalized results above are left unused; the raw signal is written
    sound = 16000 * sound  # increase gain
    wave_write = wave.open(filename, 'w')
    wave_write.setparams([1, 2, 44100, 10, 'NONE', 'noncompressed'])
    ssignal = b''
    for i in range(len(sound)):
        ssignal += wave.struct.pack('h', int(sound[i]))  # transform to binary
    wave_write.writeframes(ssignal)
    wave_write.close()

def compute_mean_loudness(self) -> dict:
    print('[.] Computing mean loudness...')
    loudness = {track_name: [] for track_name in self._tracklist}
    meter = pyln.Meter(self._sr)
    for song_i, song_name in enumerate(self.songlist):
        print('{}/{}: {}'.format(song_i + 1, len(self.songlist), song_name))
        for track_name in self._tracklist:
            track_path = self._get_track_path(song_name, track_name)
            track, _ = sf.read(track_path)
            track_loudness = meter.integrated_loudness(track)
            loudness[track_name].append(track_loudness)
    mean_loudness = {
        track_name: mean(loudness[track_name])
        for track_name in loudness
    }
    return mean_loudness