def THDN(f0, signal, sample_rate):
    """Estimate THD+N by notching out the fundamental and comparing RMS levels.

    The signal is Hann-windowed, ``f0`` is removed with a zero-phase IIR notch
    filter (``iirnotch`` + ``filtfilt``), and the RMS of what remains is
    divided by the RMS of the windowed input.

    :param f0: frequency to be removed from the signal (Hz)
    :param signal: samples as a numpy array
    :param sample_rate: sampling rate of ``signal`` (Hz)
    :return: ``(thdn, 0, 0)`` where ``thdn`` is the residual-to-total RMS
        ratio, or ``None`` (after printing "no signal") when the windowed
        input has no energy
    """
    signal = signal.astype(np.double)
    # The top-level ``scipy.signal.hann`` alias no longer exists in current
    # SciPy; the window functions live in ``scipy.signal.windows``.
    signal = signal * scipy.signal.windows.hann(len(signal))
    if not rms(signal) > 0:
        print("no signal")
        return

    # Notch quality factor grows with the square root of the fundamental.
    q = math.trunc(math.sqrt(f0) / 2.)
    # iirnotch expects the frequency normalised to the Nyquist frequency.
    w0 = f0 / (sample_rate / 2.)
    b, a = scipy.signal.iirnotch(w0, q)
    filtered_signal = scipy.signal.filtfilt(b, a, signal)

    total_rms = rms(signal)
    other_rms = rms(filtered_signal)
    print("origin rms: %.2f filtered rms: %.2f" % (total_rms, other_rms))

    # Ratio of the residual (noise + harmonics) to the total windowed signal,
    # fundamental included.
    thdn = other_rms / total_rms
    # log10(0) raises ValueError; a perfectly clean residual is -inf dB.
    thdn_db = float("-inf") if thdn == 0 else 20 * math.log10(thdn)
    print("THD+N: %.1f%% or %.1f dB" % (thdn * 100, thdn_db))
    return (thdn, 0, 0)
def generate(sound, sampling_rate, duration, num, logmel, output):
    """Write ``num`` generated variations of ``sound`` into ``output``.

    The output directory is created if needed. Each variation is saved as
    ``<sound>-<6-digit index>.wav`` (float32 samples); when ``logmel`` is
    truthy a log-mel spectrogram is saved next to it as ``.npy``.

    Raises ValueError when ``sound`` is not one of 'sine', 'square',
    'white_noise' or 'click'.
    """
    os.makedirs(output, exist_ok=True)

    # Reject unsupported sounds first, then resolve the generator function.
    if sound not in ('sine', 'square', 'white_noise', 'click'):
        raise ValueError(
            'Selected sound {} is not implemented'.format(sound))
    if sound == 'sine':
        make_signal = sine
    elif sound == 'square':
        make_signal = square
    elif sound == 'white_noise':
        make_signal = white_noise
    else:
        make_signal = click

    for index in range(num):
        samples = make_signal(sampling_rate, duration)

        # Shared stem for the audio file and the optional spectrogram.
        stem = os.path.join(output, sound + '-' + '{:06d}'.format(index))
        scipy.io.wavfile.write(
            stem + '.wav', sampling_rate, samples.astype(np.float32))

        if logmel:
            np.save(stem + '.npy', logmelspectrogram(samples, sampling_rate))
def denoise_audio(
    signal: np.ndarray, rate: int, including_multipass: bool = True
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, int, int]:
    """
    Denoises given audio signal.

    The largest pure-noise interval found in the signal is used as the noise
    profile for the noise reduction step.

    :param signal: audio signal as int16 values
    :param rate: audio sample rate in samples per second
    :param including_multipass: also run multi-band spectral subtraction on
                                the removed-noise signal. Caution: the
                                returned noise signal may then be shorter.
    :returns: denoised audio signal as int16 values
    :returns: removed noise signal (input minus denoised audio, minus the
              multi-band result when ``including_multipass`` is set)
    :returns: intervals of pure noise
    :returns: start of used noise interval
    :returns: end of used noise interval
    """
    noise_intervals = get_noise_intervals(signal, rate)
    start, end = get_largest_noise_interval(noise_intervals)
    noise_profile = signal[start:end]

    # NOTE(review): float16 carries ~11 bits of mantissa, so int16 samples are
    # rounded before denoising - confirm this precision loss is intended.
    denoised = nr.reduce_noise(
        audio_clip=signal.astype(np.float16),
        noise_clip=noise_profile.astype(np.float16),
        verbose=False,
    ).astype(np.int16)

    removed = signal - denoised
    if including_multipass:
        leakage = multiband_substraction_denoise(removed, rate, start, end)
        # The multi-band result can be shorter, so trim before subtracting.
        removed = removed[:leakage.size] - leakage

    return denoised, removed, noise_intervals, start, end
def audioread(path, offset=0.0, duration=None, samp_rate=16000):
    """Load audio from ``path`` with librosa and return it as float32.

    ``librosa.load`` is called with ``mono=False`` and ``sr=samp_rate``;
    ``offset`` and ``duration`` are forwarded unchanged. The sample rate
    returned by librosa is discarded.
    """
    samples, _ = librosa.load(
        path,
        sr=samp_rate,
        mono=False,
        offset=offset,
        duration=duration,
    )
    return samples.astype(np.float32)
def generate_sine(max_amp=100, noise_amp=0, duration=2.0, sample_rate=8000,
                  pitch=200):
    """
    Generates a sinewave with optional additive Gaussian noise

    :param max_amp: max amplitude of the sinusoid
    :type max_amp: int
    :param noise_amp: standard deviation of the zero-mean Gaussian noise
        added to the sinusoid
    :type noise_amp: int
    :param duration: duration of the signal in seconds
    :type duration: float
    :param sample_rate: sampling rate of the generated digital signal
    :type sample_rate: int
    :param pitch: frequency of the sinusoid in Hz
    :type pitch: float
    :return: signal
    :rtype: numpy.ndarray, dtype: int
    """
    # Sample instants; the end point is excluded so consecutive buffers tile.
    t = np.linspace(0, duration, int(duration * sample_rate), endpoint=False)

    # Both the sinusoid and the noise are truncated to integers before they
    # are summed, so the result keeps an integer dtype.
    signal = (max_amp * np.sin(2 * np.pi * pitch * t)).astype(int)
    noise = np.random.normal(0, noise_amp, len(signal))
    signal += noise.astype(int)
    return signal
def mulaw_encode(self, wav, qc):
    """Mu-law compand ``wav`` and quantize it to ``qc`` integer levels.

    Magnitudes are clipped to 1.0 before companding. The companded value in
    [-1, 1] is mapped onto [0, qc - 1], offset by 0.5 and truncated to int32.
    """
    mu = qc - 1
    clipped = np.minimum(np.abs(wav), 1.0)
    companded = np.sign(wav) * (np.log(1 + mu * clipped) / np.log(1. + mu))

    # Shift [-1, 1] to [0, mu]; the +0.5 turns the int cast into rounding.
    levels = (companded + 1) / 2 * mu + 0.5
    return levels.astype(np.int32)
def wavread(path):
    """Read a mono 16-bit WAV file and return its samples as float32.

    The samples are divided by the module-level ``int16_ampmax``.

    Raises AssertionError when the file is not mono with a 2-byte sample
    width.
    """
    f = wave.open(path)
    try:
        channels = f.getnchannels()
        sampwidth = f.getsampwidth()
        assert channels == 1 and sampwidth == 2
        frames = f.readframes(f.getnframes())
    finally:
        # Release the file handle even when the format check fails.
        f.close()

    # Convert to float before scaling: an in-place true division on an int16
    # array cannot store its result back into the integer buffer.
    signal = np.frombuffer(frames, dtype=np.short).astype(np.float32)
    signal /= int16_ampmax
    return signal
def _deconvolve(signal, kernel): _h = fft(kernel) length = len(signal) - len(kernel) + 1 kernel = np.hstack( (kernel, np.zeros(len(signal) - len(kernel), dtype=np.float32))) # zero pad the kernel to same length H = fft(kernel) deconvolved = ifft( fft(signal.astype(np.float32)) * np.conj(H) / (H * np.conj(H))) return deconvolved[:length]
def signal(self, normalize_signal=False):
    """Read the samples of ``self.wav`` and optionally scale them to unit norm.

    The shape of the samples is stored in ``self._signal_shape``. With
    ``normalize_signal`` set, the samples are cast to float32 and divided by
    their ``np.linalg.norm``; otherwise they are returned as read.
    """
    _rate, samples = scipy.io.wavfile.read(self.wav)
    self._signal_shape = samples.shape

    if not normalize_signal:
        return samples

    # np.linalg.norm can overflow on 16-bit integers, hence the float32 cast
    # before normalising (https://github.com/numpy/numpy/issues/6128).
    as_float = samples.astype('float32')
    return as_float / np.linalg.norm(as_float)
def __setitem__(self, key, value): rate, signal = value assert isinstance(rate, int), type(rate) assert isinstance(signal, np.ndarray), type(signal) wav = self.dir / f'{key}.{self.format}' wav.parent.mkdir(parents=True, exist_ok=True) if self.dtype is not None: signal = signal.astype(self.dtype) if self.format == 'wav': scipy_wav.write(wav, rate, signal) else: soundfile.write(str(wav), signal, rate) self.fscp.write(f'{key} {wav}\n')
def read_audio(filename):
    """Read a 16-bit WAV file and return its samples scaled by their maximum.

    :param filename: path of the WAV file
    :return: ``(channels, frame_rate, samples)``; ``samples`` is a float array
        divided by its largest (signed) sample value. Empty input, or input
        whose largest sample is 0, is returned unscaled instead of dividing
        by zero.

    NOTE(review): for two-channel files the interleaved sample stream is
    decimated by 2 rather than de-interleaved - confirm this is intended.
    """
    spf = wave.open(filename, 'r')
    try:
        frames = spf.readframes(spf.getnframes())
        p = spf.getnchannels()
        f = spf.getframerate()
    finally:
        spf.close()

    # WAV PCM data is little-endian 16-bit; copy into a writable float array.
    signal = np.frombuffer(frames, dtype=np.dtype('<i2')).astype(float)
    if signal.size == 0:
        return p, f, signal

    peak = signal.max()
    sound_info = signal / peak if peak != 0 else signal
    if p == 2:
        sound_info = scipy.signal.decimate(sound_info, 2)
    return p, f, sound_info
def read_audio(filename):
    """Read a 16-bit WAV file and return its samples scaled by their maximum.

    :param filename: path of the WAV file
    :return: ``(channels, frame_rate, samples)``; ``samples`` is a float array
        divided by its largest (signed) sample value. Empty input, or input
        whose largest sample is 0, is returned unscaled instead of dividing
        by zero.

    NOTE(review): for two-channel files the interleaved sample stream is
    decimated by 2 rather than de-interleaved - confirm this is intended.
    """
    spf = wave.open(filename, 'r')
    try:
        frames = spf.readframes(spf.getnframes())
        p = spf.getnchannels()
        f = spf.getframerate()
    finally:
        spf.close()

    # WAV PCM data is little-endian 16-bit; copy into a writable float array.
    signal = np.frombuffer(frames, dtype=np.dtype('<i2')).astype(float)
    if signal.size == 0:
        return p, f, signal

    peak = signal.max()
    sound_info = signal / peak if peak != 0 else signal
    if p == 2:
        sound_info = scipy.signal.decimate(sound_info, 2)
    return p, f, sound_info
def raw(signal):
    '''
    scale the raw audio signal by its largest absolute sample

    Args:
        signal: the audio signal from which to compute features, a numpy
            array indexed by sample along its first axis

    Returns:
        The signal cast to float32 and divided by its peak magnitude, with a
        trailing axis of length 1 appended
    '''
    peak = numpy.max(numpy.abs(signal))
    scaled = signal.astype(numpy.float32) / peak
    return scaled[:, numpy.newaxis]
def THDNoctave(f0, signal, sample_rate):
    """Estimate THD+N from the cos/sin Fourier coefficients of the fundamental.

    The DC-free signal is cubically resampled on a fine time grid, the
    fundamental's amplitude and phase are obtained by projecting onto
    ``cos``/``sin`` at ``f0``, and THD+N is derived from the ratio of the
    total mean-square level to the fundamental's squared amplitude.

    :param f0: fundamental frequency (Hz)
    :param signal: samples as a numpy array
    :param sample_rate: sampling rate of ``signal`` (Hz)
    :return: ``(THD, ph, amp)`` - distortion ratio, phase (radians, wrapped to
        [-pi, pi]) and amplitude of the fundamental; ``None`` (after printing
        "no signal") when the input has no energy
    """
    signal = signal.astype(np.double)
    sample_rate = float(sample_rate)
    if not rms(signal) > 0:
        print("no signal")
        return

    n_samples = len(signal)
    # Build the time axis from integer indices: a float-step arange can come
    # out one element off, which would break the interpolation below.
    t = np.arange(n_samples) / sample_rate
    freq = f0

    # Duration truncated to an integer number of cycles of freq.
    T = math.floor((n_samples / sample_rate) * freq) / freq

    x = signal - np.sum(signal) / n_samples  # remove any DC offset

    # Resample on a fine linear grid (at least 1e6 points per T).
    N = max(1e6, n_samples)
    dt = T / N
    # NOTE(review): the grid runs up to t[-1] while the integrals below are
    # normalised by T; confirm whether the grid should stop at T instead.
    t1 = np.arange(0, t[-1], dt)
    x1 = interp1d(t, x, kind='cubic')(t1)

    # Cos/sin Fourier coefficients of the fundamental.
    w = 2 * math.pi * freq
    acs = (2 / T) * np.sum(x1 * np.cos(w * t1)) * dt
    bsn = (2 / T) * np.sum(x1 * np.sin(w * t1)) * dt
    amp = (acs**2 + bsn**2)**0.5
    ph = math.pi / 2 - np.sign(acs) * math.acos(bsn / amp)

    rms22 = (2 / T) * np.sum(x1**2) * dt
    THD = (rms22 / amp**2 - 1)**0.5

    # Correct phase to be in the range [-pi : pi].
    if ph > math.pi:
        ph = ph - 2 * math.pi
    if ph < -math.pi:
        ph = ph + 2 * math.pi

    # log10(0) raises ValueError; a distortion-free result is -inf dB.
    thd_db = float("-inf") if THD == 0 else 20 * math.log10(THD)
    print("THD+N: %.1f%% or %.1f dB" % (THD * 100, thd_db))
    return THD, ph, amp
def read_input(wavfile):
    """
    Reads in the wavfile as a numpy array and normalizes the signal to a
    peak magnitude of 1. Returns the normalized signal and the sampling
    frequency.

    Args:
        wavfile: file path to .wav file

    Returns:
        signal: float64 numpy array of the wavfile sampled at fs, scaled so
            that max(abs(signal)) == 1 (all-zero or empty input is returned
            unscaled)
        fs: sampling frequency
    """
    signal, fs = sf.read(wavfile)
    signal = signal.astype(np.float64)

    # Normalise by the largest magnitude. Using abs(max(signal)) instead
    # would leave values outside [-1, 1] when the negative peak dominates.
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak > 0:
        signal = signal / peak
    return signal, fs
def wf_test(signal, noise, signal_boost, npix=400):
    """Run a Wiener filter on the noise field for each signal boost factor.

    The signal and noise power spectra are estimated from the given arrays
    and used to build the prior covariance ``Sh`` and the noise covariance.

    :param signal: 2-D array of shape (npix, npix) holding the signal
    :param noise: 2-D array of shape (npix, npix) holding the noise
    :param signal_boost: sequence of factors applied to the signal field
    :param npix: number of pixels along each axis of the regular grid
    :return: ``(s_data, m_data, d_data)``, each of shape
        ``(len(signal_boost), npix, npix)``: boosted signal, filtered
        reconstruction and filtered data for every boost factor
    """
    pixel_space = ift.RGSpace([npix, npix])
    fourier_space = pixel_space.get_default_codomain()

    signal_field = ift.Field.from_global_data(pixel_space, signal.astype(float))
    HT = ift.HartleyOperator(fourier_space, target=pixel_space)
    power_field = ift.power_analyze(
        HT.inverse(signal_field),
        binbounds=ift.PowerSpace.useful_binbounds(fourier_space, True))
    Sh = ift.create_power_operator(fourier_space, power_spectrum=power_field)
    R = HT

    noise_field = ift.Field.from_global_data(pixel_space, noise.astype(float))
    noise_power_field = ift.power_analyze(
        HT.inverse(noise_field),
        binbounds=ift.PowerSpace.useful_binbounds(fourier_space, True))
    N = ift.create_power_operator(HT.domain, noise_power_field)
    # NOTE(review): this expression was mangled in the source
    # ("HT@[email protected]"); reconstructed as the harmonic-space noise
    # operator sandwiched between HT and its inverse - confirm.
    N_inverse = HT @ N @ HT.inverse

    amplify = len(signal_boost)
    s_data = np.zeros((amplify, npix, npix))
    m_data = np.zeros((amplify, npix, npix))
    d_data = np.zeros((amplify, npix, npix))

    for i in range(amplify):
        # NOTE(review): the data is the bare noise field, so signal_boost only
        # affects s_data - confirm the boosted signal should not be added.
        data = noise_field

        # Wiener filtering the data
        j = (R.adjoint @ N_inverse.inverse)(data)
        D_inv = R.adjoint @ N_inverse.inverse @ R + Sh.inverse

        IC = ift.GradientNormController(iteration_limit=500,
                                        tol_abs_gradnorm=1e-3)
        D = ift.InversionEnabler(D_inv, IC, approximation=Sh.inverse).inverse
        m = D(j)

        s_data[i, :, :] = (signal_field * signal_boost[i]).to_global_data()
        m_data[i, :, :] = HT(m).to_global_data()
        d_data[i, :, :] = data.to_global_data()

    return (s_data, m_data, d_data)
def F0_detection_wav(wav_path, signal, args):
    """Estimate the F0 contour of an utterance with ``pw.harvest`` and
    ``pw.stonemask``.

    When ``wav_path`` is given the audio is loaded from it at its native
    sampling rate and ``signal`` is ignored; otherwise ``signal`` is used with
    ``args.sampling_rate``. The search range is 50-1100 Hz.

    Returns the F0 values refined by ``pw.stonemask``.
    """
    f0_floor = 50.0
    f0_ceil = 1100.0
    frame_shift = 30 / 1000

    if wav_path is None:
        osr = args.sampling_rate
    else:
        signal, osr = librosa.load(wav_path, sr=None)

    samples = signal.astype("double")
    # Coarse estimate first, then refinement on the same time axis.
    coarse_f0, time_axis = pw.harvest(
        samples,
        osr,
        f0_floor=f0_floor,
        f0_ceil=f0_ceil,
        frame_period=frame_shift * 1000,
    )
    return pw.stonemask(samples, coarse_f0, time_axis, osr)
def process(self, signal):
    """Extract features from a mono signal via ``self._rastaplp``.

    Raises ValueError when the signal does not have exactly one channel or
    when its sample rate differs from the processor's. The signal is cast to
    int16 before extraction; the result is returned as a ``Features`` object
    holding the transposed data as float32, the frame times and the
    processor properties.
    """
    # Validate the input before doing any work.
    if signal.nchannels != 1:
        message = 'signal must have one dimension, but it has {}'
        raise ValueError(message.format(signal.nchannels))

    if self.sample_rate != signal.sample_rate:
        message = 'processor and signal mismatch in sample rates: {} != {}'
        raise ValueError(
            message.format(self.sample_rate, signal.sample_rate))

    # The extraction runs on the int16 version of the signal.
    frames = self._rastaplp(signal.astype(np.int16)).T

    return Features(
        frames.astype(np.float32),
        self.times(frames.shape[0]),
        properties=self.get_properties())
def _compute(self, signal, vtln_warp):
    """Frame-by-frame feature computation, after Kaldi's
    OfflineFeatureTpl::Compute (src/feat/feature-common.h).

    Reimplemented here so that Rasta filtering can be integrated into the
    per-frame computation. Returns a numpy array with one row per frame and
    ``self.ndims`` columns, or an empty (0, 0) array when the signal yields
    no frame.
    """
    n_frames = kaldi.feat.window.num_frames(
        signal.nsamples, self._frame_options)
    n_dims = self.ndims
    if n_frames == 0:  # pragma: nocover
        return np.zeros((0, 0))

    # Kaldi works on the 16-bit integer version of the samples.
    samples = kaldi.matrix.SubVector(signal.astype(np.int16).data)

    features = kaldi.matrix.Matrix(n_frames, n_dims)

    # Reusable buffer for the windowed waveform, plus the windowing function.
    frame = kaldi.matrix.Vector()
    windowing = kaldi.feat.window.FeatureWindowFunction.from_options(
        self._frame_options)

    for index in range(n_frames):
        # Extract the frame's window together with its log energy ...
        log_energy = _extract_window(
            0, samples, index, self._frame_options, windowing, frame,
            self.use_energy and self.raw_energy)

        # ... then compute PLP (with optional Rasta filtering) into the row.
        self._compute_frame(
            log_energy, vtln_warp, frame, features[index, :])

    return features.numpy()
def decompress(self, f, audio_data):
    """Decode an ANMFS stream from ``f`` into ``audio_data``.

    Layout read from ``f`` (little-endian): 5-byte magic ``b'ANMFS'``, channel
    count (uint16) and sample rate (uint32); then per channel the chunk count,
    the Huffman-coded phase matrix and, per chunk, the minimum value, the
    re-scaling range, the serialized W matrix and the Huffman-coded H matrix.

    Each channel's magnitudes and phases are recombined into an STFT matrix,
    inverted with ``scipy.signal.istft``, cast to int16 and appended to
    ``audio_data`` as a new ``Channel``. ``audio_data.sample_rate`` is set
    from the header.

    Raises Exception when the magic bytes do not match.
    """
    print('Decompressing (STFT)...')
    magic = f.read(5)
    if magic != b'ANMFS':
        raise Exception('Invalid file format. Expected .anmfs.')

    channel_count, sample_rate = struct.unpack('<HI', f.read(6))
    audio_data.sample_rate = sample_rate

    for _channel_index in range(channel_count):
        channel = Channel()

        # Number of magnitude chunks stored for this channel.
        (chunk_count,) = struct.unpack('<I', f.read(4))

        # Phase matrix: Huffman decode, then de-quantize.
        phase_rows, phase_nbytes = struct.unpack('<II', f.read(8))
        phase_bytes = f.read(phase_nbytes)
        phase_quantized = self.Phuffman.decode_int_matrix(
            phase_bytes, phase_rows)
        phases = self.Pdequantize_vec(phase_quantized)

        # Magnitude matrix: rebuild each chunk from its W and H factors.
        chunks = []
        for _ in range(chunk_count):
            (min_val,) = struct.unpack('<d', f.read(8))
            matrix_min, matrix_max = struct.unpack('<dd', f.read(16))

            w_stored = deserialize_matrix(f, 'I')
            h_rows, h_nbytes = struct.unpack('<II', f.read(8))
            h_bytes = f.read(h_nbytes)

            # Bring W back to [0, 1]; decode and de-quantize H.
            w_companded = self.scale_matrix(w_stored, 0, 2**32, 0, 1)
            h_quantized = self.Hhuffman.decode_int_matrix(h_bytes, h_rows)
            h_companded = self.Hdequantize_vec(h_quantized)

            # Undo the mu-law companding, then restore the original range.
            w_unit = self.expand(w_companded, self.MU_LAW_W)
            h_unit = self.expand(h_companded, self.MU_LAW_H)
            W = self.scale_matrix(w_unit, 0, 1, matrix_min, matrix_max)
            H = self.scale_matrix(h_unit, 0, 1, matrix_min, matrix_max)

            chunks.append(nmf_matrix_original(W, H, min_val))

        magnitudes = numpy.concatenate(chunks)

        # Recombine magnitude and phase, then transpose back for istft.
        stft = magnitudes * numpy.cos(
            phases) + 1j * magnitudes * numpy.sin(phases)
        stft = numpy.transpose(stft)

        _times, samples = scipy.signal.istft(
            stft,
            fs=audio_data.sample_rate,
            window='hann',
            noverlap=self.FRAME_SIZE // 2,
            nperseg=self.FRAME_SIZE,
        )

        # Convert back to 16-bit signed samples and finalize the channel.
        channel.add_sample_array(samples.astype(numpy.int16))
        audio_data.add_channel(channel)
def rms(signal):
    """Return the root-mean-square of ``signal``, computed in float64."""
    samples = signal.astype('float64')
    mean_square = np.mean(samples * samples)
    return mean_square ** 0.5
def encode(signal):
    """Serialize ``signal`` as raw float32 bytes in native byte order."""
    # ndarray.tostring() is a deprecated alias; tobytes() returns the same
    # bytes and is available in every supported NumPy version.
    return signal.astype(np.float32).tobytes()
def _signal_arma_burg(signal, order=16, criteria="KIC", corrected=True): # Sanitize order and signal if order <= 0.0: raise ValueError("Order must be > 0") if order > len(signal): raise ValueError("Order must be less than length signal minus 2") if not isinstance(signal, np.ndarray): signal = np.array(signal) N = len(signal) # Initialisation # rho is variance of driving white noise process (prediction error) rho = sum(abs(signal)**2.0) / float(N) denominator = rho * 2.0 * N ar = np.zeros(0, dtype=complex) # AR parametric signal model estimate ref = np.zeros( 0, dtype=complex ) # vector K of reflection coefficients (parcor coefficients) ef = signal.astype(complex) # forward prediction error eb = signal.astype(complex) # backward prediction error temp = 1.0 # Main recursion for k in range(0, order): # calculate the next order reflection coefficient numerator = sum( [ef[j] * eb[j - 1].conjugate() for j in range(k + 1, N)]) denominator = temp * denominator - abs(ef[k])**2 - abs(eb[N - 1])**2 kp = -2.0 * numerator / denominator # Update the prediction error temp = 1.0 - abs(kp)**2.0 new_rho = temp * rho if criteria is not None: # k=k+1 because order goes from 1 to P whereas k starts at 0. residual_new = _criteria(criteria=criteria, N=N, k=k + 1, rho=new_rho, corrected=corrected) if k == 0: residual_old = 2.0 * abs(residual_new) # Stop as criteria has reached if residual_new > residual_old: break # This should be after the criteria residual_old = residual_new rho = new_rho if rho <= 0: raise ValueError( "Found a negative value (expected positive strictly) %s." "Decrease the order" % rho) ar = np.resize(ar, ar.size + 1) ar[k] = kp if k == 0: for j in range(N - 1, k, -1): ef_previous = ef[j] # previous value ef[j] = ef_previous + kp * eb[j - 1] # Eq. 
(8.7) eb[j] = eb[j - 1] + kp.conjugate() * ef_previous else: # Update the AR coeff khalf = (k + 1) // 2 # khalf must be an integer for j in range(0, khalf): ar_previous = ar[j] # previous value ar[j] = ar_previous + kp * ar[k - j - 1].conjugate() # Eq. (8.2) if j != k - j - 1: ar[k - j - 1] = ar[k - j - 1] + kp * ar_previous.conjugate() # Eq. (8.2) # Update the forward and backward prediction errors for j in range(N - 1, k, -1): ef_previous = ef[j] # previous value ef[j] = ef_previous + kp * eb[j - 1] # Eq. (8.7) eb[j] = eb[j - 1] + kp.conjugate() * ef_previous # save the reflection coefficient ref = np.resize(ref, ref.size + 1) ref[k] = kp return ar, rho, ref