def test_dtw_aligner():
    """Check that the DTW aligners bring a time-stretched pair closer together.

    The example audio is loaded at its native sampling rate, a copy is
    time-stretched with ``rate=2.0``, and features for both signals are
    computed with ``_get_mcep``. After padding the pair to a common length
    and adding a leading utterance axis, each aligner must return arrays of
    identical shape whose difference has a smaller norm than the difference
    of the unaligned pair.
    """
    from nnmnkwii.preprocessing.alignment import DTWAligner, IterativeDTWAligner

    x, fs = librosa.load(example_audio_file(), sr=None)
    assert fs == 16000
    # ``rate`` is keyword-only in librosa >= 0.10; passing it by keyword
    # also works on older releases.
    x_fast = librosa.effects.time_stretch(x, rate=2.0)

    X = _get_mcep(x, fs)
    Y = _get_mcep(x_fast, fs)

    D = X.shape[-1]

    # Create padded pair
    X, Y = adjust_frame_lengths(X, Y, divisible_by=2)

    # Add utterance axis
    X = X.reshape(1, -1, D)
    Y = Y.reshape(1, -1, D)

    X_aligned, Y_aligned = DTWAligner().transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < np.linalg.norm(X - Y)

    X_aligned, Y_aligned = IterativeDTWAligner(
        n_iter=2, max_iter_gmm=10, n_components_gmm=2
    ).transform((X, Y))
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < np.linalg.norm(X - Y)

    # Custom dist function
    from nnmnkwii.metrics import melcd

    X_aligned, Y_aligned = DTWAligner(dist=melcd).transform((X, Y))
    # Same shape check as the cases above, so a mismatch cannot be hidden
    # by broadcasting in the subtraction below.
    assert X_aligned.shape == Y_aligned.shape
    assert np.linalg.norm(X_aligned - Y_aligned) < np.linalg.norm(X - Y)
def test_mulaw_real():
    """Round-trip the example audio through mu-law quantization.

    The waveform is read from the example file, scaled by ``1 / 32768`` and
    cast to ``float32``, then quantized with ``mu = 256``. The quantized
    values must lie in ``[0, mu)`` and have the default integer dtype. The
    inverse transform, rescaled by ``32768``, must be ``float32`` and must
    be castable to ``int16``.
    """
    _, x = wavfile.read(example_audio_file())
    x = (x / 32768.0).astype(np.float32)
    mu = 256

    y = P.mulaw_quantize(x, mu)
    # Separate assertions so a failure reports which bound was violated.
    assert y.min() >= 0
    assert y.max() < mu
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; comparing against ``int`` is the exact equivalent.
    assert y.dtype == int

    x = P.inv_mulaw_quantize(y, mu) * 32768
    assert x.dtype == np.float32
    # Only checks that the cast to 16-bit PCM succeeds.
    x = x.astype(np.int16)
def test_trim_remove_zeros_frames():
    """Check that zero-frame stripping leaves the second axis untouched.

    The spectrogram and aperiodicity of the example audio are computed with
    pyworld. Both ``trim_zeros_frames`` and ``remove_zeros_frames`` are then
    applied to each matrix, and the result must keep the same ``shape[1]``
    as its input.
    """
    frame_period = 5
    sample_rate, waveform = wavfile.read(example_audio_file())
    waveform = waveform.astype(np.float64)

    f0, timeaxis = pyworld.dio(waveform, sample_rate, frame_period=frame_period)
    spectrogram = pyworld.cheaptrick(waveform, f0, timeaxis, sample_rate)
    aperiodicity = pyworld.d4c(waveform, f0, timeaxis, sample_rate)

    features = (spectrogram, aperiodicity)
    # Trimming is checked on every matrix first, then removal.
    for strip_zeros in (trim_zeros_frames, remove_zeros_frames):
        for feature in features:
            stripped = strip_zeros(feature)
            assert stripped.shape[1] == feature.shape[1]