def _test(method='AuxLaplaceIVA'):
    """Run an IVA separation demo on a two-source reverberant mixture.

    Convolves two speech sources with measured room impulse responses,
    separates them with the selected IVA variant, then writes the estimated
    sources and the loss curve under ``data/IVA/<method>/``.

    Args:
        method: One of 'GradLaplaceIVA', 'NaturalGradLaplaceIVA',
            'AuxLaplaceIVA' (default).

    Raises:
        ValueError: If ``method`` is not a supported IVA variant.
    """
    np.random.seed(111)

    # Room-impulse-response / mixing configuration.
    sr = 16000
    reverb = 0.16
    duration = 0.5
    samples = int(duration * sr)
    mic_intervals = [8, 8, 8, 8, 8, 8, 8]
    mic_indices = [2, 5]
    degrees = [60, 300]
    titles = ['man-16000', 'woman-16000']

    mixed_signal = _convolve_mird(
        titles, reverb=reverb, degrees=degrees,
        mic_intervals=mic_intervals, mic_indices=mic_indices, samples=samples)
    n_channels, T = mixed_signal.shape

    # STFT of the multichannel mixture.
    fft_size, hop_size = 2048, 1024
    mixture = stft(mixed_signal, fft_size=fft_size, hop_size=hop_size)

    # Choose the IVA algorithm together with an iteration budget suited to it.
    lr = 0.1
    n_sources = len(titles)

    if method == 'GradLaplaceIVA':
        iva, iteration = GradLaplaceIVA(lr=lr), 5000
    elif method == 'NaturalGradLaplaceIVA':
        iva, iteration = NaturalGradLaplaceIVA(lr=lr), 200
    elif method == 'AuxLaplaceIVA':
        iva, iteration = AuxLaplaceIVA(), 50
    else:
        raise ValueError("Not support method {}".format(method))

    estimation = iva(mixture, iteration=iteration)
    estimated_signal = istft(
        estimation, fft_size=fft_size, hop_size=hop_size, length=T)
    print("Mixture: {}, Estimation: {}".format(
        mixed_signal.shape, estimated_signal.shape))

    # One wav file per estimated source.
    for idx in range(n_sources):
        _estimated_signal = estimated_signal[idx]
        write_wav(
            "data/IVA/{}/mixture-{}_estimated-iter{}-{}.wav".format(
                method, sr, iteration, idx),
            signal=_estimated_signal, sr=sr)

    # Convergence curve.
    plt.figure()
    plt.plot(iva.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig('data/IVA/{}/loss.png'.format(method), bbox_inches='tight')
    plt.close()
def _test(method, n_bases=10, partitioning=False):
    """Run an ILRMA separation demo on a two-source reverberant mixture.

    Convolves two speech sources with measured room impulse responses,
    separates them with Gauss- or t-ILRMA, then writes the estimated sources
    and the loss curve under ``data/ILRMA/``.

    Args:
        method: 'Gauss' or 't', selecting the ILRMA source model.
        n_bases: Number of NMF bases per source.
        partitioning: Whether to use the partitioning-function variant.

    Raises:
        ValueError: If ``method`` is not a supported ILRMA variant.
    """
    np.random.seed(111)

    # Room-impulse-response / mixing configuration.
    sr = 16000
    reverb = 0.16
    duration = 0.5
    samples = int(duration * sr)
    mic_intervals = [8, 8, 8, 8, 8, 8, 8]
    mic_indices = [2, 5]
    degrees = [60, 300]
    titles = ['man-16000', 'woman-16000']

    mixed_signal = _convolve_mird(
        titles, reverb=reverb, degrees=degrees,
        mic_intervals=mic_intervals, mic_indices=mic_indices, samples=samples)
    # Fix: mixed_signal is (channels, samples); the original unpacked this as
    # n_sources and called len(titles) n_channels — names were swapped
    # relative to the IVA demo. Values are both 2, so behavior is unchanged.
    n_channels, T = mixed_signal.shape

    # STFT of the multichannel mixture.
    fft_size, hop_size = 2048, 1024
    mixture = stft(mixed_signal, fft_size=fft_size, hop_size=hop_size)

    # ILRMA
    n_sources = len(titles)
    iteration = 200

    if method == 'Gauss':
        ilrma = GaussILRMA(n_bases=n_bases, partitioning=partitioning)
    elif method == 't':
        ilrma = tILRMA(n_bases=n_bases, partitioning=partitioning)
    else:
        raise ValueError("Not support {}-ILRMA.".format(method))

    estimation = ilrma(mixture, iteration=iteration)
    estimated_signal = istft(
        estimation, fft_size=fft_size, hop_size=hop_size, length=T)
    print("Mixture: {}, Estimation: {}".format(
        mixed_signal.shape, estimated_signal.shape))

    # One wav file per estimated source.
    # NOTE(review): the output path spells "ILMRA" — looks like a typo for
    # "ILRMA", but it is kept verbatim in case existing directories use it.
    for idx in range(n_sources):
        _estimated_signal = estimated_signal[idx]
        write_wav(
            "data/ILRMA/{}ILMRA/partitioning{}/mixture-{}_estimated-iter{}-{}.wav".format(
                method, int(partitioning), sr, iteration, idx),
            signal=_estimated_signal, sr=sr)

    # Convergence curve.
    plt.figure()
    plt.plot(ilrma.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig(
        'data/ILRMA/{}ILMRA/partitioning{}/loss.png'.format(
            method, int(partitioning)),
        bbox_inches='tight')
    plt.close()
def _test(method='DSBF'):
    """Run a fixed-beamformer demo (delay-and-sum or MVDR) on a linear array.

    Mixes two directional speech sources onto an 8-microphone linear array,
    builds far-field steering vectors toward the known source directions,
    beamforms with the selected method, and writes per-source spectrogram
    images and wav files under ``data/Beamform/<method>/``.

    Args:
        method: 'DSBF' (delay-and-sum) or 'MVDR'.

    Raises:
        NotImplementedError: If ``method`` is not supported.
    """
    # Room impulse response
    sr = 16000
    reverb = 0.16
    duration = 0.5
    samples = int(duration * sr)
    mic_intervals = [3, 3, 3, 8, 3, 3, 3]
    mic_indices = [0, 1, 2, 3, 4, 5, 6, 7]
    # Microphone x-coordinates (meters) along a line; y is 0 for all.
    # Spacing matches mic_intervals (3,3,3,8,3,3,3 cm) — TODO confirm units.
    mic_position = np.array([[0.13, 0], [0.10, 0], [0.07, 0], [0.04, 0],
                             [-0.04, 0], [-0.07, 0], [-0.10, 0], [-0.13, 0]])
    degrees = [0, 90]
    titles = ['man-16000', 'woman-16000']
    n_sources, n_channels = len(degrees), len(mic_indices)
    mixed_signal = _convolve_mird(
        titles, reverb=reverb, degrees=degrees,
        mic_intervals=mic_intervals, mic_indices=mic_indices, samples=samples)
    _, T = mixed_signal.shape

    # STFT
    fft_size, hop_size = 2048, 1024
    n_bins = fft_size // 2 + 1
    # Physical frequency (Hz) of each STFT bin.
    frequency = np.arange(0, n_bins) * sr / fft_size
    mixture = stft(mixed_signal, fft_size=fft_size,
                   hop_size=hop_size)  # (n_channels, n_bins, n_frames)

    # Steering vectors (far-field plane-wave model).
    degrees = np.array(degrees) / 180 * np.pi  # now radians; shadows the list above
    x_source, y_source = np.sin(degrees), np.cos(degrees)  # (n_sources,)
    source_position = np.vstack([x_source, y_source]).transpose(1, 0)  # (n_sources, 2)
    # Inner sum is the projection of each mic position onto each source
    # direction -> (n_channels, n_sources); multiplied by frequency it
    # broadcasts to (n_bins, n_channels, n_sources).
    # NOTE(review): sign convention (+2j*pi) must match the project's STFT
    # convention, and `sound_speed` is a module-level constant not visible
    # here — presumably ~340 m/s; verify.
    steering_vector = np.exp(
        2j * np.pi * frequency[:, np.newaxis, np.newaxis]
        * np.sum(source_position * mic_position[:, np.newaxis, :], axis=2)
        / sound_speed)  # (n_bins, n_channels, n_sources)
    # Normalize so each steering vector has unit norm across channels.
    steering_vector = steering_vector / np.sqrt(len(mic_indices))

    if method == 'DSBF':
        beamformer = DelaySumBeamformer(steering_vector=steering_vector)
    elif method == 'MVDR':
        beamformer = MVDRBeamformer(steering_vector=steering_vector)
    else:
        raise NotImplementedError("Not support {} beamformer".format(method))

    estimation = beamformer(mixture)

    # Log-power spectrogram per beamformed output.
    spectrogram = np.abs(estimation)
    log_spectrogram = 10 * np.log10(spectrogram**2)
    N, F_bin, T_bin = log_spectrogram.shape
    # pcolormesh wants one more edge than cells along each axis.
    t = np.arange(T_bin + 1)
    f = np.arange(F_bin + 1)

    for n in range(N):
        plt.figure()
        plt.pcolormesh(t, f, log_spectrogram[n], cmap='jet')
        plt.savefig("data/Beamform/{}/specrtogram-{}.png".format(method, n),
                    bbox_inches='tight')
        plt.close()

    estimated_signal = istft(
        estimation, fft_size=fft_size, hop_size=hop_size, length=T)
    print("Mixture: {}, Estimation: {}".format(
        mixed_signal.shape, estimated_signal.shape))

    # One wav file per beamformed source.
    for idx in range(n_sources):
        _estimated_signal = estimated_signal[idx]
        write_wav("data/Beamform/{}/mixture-{}_estimated-{}.wav".format(
            method, sr, idx), signal=_estimated_signal, sr=sr)
def _test(metric='EUC'):
    """Run an NMF decomposition demo on a single-channel music signal.

    Fits an NMF model (EUC / IS / KL divergence) to the power spectrogram,
    resynthesizes the full approximation and each individual basis via
    amplitude-ratio masking with the mixture phase, and writes wav files,
    spectrogram images, and the loss curve under ``data/NMF/``.

    Args:
        metric: 'EUC' (default), 'IS', or 'KL', selecting the divergence.

    Raises:
        NotImplementedError: If ``metric`` is not supported.
    """
    np.random.seed(111)

    fft_size, hop_size = 1024, 256
    n_bases = 6
    iteration = 100

    signal, sr = read_wav("data/single-channel/music-8000.wav")
    T = len(signal)

    spectrogram = stft(signal, fft_size=fft_size, hop_size=hop_size)
    amplitude = np.abs(spectrogram)
    power = amplitude**2

    if metric == 'EUC':
        nmf = EUCNMF(n_bases)
    elif metric == 'IS':
        nmf = ISNMF(n_bases)
    elif metric == 'KL':
        nmf = KLNMF(n_bases)
    else:
        raise NotImplementedError("Not support {}-NMF".format(metric))

    nmf.update(power, iteration=iteration)

    # Floor the mixture amplitude once so the mask divisions below are safe.
    amplitude[amplitude < EPS] = EPS

    def _resynthesize(estimated_power):
        # Amplitude-ratio mask: rescale magnitude, keep the mixture phase,
        # invert, and peak-normalize.
        ratio = np.sqrt(estimated_power) / amplitude
        estimated_spectrogram = ratio * spectrogram
        estimated_signal = istft(estimated_spectrogram, fft_size=fft_size,
                                 hop_size=hop_size, length=T)
        return estimated_signal / np.abs(estimated_signal).max()

    # Full reconstruction from all bases.
    estimated_signal = _resynthesize(nmf.base @ nmf.activation)
    # Fix: use the sample rate actually read from the input file instead of
    # the hard-coded 8000 (identical for this file, but robust to changes).
    write_wav("data/NMF/{}/music-8000-estimated-iter{}.wav".format(
        metric, iteration), signal=estimated_signal, sr=sr)

    # Mixture log-power spectrogram for reference.
    power[power < EPS] = EPS
    log_spectrogram = 10 * np.log10(power)
    plt.figure()
    plt.pcolormesh(log_spectrogram, cmap='jet')
    plt.colorbar()
    plt.savefig('data/NMF/spectrogram.png', bbox_inches='tight')
    plt.close()

    # Per-basis reconstructions and spectrograms.
    for idx in range(n_bases):
        estimated_power = nmf.base[:, idx:idx + 1] @ nmf.activation[idx:idx + 1, :]
        estimated_signal = _resynthesize(estimated_power)
        write_wav("data/NMF/{}/music-8000-estimated-iter{}-base{}.wav".format(
            metric, iteration, idx), signal=estimated_signal, sr=sr)

        estimated_power[estimated_power < EPS] = EPS
        log_spectrogram = 10 * np.log10(estimated_power)
        plt.figure()
        plt.pcolormesh(log_spectrogram, cmap='jet')
        plt.colorbar()
        plt.savefig(
            'data/NMF/{}/estimated-spectrogram-iter{}-base{}.png'.format(
                metric, iteration, idx), bbox_inches='tight')
        plt.close()

    # Convergence curve.
    plt.figure()
    plt.plot(nmf.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig('data/NMF/{}/loss.png'.format(metric), bbox_inches='tight')
    plt.close()