Example #1
def main():
    # Load the most recent time-convolution model checkpoint.
    net, _ = misc.load_latest('/home/eriklarsson/rirnet/timeconv/models', 'net')
    
    fs = 16384
    n_fft = 128

    # Draw a random anechoic signal from the validation set.
    sound_engine = SoundEngine('/home/eriklarsson/rirnet/audio/chamber/val', 44100)
    anechoic_signal = sound_engine.random()

    # Load a measured RIR, keep the first 0.5 s, and build the reverberant test signal.
    rir_real, _ = au.read_wav('/home/eriklarsson/rirnet/audio/rirs/lecture.wav', 44100)
    rir_real = rir_real[:44100 // 2]
    rev_real = au.resample(au.convolve(rir_real, anechoic_signal), 44100, fs)
    
    # The network consumes negative log-magnitude spectrograms.
    _, _, rev_spectrogram = sp.signal.stft(rev_real, fs=fs, nfft=n_fft, nperseg=n_fft)
    net_input = torch.from_numpy(-np.log(np.abs(rev_spectrogram))).unsqueeze(0).float()

    with torch.no_grad():
        net_output = net(net_input).squeeze().numpy()
    # Invert the predicted magnitude spectrogram with uniformly random phase.
    phase = np.exp(1j * np.random.uniform(low=-np.pi, high=np.pi, size=np.shape(net_output)))
    _, rir_net = sp.signal.istft(net_output * phase, fs, nfft=n_fft, nperseg=n_fft)
    plt.imshow(net_output)
    plt.show()
    rir_net = au.resample(rir_net, fs, 44100)

    # Auralize an unseen speech recording with both the real and the predicted RIR.
    anechoic_test, _ = au.read_wav('/home/eriklarsson/rirnet/audio/harvard/male.wav')
    anechoic_test = anechoic_test[250000:400000, 0]

    rev_real_test = au.convolve(rir_real, anechoic_test)
    rev_net_test = au.convolve(rir_net, anechoic_test)
    
    au.save_wav('real.wav', rev_real_test, 44100, True)
    au.save_wav('net.wav', rev_net_test, 44100, True)
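
The random-phase inversion in the middle of main() is the step that turns a predicted magnitude spectrogram back into a waveform. A minimal standalone sketch of just that step, using only numpy and scipy; the noise signal here is a stand-in for a network output:

import numpy as np
from scipy import signal

fs, n_fft = 16384, 128
_, _, Zxx = signal.stft(np.random.randn(fs), fs=fs, nfft=n_fft, nperseg=n_fft)
magnitude = np.abs(Zxx)                      # pretend this came from the network
phase = np.exp(1j * np.random.uniform(-np.pi, np.pi, magnitude.shape))
_, reconstructed = signal.istft(magnitude * phase, fs, nfft=n_fft, nperseg=n_fft)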
Example #2
def reconstruct_rir_conv(time, alpha):
    fdl = 81                    # fractional-delay filter length (odd)
    fdl2 = (fdl - 1) // 2
    # Undo the network's output scaling: times back to samples, alphas to amplitudes.
    time = (time.astype('double') + 1) * 1024
    alpha = np.exp(-alpha).astype('double')
    signs = np.random.randint(0, 2, len(alpha)) * 2 - 1
    #alpha *= signs

    # Sort the peaks by arrival time and quantize to integer sample indices.
    inds = np.argsort(time)
    time = np.round(time[inds]).astype(int)
    alpha = alpha[inds]

    # Accumulate coincident peaks into an impulse train.
    peaks = np.zeros(np.max(time) + 1)
    for n, t in enumerate(time):
        peaks[t] += alpha[n]

    #peaks[time] = alpha

    # Smooth the impulse train with a Hann-windowed sinc lowpass.
    ir = au.convolve(peaks, np.hanning(fdl) * np.sinc(np.linspace(-fdl2, fdl2, fdl)))

    # Trim leading silence before the first significant sample.
    start_ind = np.flatnonzero(ir > 1e-10)[0]
    ir = ir[start_ind:]

    return ir
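
A hypothetical call, with made-up network-style outputs (times roughly in [-1, 1], alphas as positive log-magnitudes), assuming numpy and the function above are in scope:

times = np.sort(np.random.rand(32) * 2 - 1)   # normalized arrival times
alphas = np.random.rand(32) * 5               # log-magnitudes; exp(-alpha) gives peak heights
ir = reconstruct_rir_conv(times, alphas)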
Example #3
def convolve_and_pad(wav, h_list):
    data_list = []

    # Convolve the dry signal with each RIR and zero-pad to a power-of-two length.
    for h in h_list:
        y = au.convolve(wav, h)
        y_length = au.next_power_of_two(np.size(y))
        data = au.pad_to(y, y_length, 0)
        data_list.append(data)

    return np.array(data_list)
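
A hypothetical call with placeholder arrays, assuming au.next_power_of_two and au.pad_to behave as their names suggest:

wav = np.random.randn(16000)                        # stand-in anechoic signal
h_list = [np.random.randn(4096) for _ in range(3)]  # stand-in RIRs, equal length
batch = convolve_and_pad(wav, h_list)               # shape: (3, next power of two)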
Example #4
def generate_waveforms(wav, h_list):
    data_list = []
    target_list = []

    # Build (reverberant input, RIR target) pairs, padded to a common power-of-two length.
    for h in h_list:
        y = au.convolve(wav, h)
        y_length = au.next_power_of_two(np.size(y))
        data = au.pad_to(y, y_length, 0)
        target = au.pad_to(h, y_length, 0)
        target_list.append(target)
        data_list.append(data)
    return np.array(target_list), np.array(data_list)
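
Usage mirrors convolve_and_pad above, except each padded RIR also comes back as a training target (reusing the placeholder arrays from the previous sketch):

targets, data = generate_waveforms(wav, h_list)     # both shaped (len(h_list), padded length)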
Example #5
    def run(self):
        self.extractor.eval()
        with torch.no_grad():
            for batch_idx, (source, target) in enumerate(self.eval_loader):
                source, target = source.to(self.device), target[0].numpy()

                # Encode the reverberant input, then decode its latent code back to peak data.
                latent_source = self.extractor(source)
                output = self.autoencoder(latent_source, encode=False, decode=True)[0].cpu().numpy()

                # Densify the sparse peak predictions before reconstructing RIRs.
                filled_times_output, filled_alphas_output = misc.fill_peaks(output[0, :], output[1, :])
                filled_times_target, filled_alphas_target = misc.fill_peaks(target[0, :], target[1, :])

                output_rir = misc.reconstruct_rir(filled_times_output, filled_alphas_output)
                target_rir = misc.reconstruct_rir(filled_times_target, filled_alphas_target)

                # Auralize both RIRs with the same anechoic signal for A/B listening.
                rev_signal_output = au.convolve(self.audio_anechoic, output_rir)
                rev_signal_target = au.convolve(self.audio_anechoic, target_rir)

                au.save_wav('output.wav', rev_signal_output, self.fs, 1)
                au.save_wav('target.wav', rev_signal_target, self.fs, 1)

                au.play_file('output.wav')
                au.play_file('target.wav')
Example #6
def generate_spectrograms(queue, args):
    x_max, y_max, z_max, n_mics, n_per_seg, max_order, fs, material_engine, sound_engine = args
    np.random.seed()  # reseed so each worker process draws independent rooms
    # Room dimensions drawn uniformly from [2.5, max] along each axis.
    x, y, z = np.random.rand(3) * (np.array([x_max, y_max, z_max]) - 2.5) + 2.5
    mic_pos = rg.generate_pos_in_rect(x, y, z, n_mics)
    source_pos = rg.generate_pos_in_rect(x, y, z, 1)[0]
    absorption = material_engine.random()
    an_sig = sound_engine.random()

    rir_list = rg.generate_multiband_rirs(x, y, z, mic_pos, source_pos, fs,
                                          max_order, absorption)
    rev_sig_spectrograms = []
    rir_spectrograms = []
    # Magnitude spectrograms of each RIR and of its reverberant signal.
    for rir in rir_list:
        rev_sig = au.convolve(rir, an_sig)
        _, _, rir_spectrogram = sp.signal.stft(rir, fs=fs, nperseg=n_per_seg)
        _, _, rev_sig_spectrogram = sp.signal.stft(rev_sig,
                                                   fs=fs,
                                                   nperseg=n_per_seg)
        rev_sig_spectrograms.append(np.abs(rev_sig_spectrogram))
        rir_spectrograms.append(np.abs(rir_spectrogram))

    queue.put([rev_sig_spectrograms, rir_spectrograms])
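
A hypothetical way to drive this worker from a separate process; the argument values and the pre-built material_engine/sound_engine objects are assumptions:

import multiprocessing as mp

queue = mp.Queue()
args = (10.0, 8.0, 4.0,    # x_max, y_max, z_max (metres)
        2, 128, 8, 16384,  # n_mics, n_per_seg, max_order, fs
        material_engine, sound_engine)
worker = mp.Process(target=generate_spectrograms, args=(queue, args))
worker.start()
rev_specs, rir_specs = queue.get()
worker.join()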
Example #7
def main():
    # Load the latest spectrogram-domain (timeconv) and peak-domain (nanonet) models.
    net_timeconv, _ = misc.load_latest('/home/felix/rirnet/timeconv/models',
                                       'net')
    net_peaks_ae, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16',
                                       'autoencoder')
    net_peaks_ext, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16',
                                        'extractor')

    x, y, z = 6, 9, 3
    mic_pos = rg.generate_pos_in_rect(x, y, z, 1)
    source_pos = rg.generate_pos_in_rect(x, y, z, 1)[0]
    fs_peaks = 44100
    fs_timeconv = 16384
    n_fft = 128

    sound_engine = SoundEngine('/home/felix/rirnet/audio/chamber/val',
                               fs_peaks)
    material_engine = MaterialEngine('/home/felix/rirnet/wip/materials.csv',
                                     '/home/felix/rirnet/wip/surfaces.csv')
    abs_coeffs = material_engine.random()

    # Simulate the same room for both pipelines: multiband at 16 384 Hz for the
    # spectrogram net, monoband at 44 100 Hz for the peak net.
    multiband_rir = rg.generate_multiband_rirs(x, y, z, mic_pos, source_pos,
                                               fs_timeconv, 60, abs_coeffs)[0]
    monoband_rir = generate_monoband_rir(x, y, z, mic_pos, source_pos,
                                         fs_peaks, 8, abs_coeffs)

    an_sig_peaks = sound_engine.random()
    an_sig_timeconv = au.resample(an_sig_peaks, fs_peaks, fs_timeconv)

    rev_sig_multi = au.convolve(multiband_rir, an_sig_timeconv)
    _, _, rev_sig_multi_spectrogram = sp.signal.stft(rev_sig_multi,
                                                     fs=fs_timeconv,
                                                     nfft=n_fft,
                                                     nperseg=n_fft)
    # The timeconv net consumes negative log-magnitude spectrograms.
    input_timeconv = torch.from_numpy(
        -np.log(np.abs(rev_sig_multi_spectrogram))).unsqueeze(0).float()

    rev_sig_mono = au.pad_to(au.convolve(monoband_rir, an_sig_peaks), 2**16)
    input_peaks = preprocess_peaks(rev_sig_mono, fs_peaks)

    with torch.no_grad():
        output_timeconv = net_timeconv(input_timeconv).squeeze().numpy()
        output_peaks = net_peaks_ae(net_peaks_ext(input_peaks),
                                    decode=True).squeeze().numpy()
        plt.figure()
        plt.imshow(output_timeconv)
        plt.show()
    # Invert the predicted magnitude spectrogram with uniformly random phase.
    phase = np.exp(1j * np.random.uniform(
        low=-np.pi, high=np.pi, size=np.shape(output_timeconv)))
    _, output_timeconv = sp.signal.istft(output_timeconv * phase,
                                         fs_timeconv,
                                         nfft=n_fft,
                                         nperseg=n_fft)

    # Predicted RIR and its reverberant signal (top) vs. simulated ground truth (bottom).
    plt.subplot(221)
    plt.plot(output_timeconv)
    plt.subplot(222)
    rev_output = au.convolve(output_timeconv, an_sig_timeconv)
    plt.plot(rev_output / np.max(np.abs(rev_output)))
    #plt.scatter(output_peaks[0], output_peaks[1])
    plt.subplot(223)
    plt.plot(multiband_rir)
    plt.subplot(224)
    plt.plot(rev_sig_multi / np.max(np.abs(rev_sig_multi)))
    plt.show()

    au.save_wav('synthetic.wav', rev_output, fs_timeconv, True)
    au.save_wav('tru.wav', rev_sig_multi, fs_timeconv, True)
Example #8
def main(audio_path):
    # Simulate a shoebox room and fetch its impulse response.
    room = rg.generate(4, 10, 2, 3, 10, max_order=8)
    room.plot(mic_marker_size=30)

    room.compute_rir()
    rir = room.rir[0][0]
    # Trim leading zeros and normalize; 44.1 kHz is assumed throughout.
    first_index = next((i for i, x in enumerate(rir) if x), None)
    rir = rir[first_index:] / max(abs(rir))
    t_rir = np.arange(len(rir)) / 44100.

    sound, rate = au.read_wav(audio_path)
    t_sound = np.arange(len(sound)) / 44100.

    signal = au.convolve(sound, rir)
    signal /= max(abs(signal))
    t_signal = np.arange(len(signal)) / 44100.

    # Ground-truth peak data from the image-source model.
    mic = room.mic_array.R.T[0]
    distances = room.sources[0].distance(mic)
    times = distances / 343.0 * room.fs
    alphas = room.sources[0].damping / (4. * np.pi * distances)
    visible = tuple(np.where(room.visibility[0][0] == 1))
    alphas = -np.log(alphas[visible])
    alphas -= min(alphas)
    times = (times[visible] - min(times[visible])) / 44100.
    right_lim = max(times)

    mfcc = librosa.feature.mfcc(y=signal, sr=44100., n_mels=40)

    eps = 0.1

    plt.figure()

    ax = plt.subplot(2, 2, 1)
    plt.plot(t_sound, sound)
    plt.title('Anechoic sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_sound), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 2)
    plt.plot(t_rir, rir)
    plt.title('Room IRF')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_rir), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 3)
    plt.plot(t_signal, signal)
    plt.title('Reverberant sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_signal), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 4)
    plt.plot(times, alphas, '.')
    plt.title('Peaks data')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(times) - 0.002, right_lim + 0.002)

    plt.figure()
    specshow(mfcc, sr=44100, x_axis='time')
    plt.title('MFCC spectrogram')
    plt.xlabel('Time (s)')
    plt.show()
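
A hypothetical invocation; the path is a placeholder for any mono 44.1 kHz recording:

main('/path/to/anechoic_speech.wav')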