Example #1
import numpy as np
from scipy.io import loadmat

from utils.utils_audio import read_wav


def _convolve_mird(titles, reverb=0.160, degrees=[0], mic_intervals=[8,8,8,8,8,8,8], mic_indices=[0], samples=None):
    intervals = '-'.join([str(interval) for interval in mic_intervals])

    # First pass: find the length of the shortest source so that every
    # source can be truncated to a common length before mixing.
    T_min = None

    for title in titles:
        source, _ = read_wav("data/single-channel/{}.wav".format(title))
        T = len(source)
        if T_min is None or T < T_min:
            T_min = T

    mixed_signals = []

    # Second pass: convolve each source with the impulse response of the
    # selected microphone and sum the results into one mixture per channel.
    for mic_idx in mic_indices:
        _mixture = 0
        for title_idx in range(len(titles)):
            degree = degrees[title_idx]
            title = titles[title_idx]
            rir_path = "data/MIRD/Reverb{:.3f}_{}/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_{:.3f}s)_{}_1m_{:03d}.mat".format(reverb, intervals, reverb, intervals, degree)
            rir_mat = loadmat(rir_path)

            rir = rir_mat['impulse_response']

            if samples is not None:
                rir = rir[:samples]  # truncate the impulse response in time

            source, sr = read_wav("data/single-channel/{}.wav".format(title))
            _mixture = _mixture + np.convolve(source[:T_min], rir[:, mic_idx])
        
        mixed_signals.append(_mixture)
    
    mixed_signals = np.array(mixed_signals)

    return mixed_signals
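
A minimal usage sketch. The titles, angles, and microphone indices below are hypothetical; they assume the MIRD .mat files and single-channel WAVs are laid out under data/ exactly as the paths inside the function expect.

# Hypothetical call: mix two sources arriving from different angles,
# observed at microphones 0 and 3 of the 8-channel MIRD array.
mixture = _convolve_mird(["man-16000", "woman-16000"], reverb=0.160, degrees=[345, 15], mic_indices=[0, 3], samples=8000)
print(mixture.shape)  # (n_mics, T_min + len(rir) - 1); np.convolve defaults to 'full' mode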
Example #2
    def __getitem__(self, idx):
        """
        Returns:
            mixture (1, T) <torch.Tensor>
            sources (n_sources, T) <torch.Tensor>
            segment_IDs (n_sources,) <list<str>>
        """
        data = self.json_data[idx]
        sources = []

        # Load each source, cut out the requested segment, and add a channel axis.
        for key in data['sources']:
            source_data = data['sources'][key]
            start, end = source_data['start'], source_data['end']
            wav_path = os.path.join(self.wav_root, source_data['path'])
            wave, sr = read_wav(wav_path)
            wave = np.array(wave)[start:end]
            wave = wave[None]  # (T,) -> (1, T)
            sources.append(wave)

        sources = np.concatenate(sources, axis=0)

        mixture_data = data['mixture']
        start, end = mixture_data['start'], mixture_data['end']
        wav_path = os.path.join(self.wav_root, mixture_data['path'])
        wave, sr = read_wav(wav_path)
        wave = np.array(wave)[start:end]
        mixture = wave[None]

        segment_ID = self.json_data[idx]['ID'] + '_{}-{}'.format(start, end)

        mixture = torch.Tensor(mixture).float()
        sources = torch.Tensor(sources).float()

        return mixture, sources, segment_ID
Example #3
    def __init__(self, wav_root, list_path, max_samples=None, max_n_sources=3):
        super().__init__(wav_root, list_path, max_n_sources=max_n_sources)
        
        self.json_data = []
        
        with open(list_path) as f:
            for line in f:
                ID = line.strip()
                wav_path = os.path.join(wav_root, 'mix', '{}.wav'.format(ID))
                
                y, sr = read_wav(wav_path)
                T_total = len(y)
                
                # Clip each segment to max_samples, if specified.
                if max_samples is None:
                    samples = T_total
                else:
                    samples = min(T_total, max_samples)
                
                # Count how many source stems (s1, s2, ...) actually exist for this ID.
                n_sources = 0

                for source_idx in range(max_n_sources):
                    wav_path = os.path.join(wav_root, 's{}'.format(source_idx+1), '{}.wav'.format(ID))
                    if not os.path.exists(wav_path):
                        break
                    n_sources += 1
                
                data = {
                    'sources': {},
                    'mixture': {}
                }
                
                for source_idx in range(n_sources):
                    source_data = {
                        'path': os.path.join('s{}'.format(source_idx+1), '{}.wav'.format(ID)),
                        'start': 0,
                        'end': samples
                    }
                    data['sources']['s{}'.format(source_idx+1)] = source_data
                
                mixture_data = {
                    'path': os.path.join('mix', '{}.wav'.format(ID)),
                    'start': 0,
                    'end': samples
                }
                data['mixture'] = mixture_data
                data['ID'] = ID
            
                self.json_data.append(data)
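
A hedged usage sketch; the class name and directory layout are assumptions (a wsj0-2mix style tree with mix/, s1/, s2/ subfolders and a list file of utterance IDs), not shown in the snippet.

# Hypothetical usage of the dataset defined by the two methods above:
dataset = WaveDataset("data/2speakers/wav8k/min/tt", "test.txt", max_samples=32000)  # name assumed
mixture, sources, segment_ID = dataset[0]
print(mixture.size(), sources.size(), segment_ID)  # (1, T), (n_sources, T), "<ID>_0-32000"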
Example #4
    def __init__(self,
                 wav_root,
                 list_path,
                 samples=32000,
                 overlap=None,
                 n_sources=2):
        super().__init__(wav_root, list_path, n_sources=n_sources)

        # Default to 50% overlap between consecutive segments.
        if overlap is None:
            overlap = samples // 2

        self.json_data = []

        with open(list_path) as f:
            for line in f:
                ID = line.strip()
                wav_path = os.path.join(wav_root, 'mix', '{}.wav'.format(ID))

                y, sr = read_wav(wav_path)

                T_total = len(y)

                # Slide a window of `samples` length with hop `samples - overlap`;
                # a segment that would run past the end of the file is dropped.
                for start_idx in range(0, T_total, samples - overlap):
                    end_idx = start_idx + samples
                    if end_idx > T_total:
                        break
                    data = {'sources': {}, 'mixture': {}}

                    for source_idx in range(n_sources):
                        source_data = {
                            'path': os.path.join('s{}'.format(source_idx + 1), '{}.wav'.format(ID)),
                            'start': start_idx,
                            'end': end_idx
                        }
                        data['sources']['s{}'.format(source_idx + 1)] = source_data

                    mixture_data = {
                        'path': os.path.join('mix', '{}.wav'.format(ID)),
                        'start': start_idx,
                        'end': end_idx
                    }
                    data['mixture'] = mixture_data
                    data['ID'] = ID

                    self.json_data.append(data)
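
A quick sanity check of the windowing logic above, on hypothetical numbers:

# With samples=32000 and the default overlap (hop 16000), a 100000-sample file
# yields segments starting at 0, 16000, 32000, 48000, 64000; a start of 80000
# would end at 112000 > 100000, so it is dropped.
T_total, samples, overlap = 100000, 32000, 16000
starts = [s for s in range(0, T_total, samples - overlap) if s + samples <= T_total]
print(starts)  # [0, 16000, 32000, 48000, 64000]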
Example #5
    def __init__(self, wav_root, list_path, fft_size, hop_size=None, window_fn='hann', normalize=False, mask_type='ibm', threshold=40, max_samples=None, n_sources=2, eps=EPS):
        super().__init__(wav_root, list_path, fft_size, hop_size=hop_size, window_fn=window_fn, normalize=normalize, mask_type=mask_type, threshold=threshold, n_sources=n_sources, eps=eps)

        self.json_data = []
        
        with open(list_path) as f:
            for line in f:
                ID = line.strip()
                wav_path = os.path.join(wav_root, 'mix', '{}.wav'.format(ID))
                
                y, sr = read_wav(wav_path)
                
                T_total = len(y)
                
                # Clip each segment to max_samples, if specified.
                if max_samples is None:
                    samples = T_total
                else:
                    samples = min(T_total, max_samples)
                
                data = {
                    'sources': {},
                    'mixture': {}
                }
                
                for source_idx in range(n_sources):
                    source_data = {
                        'path': os.path.join('s{}'.format(source_idx+1), '{}.wav'.format(ID)),
                        'start': 0,
                        'end': samples
                    }
                    data['sources']['s{}'.format(source_idx+1)] = source_data
                
                mixture_data = {
                    'path': os.path.join('mix', '{}.wav'.format(ID)),
                    'start': 0,
                    'end': samples
                }
                data['mixture'] = mixture_data
                data['ID'] = ID
            
                self.json_data.append(data)
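
A hedged instantiation sketch; the class name and STFT parameters here are assumptions chosen only to illustrate the signature above.

# Hypothetical: build a mask-based evaluation set (ideal binary mask) over full utterances.
dataset = SpectrogramDataset("data/2speakers/wav8k/min/tt", "test.txt", fft_size=256, hop_size=64, mask_type='ibm', threshold=40, n_sources=2)  # name assumed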
Example #6
def _test(metric='EUC'):
    torch.manual_seed(111)

    fft_size, hop_size = 1024, 256
    n_bases = 6
    iteration = 100
    
    signal, sr = read_wav("data/music-8000.wav")
    
    T = len(signal)
    signal = torch.Tensor(signal).unsqueeze(dim=0)
    
    stft = BatchSTFT(fft_size=fft_size, hop_size=hop_size)
    istft = BatchInvSTFT(fft_size=fft_size, hop_size=hop_size)

    spectrogram = stft(signal).squeeze(dim=0)
    real = spectrogram[...,0]
    imag = spectrogram[...,1]
    amplitude = torch.sqrt(real**2 + imag**2)
    power = amplitude**2

    log_spectrogram = 10 * torch.log10(power + EPS).numpy()
    plt.figure()
    plt.pcolormesh(log_spectrogram, cmap='jet')
    plt.colorbar()
    plt.savefig('data/NMF/spectrogram.png', bbox_inches='tight')
    plt.close()

    # Factorize the power spectrogram into n_bases spectral templates (base)
    # and their time activations.
    nmf = NMF(n_bases, metric=metric)
    nmf.update(power, iteration=iteration)

    # Rebuild a complex spectrogram by scaling the mixture's real/imaginary
    # parts with the ratio of estimated to observed amplitude (Wiener-like mask).
    estimated_power = torch.matmul(nmf.base, nmf.activation)
    estimated_amplitude = torch.sqrt(estimated_power)
    ratio = estimated_amplitude / (amplitude + EPS)
    estimated_real, estimated_imag = ratio * real, ratio * imag
    estimated_spectrogram = torch.cat([estimated_real.unsqueeze(dim=2), estimated_imag.unsqueeze(dim=2)], dim=2).unsqueeze(dim=0)

    estimated_signal = istft(estimated_spectrogram, T=T)
    estimated_signal = estimated_signal.squeeze(dim=0).numpy()
    estimated_signal = estimated_signal / np.abs(estimated_signal).max()
    write_wav("data/NMF/{}/music-8000-estimated-iter{}.wav".format(metric, iteration), signal=estimated_signal, sr=8000)

    # Resynthesize each basis separately: a rank-1 slice of the factorization.
    for idx in range(n_bases):
        estimated_power = torch.matmul(nmf.base[:, idx: idx+1], nmf.activation[idx: idx+1, :])
        estimated_amplitude = torch.sqrt(estimated_power)
        ratio = estimated_amplitude / (amplitude + EPS)
        estimated_real, estimated_imag = ratio * real, ratio * imag
        estimated_spectrogram = torch.cat([estimated_real.unsqueeze(dim=2), estimated_imag.unsqueeze(dim=2)], dim=2).unsqueeze(dim=0)

        estimated_signal = istft(estimated_spectrogram, T=T)
        estimated_signal = estimated_signal.squeeze(dim=0).numpy()
        estimated_signal = estimated_signal / np.abs(estimated_signal).max()
        write_wav("data/NMF/{}/music-8000-estimated-iter{}-base{}.wav".format(metric, iteration, idx), signal=estimated_signal, sr=8000)

        log_spectrogram = 10 * torch.log10(estimated_power + EPS).numpy()
        plt.figure()
        plt.pcolormesh(log_spectrogram, cmap='jet')
        plt.colorbar()
        plt.savefig('data/NMF/{}/estimated-spectrogram-iter{}-base{}.png'.format(metric, iteration, idx), bbox_inches='tight')
        plt.close()
    
    plt.figure()
    plt.plot(nmf.loss)
    plt.savefig('data/NMF/{}/loss.png'.format(metric), bbox_inches='tight')
    plt.close()
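
For reference, the rank-1 slices reconstructed in the loop above sum to the full factorization. A self-contained sanity check, with shapes assumed ((F, K) base, (K, T) activation):

import torch

F_bins, K, T_frames = 513, 6, 100
base, activation = torch.rand(F_bins, K), torch.rand(K, T_frames)
full = base @ activation
partial_sum = sum(base[:, k:k+1] @ activation[k:k+1, :] for k in range(K))
print(torch.allclose(full, partial_sum, atol=1e-5))  # True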
Example #7

if __name__ == '__main__':
    import os
    import numpy as np
    from scipy.signal import resample_poly

    from utils.utils_audio import read_wav, write_wav

    os.makedirs("data/GriffinLim", exist_ok=True)
    torch.manual_seed(111)

    fft_size, hop_size = 1024, 256
    n_basis = 4

    signal, sr = read_wav("data/man-44100.wav")
    signal = resample_poly(signal, up=16000, down=sr)
    write_wav("data/man-16000.wav", signal=signal, sr=16000)

    T = len(signal)
    signal = torch.Tensor(signal).unsqueeze(dim=0)

    stft = BatchSTFT(fft_size=fft_size, hop_size=hop_size)
    istft = BatchInvSTFT(fft_size=fft_size, hop_size=hop_size)

    # Round-trip through STFT/iSTFT as an upper-bound (oracle) reconstruction.
    spectrogram = stft(signal)
    oracle_signal = istft(spectrogram, T=T)
    oracle_signal = oracle_signal.squeeze(dim=0).numpy()
    write_wav("data/man-oracle.wav", signal=oracle_signal, sr=16000)

    griffin_lim = GriffinLim(fft_size, hop_size=hop_size)
Example #8

if __name__ == '__main__':
    import numpy as np
    from scipy.signal import resample_poly

    from utils.utils_audio import read_wav, write_wav
    from stft import BatchSTFT, BatchInvSTFT

    torch.manual_seed(111)

    fft_size, hop_size = 1024, 256
    n_basis = 4

    source1, sr = read_wav("data/man-44100.wav")
    source1 = resample_poly(source1, up=16000, down=sr)
    write_wav("data/man-16000.wav", signal=source1, sr=16000)
    T = len(source1)

    source2, sr = read_wav("data/woman-44100.wav")
    source2 = resample_poly(source2, up=16000, down=sr)
    write_wav("data/woman-16000.wav", signal=source2, sr=16000)

    mixture = source1 + source2  # instantaneous (anechoic) two-speaker mixture
    write_wav("data/mixture-16000.wav", signal=mixture, sr=16000)

    stft = BatchSTFT(fft_size=fft_size, hop_size=hop_size)
    istft = BatchInvSTFT(fft_size=fft_size, hop_size=hop_size)

    mixture = torch.Tensor(mixture).unsqueeze(dim=0)
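
The snippet stops before the mixture is transformed. A plausible continuation, mirroring the oracle round trip in the previous example (the reconstructed-file name is made up):

# Hypothetical continuation: transform the mixture, then invert to verify the round trip.
spectrogram = stft(mixture)
reconstructed = istft(spectrogram, T=T)
write_wav("data/mixture-reconstructed-16000.wav", signal=reconstructed.squeeze(dim=0).numpy(), sr=16000)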
Example #9
def _test(metric='EUC'):
    np.random.seed(111)

    fft_size, hop_size = 1024, 256
    n_bases = 6
    iteration = 100

    signal, sr = read_wav("data/single-channel/music-8000.wav")

    T = len(signal)

    spectrogram = stft(signal, fft_size=fft_size, hop_size=hop_size)
    amplitude = np.abs(spectrogram)
    power = amplitude**2

    # Select the divergence to minimize: squared Euclidean, Itakura-Saito, or Kullback-Leibler.
    if metric == 'EUC':
        nmf = EUCNMF(n_bases)
    elif metric == 'IS':
        nmf = ISNMF(n_bases)
    elif metric == 'KL':
        nmf = KLNMF(n_bases)
    else:
        raise NotImplementedError("{}-NMF is not supported".format(metric))

    nmf.update(power, iteration=iteration)

    # Floor the amplitude to avoid division by zero in the mask below.
    amplitude[amplitude < EPS] = EPS

    estimated_power = nmf.base @ nmf.activation
    estimated_amplitude = np.sqrt(estimated_power)
    ratio = estimated_amplitude / amplitude
    estimated_spectrogram = ratio * spectrogram

    estimated_signal = istft(estimated_spectrogram, fft_size=fft_size, hop_size=hop_size, length=T)
    estimated_signal = estimated_signal / np.abs(estimated_signal).max()
    write_wav("data/NMF/{}/music-8000-estimated-iter{}.wav".format(metric, iteration), signal=estimated_signal, sr=8000)

    power[power < EPS] = EPS
    log_spectrogram = 10 * np.log10(power)

    plt.figure()
    plt.pcolormesh(log_spectrogram, cmap='jet')
    plt.colorbar()
    plt.savefig('data/NMF/spectrogram.png', bbox_inches='tight')
    plt.close()

    for idx in range(n_bases):
        # Rank-1 reconstruction from a single basis vector and its activation row.
        estimated_power = nmf.base[:, idx:idx + 1] @ nmf.activation[idx:idx + 1, :]
        estimated_amplitude = np.sqrt(estimated_power)
        ratio = estimated_amplitude / amplitude
        estimated_spectrogram = ratio * spectrogram

        estimated_signal = istft(estimated_spectrogram, fft_size=fft_size, hop_size=hop_size, length=T)
        estimated_signal = estimated_signal / np.abs(estimated_signal).max()
        write_wav("data/NMF/{}/music-8000-estimated-iter{}-base{}.wav".format(metric, iteration, idx), signal=estimated_signal, sr=8000)

        estimated_power[estimated_power < EPS] = EPS
        log_spectrogram = 10 * np.log10(estimated_power)

        plt.figure()
        plt.pcolormesh(log_spectrogram, cmap='jet')
        plt.colorbar()
        plt.savefig('data/NMF/{}/estimated-spectrogram-iter{}-base{}.png'.format(metric, iteration, idx), bbox_inches='tight')
        plt.close()

    plt.figure()
    plt.plot(nmf.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig('data/NMF/{}/loss.png'.format(metric), bbox_inches='tight')
    plt.close()
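
For context, a minimal sketch of the multiplicative updates a squared-Euclidean NMF like the one above typically runs. These are the standard Lee-Seung rules, not necessarily this repository's exact implementation:

import numpy as np

def euc_nmf_step(V, W, H, eps=1e-12):
    # Multiplicative updates that monotonically decrease ||V - WH||_F^2.
    H = H * (W.T @ V) / (W.T @ W @ H + eps)
    W = W * (V @ H.T) / (W @ H @ H.T + eps)
    return W, H

# Toy run on a random nonnegative matrix.
rng = np.random.default_rng(0)
V = rng.random((64, 100))
W, H = rng.random((64, 6)), rng.random((6, 100))
for _ in range(100):
    W, H = euc_nmf_step(V, W, H)
print(np.linalg.norm(V - W @ H))  # residual shrinks over iterations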