示例#1
0
def slicer():
    """Cut every WAV file in ``noise_train`` into fixed-length clips.

    Each ``*.wav`` file in the ``noise_train`` folder next to this script is
    read with ``audioread`` and split into consecutive, non-overlapping clips
    of ``10 * 16000`` samples.  Clip ``i`` of ``name.wav`` is written with
    ``audiowrite`` to ``noise_train_new/name_i.wav``, using the sample rate
    reported by ``audioread``.  A trailing remainder shorter than one clip is
    dropped.

    Returns:
        None.
    """
    audioformat = "*.wav"
    clip_seconds = 10
    nominal_fs = 16000
    audio_dir = os.path.join(os.path.dirname(__file__), 'noise_train')
    output_dir = audio_dir + "_new"
    filenames = glob.glob(os.path.join(audio_dir, audioformat))
    audio_length = int(clip_seconds * nominal_fs)

    # The folder that has to exist before writing is the *output* folder;
    # checking the input folder (as before) never created it when the input
    # was present.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for wavfile in filenames:
        file, fs = audioread(wavfile)
        # basename() handles the platform's path separator; the text before
        # the first dot is kept as the clip name prefix.
        stem = os.path.basename(wavfile).split(".")[0]

        for i in range(len(file) // audio_length):
            # Step by whole clips so that consecutive clips do not overlap.
            start = i * audio_length
            newfile = file[start:start + audio_length]
            newdir = os.path.join(output_dir, stem + "_" + str(i) + ".wav")
            audiowrite(newfile, fs, newdir, norm=False)

    return
示例#2
0
def main(cfg):
    """Generate clean / noise / mixture audio files for every SNR level.

    For each of ``total_hours * 3600 // audio_length`` mixtures a random
    speech file and a random noise file are read and brought to size with
    ``concat_to_size``.  Then, for every SNR in
    ``np.linspace(snr_lower, snr_upper, total_snrlevels)``, the pair is scaled
    with ``snr_setter``, passed through the room simulation selected by
    ``cfg['room_type']`` and written with ``audiowrite`` into the three
    directories returned by ``make_output_dirs``.

    Args:
        cfg: configuration mapping.  Keys read here: ``audio_format``,
            ``snr_lower``, ``snr_upper``, ``total_snrlevels``, ``fs``,
            ``total_hours``, ``audio_length``, ``silence_length``,
            ``room_type`` and, when ``room_type == 'multi_k'``,
            ``k_choices``.  It is also forwarded to the helper functions.
    """
    mixture_dir, clean_dir, noise_dir = make_output_dirs(cfg)

    audio_format = cfg["audio_format"]
    speech_files, noise_files = get_speech_and_noise_files(cfg)

    snr_lower = cfg["snr_lower"]
    snr_upper = cfg["snr_upper"]
    total_snrlevels = cfg["total_snrlevels"]
    fs = cfg["fs"]
    total_hours = cfg["total_hours"]
    audio_length = cfg["audio_length"]
    silence_length = cfg["silence_length"]

    total_num_mixtures = int(total_hours * 60 * 60 // audio_length)

    for cur_mix_idx in tqdm.tqdm(range(total_num_mixtures)):

        idx_s = np.random.randint(0, np.size(speech_files))
        # NOTE: fs is replaced by the rate reported by audioread from here on.
        base_clean, fs = audioread(speech_files[idx_s])
        base_clean = concat_to_size(base_clean, speech_files, idx_s,
                                    audio_length, silence_length, fs)

        idx_n = np.random.randint(0, np.size(noise_files))
        base_noise, fs = audioread(noise_files[idx_n])
        # Extend short noise with further *noise* files.  The previous call
        # passed the speech list and speech index here, which would top the
        # noise signal up with speech.
        base_noise = concat_to_size(base_noise[:len(base_clean)], noise_files,
                                    idx_n, audio_length, silence_length, fs)
        base_noise = base_noise[:len(base_clean)]

        for snr_db in np.linspace(snr_lower, snr_upper, total_snrlevels):
            clean, noise = base_clean.copy(), base_noise.copy()
            clean, noise = snr_setter(clean=clean, noise=noise, snr=snr_db)

            if cfg['room_type'] == 'single_k':
                clean, noise, mixtures = simulate_k_room(clean, noise, cfg)
            elif cfg['room_type'] == 'multi_k':
                # Pick one microphone layout per file without touching cfg.
                cur_cfg = cfg.copy()
                selected_k = np.random.choice(cfg['k_choices'])
                cur_cfg['n_noise_mics'] = selected_k['n_noise_mics']
                cur_cfg['n_speech_mics'] = selected_k['n_speech_mics']
                clean, noise, mixtures = simulate_k_room(clean, noise, cur_cfg)
            else:
                clean, noise, mixtures = simulate_room(
                    clean, noise, cfg, cur_mix_idx)

            # Slice bounds must be integers even if the config holds floats.
            n_keep = int(audio_length * fs)
            clean, noise, mixtures = clean[:n_keep], noise[:n_keep], mixtures[:n_keep]

            # The three outputs share one file name and differ only by folder.
            fname = '{}_SNR_{}'.format(cur_mix_idx, snr_db) + audio_format

            clean_path = os.path.join(clean_dir, fname)
            noise_path = os.path.join(noise_dir, fname)
            mix_path = os.path.join(mixture_dir, fname)

            audiowrite(clean, fs, clean_path, norm=False)
            audiowrite(noise, fs, noise_path, norm=False)
            audiowrite(mixtures, fs, mix_path, norm=False)
示例#3
0
def main_gen(params):
    '''Generate reverberant noisy-speech files for a range of file indices.

    For every index from params['fileindex_start'] to
    params['fileindex_end'] (inclusive) clean speech is produced with
    gen_audio(), convolved with a randomly chosen impulse response, mixed
    with generated noise by segmental_snr_mixer() and written to storage.
    A mixture that clips is discarded and the same index is tried again.

    Returns six lists: clean source / clipped / low-activity files followed
    by noise source / clipped / low-activity files.'''

    clean_source_files = []
    clean_clipped_files = []
    clean_low_activity_files = []
    noise_source_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    clean_index = 0
    noise_index = 0
    file_num = params['fileindex_start']

    while file_num <= params['fileindex_end']:
        # clean speech
        clean, clean_sf, clean_cf, clean_laf, clean_index = gen_audio(
            True, params, clean_index)

        # pick one impulse response (and its configured channel) at random
        rir_index = random.randint(0, len(params['myrir']) - 1)
        rir_path = os.path.normpath(os.path.join(
            'datasets', 'impulse_responses', params['myrir'][rir_index]))
        _, rir_samples = wavfile.read(rir_path)
        rir_channel = int(params['mychannel'][rir_index])

        if rir_samples.ndim == 1:
            # mono impulse response: use it as is
            rir_selected = np.array(rir_samples)
        else:
            # configured channel numbers are 1-based
            rir_selected = rir_samples[:, rir_channel - 1]

        clean = add_pyreverb(clean, rir_selected)

        # noise, generated to the length of the reverberant speech
        noise, noise_sf, noise_cf, noise_laf, noise_index = gen_audio(
            False, params, noise_index, len(clean))

        clean_clipped_files.extend(clean_cf)
        clean_low_activity_files.extend(clean_laf)
        noise_clipped_files.extend(noise_cf)
        noise_low_activity_files.extend(noise_laf)

        # fixed SNR from the config, or an integer drawn between the bounds
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # unexpected clipping: skip writing and retry the same file number
        if any(is_clipped(sig) for sig in (clean_snr, noise_snr, noisy_snr)):
            print("Warning: File #" + str(file_num) + " has unexpected clipping, " + \
                  "returning without writing audio to disk")
            continue

        clean_source_files.extend(clean_sf)
        noise_source_files.extend(noise_sf)

        # build output names from the source file names (extension dropped)
        clean_files_joined = '-'.join(
            path[:-4].split(os.path.sep)[-1] for path in clean_sf)[:MAXFILELEN]
        noise_files_joined = '-'.join(
            path[:-4].split(os.path.sep)[-1] for path in noise_sf)[:MAXFILELEN]

        noisyfilename = ''.join([
            clean_files_joined, '_', noise_files_joined, '_snr', str(snr),
            '_tl', str(target_level), '_fileid_', str(file_num), '.wav'])
        cleanfilename = 'clean_fileid_' + str(file_num) + '.wav'
        noisefilename = 'noise_fileid_' + str(file_num) + '.wav'

        outputs = [
            (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_snr),
            (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
            (os.path.join(params['noise_proc_dir'], noisefilename), noise_snr),
        ]

        file_num += 1
        for out_path, signal in outputs:
            # a failed write is reported but does not stop the generation
            try:
                audiowrite(out_path, signal, params['fs'])
            except Exception as e:
                print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
           noise_source_files, noise_clipped_files, noise_low_activity_files
示例#4
0
    def __call__(self, noisy_speech_filename, output_dir=None):
        """Apply NSNet model to one file and produce an output file with clean speech.

        The input is read with ``sf.read``, processed frame by frame
        (window, STFT, log-power features, online mean/variance
        normalisation, model inference, masked inverse STFT, overlap-add)
        and the result is written with ``audiolib.audiowrite``.

        Args:
            noisy_speech_filename: path of the audio file to enhance.
            output_dir: directory for the enhanced file; falls back to
                ``self.output_dir`` when falsy.

        Returns:
            None.  The enhanced signal is written to
            ``<output dir>/<basename of noisy_speech_filename>``.
        """

        enhanced_filename = os.path.join(output_dir or self.output_dir,
                                         os.path.basename(noisy_speech_filename))

        logging.info("NSNet inference: %s", noisy_speech_filename)
        sig, sample_rate = sf.read(noisy_speech_filename)

        ssize = len(sig)
        print('ssize:', ssize)
        fsize = len(self.wind)  # frame size = window length in samples
        hsize = int(self.hop_fraction * self.framesize)  # hop size in samples

        # The first frame starts (fsize - hsize) samples before the signal, so
        # sstart is negative whenever the hop is shorter than the frame.
        sstart = hsize - fsize
        print('sstart:', sstart)
        send = ssize
        nframe = math.ceil((send - sstart) / hsize)
        zpleft = -sstart
        # Right padding chosen so the padded length is (nframe - 1) * hsize + fsize,
        # i.e. the last frame is complete.
        zpright = (nframe - 1) * hsize + fsize - zpleft - ssize

        if zpleft > 0 or zpright > 0:
            sigpad = np.zeros(ssize + zpleft + zpright)
            sigpad[zpleft:len(sigpad)-zpright] = sig
        else:
            sigpad = sig

        sout = np.zeros(nframe * hsize)  # one hop of output per frame
        x_old = np.zeros(hsize)  # tail of the previous frame for overlap-add

        model_input_names = [inp.name for inp in self.model.get_inputs()]
        # Zero-filled float32 arrays for every model input except the first;
        # dimensions that are not plain ints are replaced by 1.
        model_inputs = {
            inp.name: np.zeros(
                [dim if isinstance(dim, int) else 1 for dim in inp.shape],
                dtype=np.float32)
            for inp in self.model.get_inputs()[1:]}

        mu = None
        sigmasquare = None
        frame_count = 0

        for frame_sampleindex in range(0, nframe * hsize, hsize):

            # second frame starts from mid-of first frame and goes until frame-size
            sigpadframe = sigpad[frame_sampleindex:frame_sampleindex + fsize] * self.wind

            # NOTE(review): the forward transform uses self.sampling_rate while
            # the inverse below uses the file's sample_rate - confirm they agree.
            xmag, xphs = audiolib.magphasor(audiolib.stft(
                sigpadframe, self.sampling_rate, self.wind,
                self.hop_fraction, self.dft_size, synth=True, zphase=False))

            feat = audiolib.logpow(xmag, floor=self.spectral_floor)

            # Seed the running statistics from the first frame's features.
            if frame_sampleindex == 0:
                mu = feat
                sigmasquare = feat**2

            norm_feat, mu, sigmasquare, frame_count = audiolib.onlineMVN_perframe(
                feat, frame_counter=frame_count, mu=mu, sigmasquare=sigmasquare,
                frameshift=0.01, tauFeat=3., tauFeatInit=0.1, t_init=0.1)

            # Add two leading singleton axes before feeding the model.
            norm_feat = norm_feat[np.newaxis, np.newaxis, :]

            model_inputs['input'] = np.float32(norm_feat)
            model_outputs = self.model.run(None, model_inputs)
            # Outputs are paired positionally with the input names and become
            # the next frame's inputs; the 'input' entry is overwritten above
            # before each run. Presumably the remaining outputs are recurrent
            # state - verify against the model definition.
            model_inputs = dict(zip(model_input_names, model_outputs))

            # The first model output is used as the spectral mask.
            mask = model_outputs[0].squeeze()
            x_enh = audiolib.istft(
                (xmag * mask) * xphs, sample_rate, self.wind, self.dft_size, zphase=False)
            
            # Overlap-add: first hop of this frame plus the saved tail of the
            # previous one. NOTE(review): the addition needs fsize - hsize to
            # match hsize, i.e. presumably a hop fraction of 0.5 - confirm.
            sout[frame_sampleindex:frame_sampleindex + hsize] = x_old + x_enh[0:hsize]
            x_old = x_enh[hsize:fsize]

        xfinal = sout
        audiolib.audiowrite(xfinal, sample_rate, enhanced_filename, norm=False)
def main_gen(params, filenum):
    '''Generate one noisy-speech example and write it to storage.

    Clean speech and noise are produced with gen_audio() and mixed with
    snr_mixer(); generation is repeated until none of the three resulting
    signals is clipped.  The noisy, clean and noise signals are then written
    to the directories named in params.

    Returns the clean/noise source files of the accepted attempt and the
    clipped / low-activity files accumulated over all attempts, in the order
    clean source, clean clipped, clean low-activity, noise source,
    noise clipped, noise low-activity.'''

    print("Generating file #" + str(filenum))

    clean_clipped_files = []
    clean_low_activity_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    while True:
        # clean speech first, then noise of the same length
        clean, clean_source_files, clean_cf, clean_laf = gen_audio(
            True, params, filenum)
        noise, noise_source_files, noise_cf, noise_laf = gen_audio(
            False, params, filenum, len(clean))

        clean_clipped_files.extend(clean_cf)
        clean_low_activity_files.extend(clean_laf)
        noise_clipped_files.extend(noise_cf)
        noise_low_activity_files.extend(noise_laf)

        # fixed SNR from the config, or an integer drawn between the bounds
        if params['randomize_snr']:
            snr = np.random.randint(params['snr_lower'], params['snr_upper'])
        else:
            snr = params['snr']

        clean_snr, noise_snr, noisy_snr, target_level = snr_mixer(
            params=params, clean=clean, noise=noise, snr=snr)

        # accept the attempt only when nothing clipped, otherwise regenerate
        clipped = (is_clipped(clean_snr) or is_clipped(noise_snr)
                   or is_clipped(noisy_snr))
        if not clipped:
            break

    # output names are built from the source file names (extension dropped)
    clean_files_joined = '-'.join(
        path[:-4].split(os.path.sep)[-1] for path in clean_source_files)[:MAXFILELEN]
    noise_files_joined = '-'.join(
        path[:-4].split(os.path.sep)[-1] for path in noise_source_files)[:MAXFILELEN]

    noisyfilename = ''.join([
        clean_files_joined, '_', noise_files_joined, '_snr', str(snr),
        '_fileid_', str(filenum), '.wav'])
    cleanfilename = 'clean_fileid_' + str(filenum) + '.wav'
    noisefilename = 'noise_fileid_' + str(filenum) + '.wav'

    outputs = [
        (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_snr),
        (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
        (os.path.join(params['noise_proc_dir'], noisefilename), noise_snr),
    ]

    for out_path, signal in outputs:
        # a failed write is reported but does not abort the remaining writes
        try:
            audiowrite(out_path, signal, params['fs'])
        except Exception as e:
            print(str(e))

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
           noise_source_files, noise_clipped_files, noise_low_activity_files
def simulate_room(clean, noise, cfg, rand_seed, debug=False):
    """Render speech and noise through a simulated shoebox room.

    The speech and the noise are each cut into ``cfg['n_inter_locs']``
    overlapping, faded chunks.  Every chunk is placed as its own source at
    one of the positions interpolated between two random points, and the
    room is simulated with ``pra.ShoeBox``.

    Args:
        clean: speech signal.
        noise: noise signal; indexed with the same sample ranges as ``clean``.
        cfg: configuration mapping.  Keys read: ``num_mics``,
            ``n_inter_locs``, ``room_size_upper``, ``room_size_lower``,
            ``room_geom_fixed``, ``fs``, ``echoic_ref_clean`` and
            ``echoic_ref_noise``.
        rand_seed: seed passed to ``np.random.seed`` so the same seed gives
            the same room.
        debug: when true, a room plot, per-channel plots and per-channel WAV
            files are written.

    Returns:
        ``(clean_final, noise_final, mixture)``.  ``mixture`` is the premix
        summed over all sources and transposed.  The two references are
        either the simulated signals at index 0 of the microphone axis or
        the dry inputs reshaped to a single column, depending on the
        ``echoic_ref_*`` flags.
    """
    num_mics = cfg['num_mics']
    num_positions = cfg['n_inter_locs']
    size_upper = cfg['room_size_upper']
    size_lower = cfg['room_size_lower']
    fixed_geometry = cfg['room_geom_fixed']

    # the same seed is used across dB levels, so a mixture keeps its room
    np.random.seed(rand_seed)

    if not fixed_geometry:
        room_size = np.random.uniform(size_lower, size_upper, size=(3))
        mic_locs = [np.random.uniform(0, dim, num_mics) for dim in room_size]
    else:
        room_size = np.array([size_upper] * 3)

        # draw the microphones from a fixed seed, then restore the generator
        # state so that the source positions still vary with rand_seed
        saved_state = np.random.get_state()
        np.random.seed(42)
        mic_locs = [np.random.uniform(0, dim, num_mics) for dim in room_size]
        np.random.set_state(saved_state)

    room = pra.ShoeBox(room_size,
                       fs=cfg['fs'],
                       absorption=0.35,
                       max_order=10)

    # one row per coordinate axis, one column per microphone
    mic_positions = np.array(mic_locs)
    room.add_microphone_array(pra.MicrophoneArray(mic_positions, room.fs))

    # per axis: interpolate between two random points, then transpose to get
    # one row per position
    speech_locs = np.array([
        np.linspace(runif(0, dim), runif(0, dim), num_positions)
        for dim in room_size
    ]).T
    noise_locs = np.array([
        np.linspace(runif(0, dim), runif(0, dim), num_positions)
        for dim in room_size
    ]).T

    chunk_size = len(clean) // num_positions
    fade_overlap = chunk_size // 4
    num_sources = len(speech_locs)
    for i, (speech_pos, noise_pos) in enumerate(zip(speech_locs, noise_locs)):
        start_idx = max(0, i * chunk_size - fade_overlap)
        end_idx = min(len(clean), (i + 1) * chunk_size + fade_overlap)

        speech_chunk = np.zeros(len(clean))
        noise_chunk = np.zeros(len(noise))
        speech_chunk[start_idx:end_idx] = clean[start_idx:end_idx].reshape(-1)
        noise_chunk[start_idx:end_idx] = noise[start_idx:end_idx].reshape(-1)

        # the length may not divide evenly, so the last chunk takes the rest
        if i == num_sources - 1:
            speech_chunk[start_idx:] = clean[start_idx:].reshape(-1)
            noise_chunk[start_idx:] = noise[start_idx:].reshape(-1)

        speech_chunk = fade_signal(speech_chunk, start_idx, end_idx,
                                   chunk_size, fade_overlap, num_sources, i)
        noise_chunk = fade_signal(noise_chunk, start_idx, end_idx,
                                  chunk_size, fade_overlap, num_sources, i)

        # sources alternate: even indices are speech, odd indices are noise
        room.add_source(speech_pos.reshape(-1), signal=speech_chunk, delay=0)
        room.add_source(noise_pos.reshape(-1), signal=noise_chunk, delay=0)

    res = room.simulate(return_premix=True)

    speech_rows = np.arange(num_positions) * 2
    noise_rows = speech_rows + 1
    clean_ref = res[speech_rows, 0, :].sum(0, keepdims=True)
    noise_ref = res[noise_rows, 0, :].sum(0, keepdims=True)

    if debug:
        import matplotlib.pyplot as plt

        room.plot(freq=[1000, 2000, 4000, 8000], img_order=0)
        plt.savefig('room.png')

        mixed = res.sum(0)
        fig, ax = plt.subplots(len(mixed), 1)
        for i in range(len(ax)):
            ax[i].plot(mixed[i])
            audiolib.audiowrite(mixed[i, None].T, cfg['fs'], './debug/res_{}.wav'.format(i))
        plt.savefig('./debug/res.png')

    # soundfile expects an N x C array for multichannel audio
    if cfg['echoic_ref_clean']:
        clean_final = clean_ref.T
    else:
        clean_final = clean.reshape((-1, 1))
    if cfg['echoic_ref_noise']:
        noise_final = noise_ref.T
    else:
        noise_final = noise.reshape((-1, 1))

    return clean_final, noise_final, res.sum(0).T
def main(cfg):
    """Synthesize noisy speech at several SNR levels.

    Random clean-speech files are concatenated (separated by
    ``silence_length`` seconds of zeros) until they are longer than
    ``audio_length`` seconds, a random noise signal is built to the same
    length, and the pair is mixed with ``snr_mixer`` at every level of
    ``np.linspace(snr_lower, snr_upper, total_snrlevels)``.  Noisy, clean and
    noise signals are written to ``NoisySpeech_training``,
    ``CleanSpeech_training`` and ``Noise_training`` next to this script until
    ``total_hours`` of noisy audio have been produced.

    Args:
        cfg: mapping of configuration strings.  Keys read: ``snr_lower``,
            ``snr_upper``, ``total_snrlevels``, ``speech_dir``, ``noise_dir``,
            ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length`` and
            ``noise_types_excluded``.  The string ``'None'`` selects the
            default for ``speech_dir``, ``noise_dir`` and
            ``noise_types_excluded``.

    Raises:
        AssertionError: the clean-speech or noise directory does not exist.
        ValueError: audio has to be generated but no clean or noise file
            matches ``audioformat``.
    """
    snr_lower = float(cfg["snr_lower"])
    snr_upper = float(cfg["snr_upper"])
    # np.linspace requires an integer number of samples.  Going through
    # float() first keeps config values such as "5.0" working.
    total_snrlevels = int(float(cfg["total_snrlevels"]))

    base_dir = os.path.dirname(__file__)

    clean_dir = os.path.join(base_dir, 'clean_train')
    if cfg["speech_dir"] != 'None':
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, 'noise_train')
    if cfg["noise_dir"] != 'None':
        noise_dir = cfg["noise_dir"]
    if not os.path.exists(noise_dir):
        raise AssertionError("Noise data is required")

    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(base_dir, 'NoisySpeech_training')
    clean_proc_dir = os.path.join(base_dir, 'CleanSpeech_training')
    noise_proc_dir = os.path.join(base_dir, 'Noise_training')
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)  # from seconds to samples
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != 'None':
        # Drop every noise file whose name starts with an excluded prefix.
        excluded = tuple(cfg["noise_types_excluded"].split(','))
        noisefilenames = [
            fn for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]

    if total_samples > 0:
        # Fail with a readable message instead of numpy's "low >= high".
        if not cleanfilenames:
            raise ValueError(
                "No clean speech files matching %r in %r" % (audioformat, clean_dir))
        if not noisefilenames:
            raise ValueError(
                "No noise files matching %r in %r" % (audioformat, noise_dir))

    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        idx_s = np.random.randint(0, len(cleanfilenames))
        # NOTE: fs is replaced by the rate reported by audioread from here on.
        clean, fs = audioread(cleanfilenames[idx_s])

        # Append further files, separated by silence, until the clean signal
        # is longer than the requested length.
        while len(clean) <= audio_length:
            idx_s = idx_s + 1
            # Re-draw the index once the end of the list is reached (the
            # original wrap-around condition is kept unchanged).
            if idx_s >= len(cleanfilenames) - 1:
                idx_s = np.random.randint(0, len(cleanfilenames))
            newclean, fs = audioread(cleanfilenames[idx_s])
            cleanconcat = np.append(clean, np.zeros(int(fs * silence_length)))
            clean = np.append(cleanconcat, newclean)

        idx_n = np.random.randint(0, len(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])

        if len(noise) < len(clean):
            # Too short: keep appending noise files until it covers the speech.
            while len(noise) <= len(clean):
                idx_n = idx_n + 1
                if idx_n >= len(noisefilenames) - 1:
                    idx_n = np.random.randint(0, len(noisefilenames))
                newnoise, fs = audioread(noisefilenames[idx_n])
                noiseconcat = np.append(noise, np.zeros(int(fs * silence_length)))
                noise = np.append(noiseconcat, newnoise)
        noise = noise[0:len(clean)]
        filecounter = filecounter + 1

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(clean=clean,
                                                        noise=noise,
                                                        snr=snr)
            noisyfilename = 'noisy' + str(filecounter) + '_SNRdb_' + str(
                snr) + '_clnsp' + str(filecounter) + '.wav'
            cleanfilename = 'clnsp' + str(filecounter) + '.wav'
            noisefilename = 'noisy' + str(filecounter) + '_SNRdb_' + str(
                snr) + '.wav'
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
def main(cfg):
    """Synthesize fixed-length speech/noise mixtures at several SNR levels.

    Clean speech is concatenated (separated by ``silence_length`` seconds of
    zeros) and cut to exactly ``audio_length`` seconds, a noise signal of the
    same length is built, and the pair is mixed with ``snr_mixer`` at every
    level of ``np.linspace(snr_lower, snr_upper, total_snrlevels)``.  The
    noisy, clean and noise signals are written to the ``mix``, ``s1`` and
    ``s2`` folders next to this script until ``total_hours`` of noisy audio
    have been produced.  When no noise data is found the user is asked
    whether ``slicer()`` should be run first.

    Args:
        cfg: mapping of configuration strings.  Keys read: ``snr_lower``,
            ``snr_upper``, ``total_snrlevels``, ``speech_dir``, ``noise_dir``,
            ``sampling_rate``, ``audioformat``, ``total_hours``,
            ``audio_length``, ``silence_length`` and
            ``noise_types_excluded``.  The string ``'None'`` selects the
            default for ``speech_dir``, ``noise_dir`` and
            ``noise_types_excluded``.

    Raises:
        AssertionError: the clean-speech directory does not exist.
        SystemExit: noise data is missing and the user declined slicing.
    """
    snr_lower = int(cfg["snr_lower"])
    snr_upper = int(cfg["snr_upper"])
    total_snrlevels = int(cfg["total_snrlevels"])

    base_dir = os.path.dirname(__file__)

    clean_dir = os.path.join(base_dir, 'clean_train')
    if cfg["speech_dir"] != 'None':
        clean_dir = cfg["speech_dir"]
    if not os.path.exists(clean_dir):
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        raise AssertionError("Clean speech data is required")

    noise_dir = os.path.join(base_dir, 'noise_train_new')
    try:
        if cfg["noise_dir"] != 'None':
            noise_dir = cfg["noise_dir"]
        noise_available = (os.path.exists(noise_dir)
                           and len(os.listdir(noise_dir)) > 0)
    except Exception:
        # Best effort: any problem while probing the folder is treated as
        # "no noise data".  Unlike a bare ``except:`` this lets Ctrl-C and
        # SystemExit through.
        noise_available = False

    if not noise_available:
        res = input(
            "Can't find noise data. Do you want to run the audio slicer?\nEnter [Y]es [N]o\n"
        )
        if res.lower() in ["y", "yes", ""]:
            slicer()
            print("Sliced successfully.\n\n")
        else:
            input("Noise data is required.\nPress any key to exit.\n")
            sys.exit()

    fs = float(cfg["sampling_rate"])
    audioformat = cfg["audioformat"]
    total_hours = float(cfg["total_hours"])
    audio_length = float(cfg["audio_length"])
    silence_length = float(cfg["silence_length"])

    noisyspeech_dir = os.path.join(base_dir, 'mix')
    clean_proc_dir = os.path.join(base_dir, 's1')
    noise_proc_dir = os.path.join(base_dir, 's2')
    for out_dir in (noisyspeech_dir, clean_proc_dir, noise_proc_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    total_secs = total_hours * 60 * 60
    total_samples = int(total_secs * fs)
    audio_length = int(audio_length * fs)  # from seconds to samples
    SNR = np.linspace(snr_lower, snr_upper, total_snrlevels)

    cleanfilenames = glob.glob(os.path.join(clean_dir, audioformat))
    noisefilenames = glob.glob(os.path.join(noise_dir, audioformat))
    if cfg["noise_types_excluded"] != 'None':
        # Drop every noise file whose name starts with an excluded prefix.
        excluded = tuple(cfg["noise_types_excluded"].split(','))
        noisefilenames = [
            fn for fn in noisefilenames
            if not os.path.basename(fn).startswith(excluded)
        ]

    filecounter = 0
    num_samples = 0

    while num_samples < total_samples:
        idx_s = np.random.randint(0, len(cleanfilenames))
        # NOTE: fs is replaced by the rate reported by audioread from here on.
        clean, fs = audioread(cleanfilenames[idx_s])

        if len(clean) != audio_length:
            # Append further files, separated by silence, until the signal is
            # longer than the target; it is cut to size right below.
            while len(clean) <= audio_length:
                idx_s = idx_s + 1
                # Re-draw the index once the end of the list is reached (the
                # original wrap-around condition is kept unchanged).
                if idx_s >= len(cleanfilenames) - 1:
                    idx_s = np.random.randint(0, len(cleanfilenames))
                newclean, fs = audioread(cleanfilenames[idx_s])
                cleanconcat = np.append(clean, np.zeros(int(fs * silence_length)))
                clean = np.append(cleanconcat, newclean)
        clean = clean[0:audio_length]

        idx_n = np.random.randint(0, len(noisefilenames))
        noise, fs = audioread(noisefilenames[idx_n])

        if len(noise) < len(clean):
            # Too short: keep appending noise files until it covers the speech.
            while len(noise) <= len(clean):
                idx_n = idx_n + 1
                if idx_n >= len(noisefilenames) - 1:
                    idx_n = np.random.randint(0, len(noisefilenames))
                newnoise, fs = audioread(noisefilenames[idx_n])
                noiseconcat = np.append(noise, np.zeros(int(fs * silence_length)))
                noise = np.append(noiseconcat, newnoise)
        noise = noise[0:len(clean)]
        filecounter = filecounter + 1

        # Name of the last noise file used, without directory or extension.
        # basename() handles the platform's separator; splitting on "\\"
        # left the whole directory path in the name on POSIX systems.
        noise_tag = os.path.basename(noisefilenames[idx_n]).split(".")[0]

        for snr in SNR:
            clean_snr, noise_snr, noisy_snr = snr_mixer(clean=clean,
                                                        noise=noise,
                                                        snr=snr)
            noisyfilename = 'noisy' + str(filecounter) + '_SNRdb_' + str(
                snr) + noise_tag + '.wav'
            cleanfilename = 'clnsp' + str(filecounter) + '_.wav'
            noisefilename = 'noise' + str(filecounter) + '_SNRdb_' + str(
                snr) + noise_tag + '.wav'
            noisypath = os.path.join(noisyspeech_dir, noisyfilename)
            cleanpath = os.path.join(clean_proc_dir, cleanfilename)
            noisepath = os.path.join(noise_proc_dir, noisefilename)
            audiowrite(noisy_snr, fs, noisypath, norm=False)
            audiowrite(clean_snr, fs, cleanpath, norm=False)
            audiowrite(noise_snr, fs, noisepath, norm=False)
            num_samples = num_samples + len(noisy_snr)
示例#9
0
from audiolib import audioread, audiowrite, snr_mixer
from os.path import isfile, join,  basename
import os
import numpy as np


"""
Given a source folder, add white-noise in a range of different SNR levels to all files in the source folder.
"""

if __name__ == "__main__":
    # Mix white noise into every file of the source folder, once per SNR level.

    source_folder = "clips"
    candidates = [join(source_folder, entry) for entry in os.listdir(source_folder)]
    source_files = [path for path in candidates if isfile(path)]
    output_folder = source_folder + "_snr"
    snr_min = 30
    snr_max = 50

    for src_path in source_files:
        clean, fs = audioread(src_path)
        # one white-noise realisation per file, reused for all of its SNR levels
        noise = np.random.normal(0, 1, len(clean))
        stem = os.path.splitext(basename(src_path))[0]
        # levels snr_min .. snr_max - 1 (the upper bound is exclusive)
        for snr in range(snr_min, snr_max):
            clean_snr, noise_snr, noisy_snr = snr_mixer(clean=clean, noise=noise, snr=snr)
            output_filename = join(output_folder, f'{snr}S_{stem}.wav')
            audiowrite(noisy_snr, fs, output_filename, norm=False)


示例#10
0
    def __getitem__(self, idx):
        """Load the clean, noise and mixture audio for sample ``idx``.

        The file name is ``<file index>_SNR_<suffix>``, built from the pair
        stored in ``self.file_labels[idx]``, and is looked up in the mono
        speech, mono noise and mixture directories.

        Returns:
            ``(clean, noise, mix, file_db_suffix)`` where the first three are
            ``torch.Tensor`` objects built from the data read by
            ``audiolib.audioread``.
        """
        file_idx, file_db_suffix = self.file_labels[idx]
        file_name = ''.join([file_idx, '_SNR_', file_db_suffix])

        directories = (self.mono_speech_dir, self.mono_noise_dir, self.mix_dir)
        paths = [join(directory, file_name) for directory in directories]

        # read in the order clean, noise, mixture; the sample rate is ignored
        signals = []
        for path in paths:
            data, _ = audiolib.audioread(path)
            signals.append(data)

        clean_t, noise_t, mix_t = (torch.Tensor(data) for data in signals)
        return clean_t, noise_t, mix_t, file_db_suffix

if __name__=="__main__":
    # Debug helper: load one dataset item and dump its signals as WAV files.
    import data_gen_config
    data_config = data_gen_config.default()

    db_lvls = np.linspace(data_config['snr_lower'],
                        data_config['snr_upper'],
                        data_config['total_snrlevels'])

    dset = MultiChannelDataset(root_dir=data_config['output_data_dir'],
                                db_lvls=db_lvls)

    # __getitem__ returns (clean, noise, mix, file_db_suffix); unpacking it
    # into three names raised "too many values to unpack".  The starred name
    # absorbs the trailing item(s).
    clean, noise, mix, *_ = dset[1]
    audiolib.audiowrite(clean.numpy().T, data_config['fs'], './debug/clean.wav')
    audiolib.audiowrite(noise.numpy().T, data_config['fs'], './debug/noise.wav')
    # One file per entry along the first axis of mix - presumably one per
    # channel; confirm against the layout returned by audioread.
    for i in range(len(mix)):
        audiolib.audiowrite(mix.numpy().T[:,i], data_config['fs'], './debug/mix_{}.wav'.format(i))
def _select_rir_channel(samples_rir):
    '''Return a 1-D impulse response from the samples read for one RIR file.
       A one-dimensional array is returned unchanged; otherwise one column is
       picked at random'''
    if len(samples_rir.shape) > 1:
        # The index has to range over the second axis. Sizing it by the number
        # of dimensions would only ever give 0 or 1 for a 2-D array, whatever
        # the real channel count is.
        channel = random.randint(0, samples_rir.shape[1] - 1)
        return samples_rir[:, channel]
    return samples_rir


def _join_source_stems(source_files):
    '''Join the base names of source_files with hyphens and truncate the
       result to MAXFILELEN characters'''
    # [:-4] drops the last four characters of each path
    # (presumably a '.wav'-style extension -- confirm against callers)
    stems = [path[:-4].split(os.path.sep)[-1] for path in source_files]
    return '-'.join(stems)[:MAXFILELEN]


def _write_audio(path, signal, fs):
    '''Write one signal with audiowrite(); an error is printed, not raised,
       so a failed write does not prevent the remaining files being written'''
    try:
        audiowrite(path, signal, fs)
    except Exception as e:
        print(str(e))


def main_gen(params):
    '''Calls gen_audio() to generate the audio signals, verifies that they meet
       the requirements, and writes the files to storage

       For every primary speech chunk returned by gen_audio3() a randomly
       chosen RIR is applied with add_pyreverb(), then three mixtures are
       built with segmental_snr_mixer():
         1. primary + noise                 ('primary_' noisy files)
         2. primary + secondary speech      ('ps_' files)
         3. mixture 2 + noise               ('psn_' files)
       A chunk is skipped when is_clipped() flags any of them.

       params keys read here: 'fileindex_start', 'fileindex_end',
       'cleanfilenames', 'cleanfilenames2', 'noisefilenames', 'myrir',
       'randomize_snr', 'snr' or 'snr_lower'/'snr_upper', 'noisyspeech_dir',
       'clean_proc_dir', 'noise_proc_dir' and 'fs'.

       Returns six lists: clean source / clipped / low-activity files followed
       by noise source / clipped / low-activity files.'''

    clean_source_files = []
    noise_source_files = []
    # NOTE(review): nothing in this function appends to the clipped and
    # low-activity lists, so they are always returned empty.
    clean_clipped_files = []
    clean_low_activity_files = []
    noise_clipped_files = []
    noise_low_activity_files = []

    file_num = params['fileindex_start']

    while file_num <= params['fileindex_end']:
        # generate the primary speech chunks of one randomly chosen speaker
        spk_index = random.randint(0, len(params['cleanfilenames']) - 1)
        chosen_clean = gen_audio3(True, params, spk_index)

        # add reverb to every chunk, each with its own randomly selected RIR
        rirfilenames = params['myrir']
        chosen_clean_reverb = []
        for clean in chosen_clean:
            myrir = random.sample(rirfilenames, 1)
            (_, samples_rir) = wavfile.read(myrir[0])
            clean_reverb = add_pyreverb(clean, _select_rir_channel(samples_rir))
            chosen_clean_reverb.append(clean_reverb)

        # add secondary speech and/or noise for each chunk of primary speech
        for chose_primary in chosen_clean_reverb:
            index2 = random.randint(0, len(params['cleanfilenames2']) - 1)
            clean2, clean_sf, _, _, _ = \
                gen_audio2(True, params, index2, chose_primary.shape[0])

            # generate noise
            noise_index = random.randint(0, len(params['noisefilenames']) - 1)
            noise, noise_sf, _, _, _ = \
                gen_audio(False, params, noise_index, chose_primary.shape[0])

            # if specified, use the specified SNR value for all three mixtures
            # (snr2/snr3 used to be left undefined here -> NameError below)
            if not params['randomize_snr']:
                snr = snr2 = snr3 = params['snr']
            # use randomly sampled SNR values between the specified bounds
            else:
                snr = np.random.randint(params['snr_lower'],
                                        params['snr_upper'])
                snr2 = np.random.randint(params['snr_lower'],
                                         params['snr_upper'])
                snr3 = np.random.randint(params['snr_lower'],
                                         params['snr_upper'])

            # 1. Primary(clean) + Noise
            clean_snr, noise_snr, noisy_snr, target_level = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=noise, snr=snr)
            # 2. Primary + Secondary
            clean_snr2, noise_snr2, noisy_snr2, target_level2 = segmental_snr_mixer(
                params=params, clean=chose_primary, noise=clean2, snr=snr2)
            # 3. Primary + Secondary (clean2) + Noise
            clean_snr3, noise_snr3, noisy_snr3, target_level3 = segmental_snr_mixer(
                params=params, clean=noisy_snr2, noise=noise, snr=snr3)

            # unexpected clipping in any mixture: skip this chunk entirely
            clip_candidates = (clean_snr, noisy_snr,
                               clean_snr2, noisy_snr2,
                               clean_snr3, noisy_snr3)
            if any(is_clipped(signal) for signal in clip_candidates):
                print("Warning: File #" + str(file_num) + " has unexpected clipping, " + \
                    "returning without writing audio to disk")
                continue

            clean_source_files += clean_sf
            noise_source_files += noise_sf

            # build the output paths of the three mixtures
            clean_files_joined = _join_source_stems(clean_sf)
            noise_files_joined = _join_source_stems(noise_sf)
            sources_tag = '_' + clean_files_joined + '_' + noise_files_joined
            file_id = str(file_num)

            # One row per mixture: (noisy-name prefix, clean/noise-name prefix,
            # noisy signal, noise signal, snr, target level). Each noisy file
            # name carries the SNR and target level of its own mixture.
            mixtures = [
                ('primary_', '', noisy_snr, noise_snr, snr, target_level),
                ('ps_', 'ps_', noisy_snr2, noise_snr2, snr2, target_level2),
                ('psn_', 'psn_', noisy_snr3, noise_snr3, snr3, target_level3),
            ]
            outputs = []
            for noisy_tag, proc_tag, noisy_sig, noise_sig, mix_snr, mix_tl in mixtures:
                noisyfilename = noisy_tag + 'noisy_fileid_' + file_id + sources_tag + \
                    '_snr' + str(mix_snr) + '_tl' + str(mix_tl) + '.wav'
                cleanfilename = proc_tag + 'clean_fileid_' + file_id + '.wav'
                noisefilename = proc_tag + 'noise_fileid_' + file_id + '.wav'
                # NOTE(review): every variant stores clean_snr (mixture 1) as
                # its clean reference; clean_snr2/clean_snr3 are only used for
                # the clipping check -- confirm this is intended.
                outputs.append([
                    (os.path.join(params['noisyspeech_dir'], noisyfilename), noisy_sig),
                    (os.path.join(params['clean_proc_dir'], cleanfilename), clean_snr),
                    (os.path.join(params['noise_proc_dir'], noisefilename), noise_sig),
                ])

            file_num += 1

            # write all noisy files first, then the clean ones, then the noise
            # ones; each write is guarded on its own so one failure does not
            # skip the others
            for same_kind in zip(*outputs):
                for path, signal in same_kind:
                    _write_audio(path, signal, params['fs'])

    return clean_source_files, clean_clipped_files, clean_low_activity_files, \
                noise_source_files, noise_clipped_files, noise_low_activity_files