Python estimate_IBM示例，fgnt.mask_estimation.estimate_IBM Python示例

示例#1

0

显示文件

def prepare_training_data(chime_data_dir, dest_dir, suffix_id):
    """Export ideal-binary-mask training data for the 'tr' and 'dt' stages.

    For every entry of the file list returned by ``gen_flist_simu`` the
    '_clean' and '_noise' audio is loaded, transformed with ``stft`` (result
    transposed with axes ``(1, 0, 2)``) and handed to ``estimate_IBM``. The
    two masks and the magnitude of the summed spectra are pickled as float32
    to ``<dest_dir>/<stage>/<basename>``. The relative paths of all exported
    files are written to ``<dest_dir>/flist_<stage>_<suffix_id>.json``.

    Args:
        chime_data_dir: Forwarded to ``gen_flist_simu``.
        dest_dir: Root directory that receives the exported files.
        suffix_id: Forwarded to ``gen_flist_simu`` and used in the name of
            the JSON file list.
    """
    for stage in ('tr', 'dt'):
        fpath, utterances = gen_flist_simu(chime_data_dir, stage, suffix_id)
        exported = []
        mkdir_p(os.path.join(dest_dir, stage))
        progress = tqdm.tqdm(utterances,
                             desc='Generating data for {}'.format(fpath))
        for utt in progress:
            speech = get_audio_data(utt, '_clean')
            interference = get_audio_data(utt, '_noise')
            X = stft(speech, time_dim=1).transpose((1, 0, 2))
            N = stft(interference, time_dim=1).transpose((1, 0, 2))
            mask_x, mask_n = estimate_IBM(X, N)
            mixture_mag = np.abs(X + N)

            payload = {}
            payload['IBM_X'] = mask_x.astype(np.float32)
            payload['IBM_N'] = mask_n.astype(np.float32)
            payload['Y_abs'] = mixture_mag.astype(np.float32)

            # Only the last path component of the source entry names the export.
            base = utt.split('/')[-1]
            with open(os.path.join(dest_dir, stage, base), 'wb') as fid:
                pickle.dump(payload, fid)
            exported.append(os.path.join(stage, base))

        list_name = 'flist_{}_{}.json'.format(stage, suffix_id)
        with open(os.path.join(dest_dir, list_name), 'w') as fid:
            json.dump(exported, fid, indent=4)

示例#2

0

显示文件

文件： chime_data.py 项目： 910882575/NNMaskZT

def prepare_training_data(chime_data_dir, dest_dir):
    """Export ideal-binary-mask training data for the 'tr' and 'dt' stages.

    For every entry of the file list returned by ``gen_flist_simu`` the
    '.Clean' and '.Noise' audio is loaded, transformed with a 256-point STFT
    with a shift of 128 (result transposed with axes ``(1, 0, 2)``) and handed
    to ``estimate_IBM``. The two masks and the magnitude of the summed spectra
    are pickled as float32 to ``<dest_dir>/<stage>/<basename>``. The relative
    paths of all exported files are written to
    ``<dest_dir>/flist_<stage>.json``.

    Args:
        chime_data_dir: Forwarded to ``gen_flist_simu``.
        dest_dir: Root directory that receives the exported files.
    """
    for stage in ['tr', 'dt']:
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = list()
        # The per-stage output directory must exist before the pickles are
        # written; without this the first open(..., 'wb') fails on a fresh
        # destination directory.
        os.makedirs(os.path.join(dest_dir, stage), exist_ok=True)
        for f in flist:
            clean_audio = get_audio_data(f, '.Clean')
            noise_audio = get_audio_data(f, '.Noise')
            X = stft(clean_audio, time_dim=1, size=256, shift=128).transpose(
                (1, 0, 2))
            N = stft(noise_audio, time_dim=1, size=256, shift=128).transpose(
                (1, 0, 2))
            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            # Only the last path component of the source entry names the export.
            base_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, base_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, base_name))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)

示例#3

0

显示文件

def prepare_clean_training_data(chime_data_dir, dest_dir):
    """Export IBM training data using one long clean recording as speech.

    The noise comes from the '.Noise' audio of every entry returned by
    ``gen_flist_simu``. The clean signal is a consecutive excerpt of a single
    long recording, cut to the length of the entry's channel-1 noise file.
    The read position advances from entry to entry (and carries over from
    'tr' to 'dt'); it wraps to the beginning when the recording is exhausted.

    The two masks from ``estimate_IBM`` and the magnitude of the summed
    spectra are pickled as float32 to ``<dest_dir>/<stage>/<basename>``. The
    relative paths of all exported files are written to
    ``<dest_dir>/flist_<stage>.json``.

    Args:
        chime_data_dir: Forwarded to ``gen_flist_simu``.
        dest_dir: Root directory that receives the exported files.
    """
    start = 0
    for stage in ['tr', 'dt']:
        # Number of wrap-arounds of the clean recording within this stage.
        reset_counter = 0
        flist = gen_flist_simu(chime_data_dir, stage, ext=True)
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        clean_data = audioread('/media/hipo/Mega Store/Dataset/single file/Chinese_tai_clean.wav')
        print("clean_data size:", clean_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            noise_audio = get_audio_data(f, '.Noise')
            # The channel-1 noise file is read only to learn the length of
            # the excerpt to take.
            chime_size = audioread('{}.CH{}{}.Noise.wav'.format(f, 1, ''))
            end = chime_size.shape[0] + start
            if end > clean_data.shape[0]:
                # Not enough samples left: wrap around to the beginning.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = chime_size.shape[0]
            # A single excerpt with a leading axis of length 1.
            # NOTE(review): the original looped six times over this same
            # slice but appended only once, so exactly one row was produced;
            # that behavior is kept. Confirm whether six identical channels
            # were intended.
            clean_audio = clean_data[start:end][None, :].astype(np.float32)
            start = end

            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))

            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            base_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, base_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, base_name))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)

示例#4

0

显示文件

文件： chime_data.py 项目： fgnt/nn-gev

def prepare_training_data(chime_data_dir, dest_dir):
    """Export ideal-binary-mask training data for the 'tr' and 'dt' stages.

    For every entry of the file list returned by ``gen_flist_simu`` the
    '.Clean' and '.Noise' audio is loaded, transformed with ``stft`` (result
    transposed with axes ``(1, 0, 2)``) and handed to ``estimate_IBM``. The
    two masks and the magnitude of the summed spectra are pickled as float32
    to ``<dest_dir>/<stage>/<basename>``. The relative paths of all exported
    files are written to ``<dest_dir>/flist_<stage>.json``.

    Args:
        chime_data_dir: Forwarded to ``gen_flist_simu``.
        dest_dir: Root directory that receives the exported files.
    """
    for stage in ('tr', 'dt'):
        sources = gen_flist_simu(chime_data_dir, stage, ext=True)
        written = []
        mkdir_p(os.path.join(dest_dir, stage))
        label = 'Generating data for {}'.format(stage)
        for src in tqdm.tqdm(sources, desc=label):
            speech = get_audio_data(src, '.Clean')
            interference = get_audio_data(src, '.Noise')
            X = stft(speech, time_dim=1).transpose((1, 0, 2))
            N = stft(interference, time_dim=1).transpose((1, 0, 2))
            masks = estimate_IBM(X, N)
            speech_mask, noise_mask = masks
            magnitude = np.abs(X + N)
            record = dict([
                ('IBM_X', speech_mask.astype(np.float32)),
                ('IBM_N', noise_mask.astype(np.float32)),
                ('Y_abs', magnitude.astype(np.float32)),
            ])
            # Only the last path component of the source entry names the export.
            leaf = src.split('/')[-1]
            target = os.path.join(dest_dir, stage, leaf)
            with open(target, 'wb') as fid:
                pickle.dump(record, fid)
            written.append(os.path.join(stage, leaf))
        index_path = os.path.join(dest_dir, 'flist_{}.json'.format(stage))
        with open(index_path, 'w') as fid:
            json.dump(written, fid, indent=4)

示例#5

0

显示文件

def prepare_other_training_data(train_dir, dest_dir):
    """Export IBM training data from single clean files and one noise file.

    For each stage ('tr', 'dt') every regular file in
    ``<train_dir[:-1]>/<stage>`` is treated as a clean utterance. The noise is
    a consecutive excerpt of one long noise recording, cut to the length of
    the utterance. The read position advances from file to file (and carries
    over from 'tr' to 'dt'); it wraps to the beginning when the recording is
    exhausted.

    The two masks from ``estimate_IBM`` and the magnitude of the summed
    spectra are pickled as float32 to ``<dest_dir>/<stage>/<filename>``. The
    relative paths of all exported files are written to
    ``<dest_dir>/flist_<stage>.json``.

    Args:
        train_dir: Directory containing the 'tr' and 'dt' sub-directories.
            Its last character is dropped before joining.
            NOTE(review): presumably a trailing path separator; confirm
            against callers.
        dest_dir: Root directory that receives the exported files.
    """
    start = 0

    for stage in ['tr', 'dt']:
        # The stage name is also the sub-directory name, so the directory is
        # derived from it directly rather than compared by string identity.
        chime_data_dir = os.path.join(train_dir[:-1], stage)
        print(chime_data_dir)
        # Number of wrap-arounds of the noise recording within this stage.
        reset_counter = 0
        flist = [f for f in listdir(chime_data_dir) if isfile(join(chime_data_dir, f))]
        export_flist = list()
        mkdir_p(os.path.join(dest_dir, stage))
        noise_data = audioread('/media/hipo/lento/Dataset/single file/noise_files/all_noise.wav')
        print("noise_data size:", noise_data.shape[0])
        for f in tqdm.tqdm(flist, desc='Generating data for {}'.format(stage)):
            path = os.path.join(chime_data_dir, f)
            clean_audio = get_audio_single(path)
            # The file is read a second time only to learn the length of the
            # noise excerpt to take.
            chime_size = audioread(path)

            end = chime_size.shape[0] + start
            if end > noise_data.shape[0]:
                # Not enough samples left: wrap around to the beginning.
                reset_counter += 1
                print("reset counter: ", reset_counter)
                start = 0
                end = chime_size.shape[0]
            # A single excerpt with a leading axis of length 1.
            noise_audio = noise_data[start:end][None, :].astype(np.float32)
            start = end

            X = stft(clean_audio, time_dim=1).transpose((1, 0, 2))
            N = stft(noise_audio, time_dim=1).transpose((1, 0, 2))

            IBM_X, IBM_N = estimate_IBM(X, N)
            Y_abs = np.abs(X + N)
            export_dict = {
                'IBM_X': IBM_X.astype(np.float32),
                'IBM_N': IBM_N.astype(np.float32),
                'Y_abs': Y_abs.astype(np.float32)
            }
            base_name = f.split('/')[-1]
            export_name = os.path.join(dest_dir, stage, base_name)
            with open(export_name, 'wb') as fid:
                pickle.dump(export_dict, fid)
            export_flist.append(os.path.join(stage, base_name))
        with open(os.path.join(dest_dir, 'flist_{}.json'.format(stage)),
                  'w') as fid:
            json.dump(export_flist, fid, indent=4)

示例#6

0

显示文件

    def get_batch(self, uttid, channel=0, divisor=16, max_length=np.inf):
        """Load the features and optional training targets of one utterance.

        Features come either from a single 'numpy' file list or from any of
        the 'clean', 'noisy' and 'noise' file lists present in
        ``self.flists``. A loaded 'noise' feature is folded into 'noisy'
        (``clean + noise``) and then removed.

        Args:
            uttid: Key into each ``self.flists[<name>]['data']`` mapping.
            channel: Passed as ``idx`` to ``load_arrays_from_wav`` for file
                lists of type 'json'.
            divisor: Not used by this method; kept so existing callers
                keep working.
            max_length: Features longer than this along axis 2 are cropped
                to ``max_length`` at a random offset shared by all features
                of this call.

        Returns:
            A dict of the loaded features. Depending on configuration it may
            also hold 'senone', 'frames', 'ibm_x'/'ibm_n', 'irm' and 'trans';
            'trans' is an ``(indices, values, shape)`` tuple of int32 arrays.

        Raises:
            ValueError: If a file list type is neither 'json' nor 'scp', or
                if IBM/IRM computation is requested without the required
                signals.
        """
        if 'numpy' in self.flists:
            fname = self.flists['numpy']['data'][uttid]
            feats = load_arrays_from_numpy(self.base_dir, fname)
        else:
            feats = {}
            # Set intersection: only the signal kinds that have a file list.
            for name in ['clean', 'noisy', 'noise'] & self.flists.keys():

                fname = self.flists[name]['data'][uttid]
                if self.flists[name]['type'] == 'json':
                    feats[name] = load_arrays_from_wav(self.base_dir,
                                                       fname,
                                                       idx=channel)
                    feats[name] = np.expand_dims(feats[name], axis=0)
                elif self.flists[name]['type'] == 'scp':
                    feats[name] = load_arrays_from_scp(
                        self.base_dir, fname, remove_deltas=name == 'noisy')
                else:
                    raise ValueError("Type must be one of 'json', 'scp'")

        # Mix the noisy signal from its parts; 'noise' is dropped afterwards.
        if 'noise' in feats:
            feats['noisy'] = feats['clean'] + feats['noise']
            del feats['noise']

        # Reshape features: complex64 values become magnitudes, optionally
        # followed by a log with a small offset.
        for name in ['clean', 'noisy'] & feats.keys():
            if feats[name].dtype == 'complex64':
                feats[name] = np.abs(feats[name])
            if self.logify:
                feats[name] = np.log(feats[name] + 0.01)

        if 'senone' in self.flists:
            feats['senone'] = self.flists['senone']['data'][uttid]
            feats['senone'] = np.expand_dims(feats['senone'], axis=0)

        if 'senone' in feats:
            feats['senone'] = np.expand_dims(feats['senone'], axis=0)
            # Pad the labels along axis 2 by repeating the last value until
            # they are as long as the clean features.
            if feats['senone'].shape[2] < feats['clean'].shape[2]:
                padding = [
                    (0, 0), (0, 0),
                    (0, feats['clean'].shape[2] - feats['senone'].shape[2])
                ]
                feats['senone'] = np.pad(feats['senone'], padding, 'edge')

        # Crop over-long features. The random offset is drawn once (-1 means
        # "not drawn yet") so that every feature is cut at the same position.
        start = -1
        for name in ['clean', 'noisy', 'noise', 'senone'] & feats.keys():
            if feats[name].shape[2] > max_length:
                if start == -1:
                    start = np.random.randint(feats[name].shape[2] -
                                              max_length)

                feats[name] = feats[name][:, :, start:start + max_length]
            feats['frames'] = feats[name].shape[-2]

        if self.compute_ibm:
            # NOTE(review): 'noise' has been deleted from feats above, so the
            # first branch cannot be reached as written; only the
            # clean/noisy variant runs.
            if 'noise' in feats and 'clean' in feats:
                feats['ibm_x'], feats['ibm_n'] = estimate_IBM(
                    feats['clean'], feats['noise'])
            elif 'noisy' in feats and 'clean' in feats:
                feats['ibm_x'], feats['ibm_n'] = estimate_IBM(
                    feats['clean'], feats['noisy'], -15, -15)
            else:
                raise ValueError(
                    "To compute IBM, clean and noise or noisy signals are required"
                )

        if self.compute_irm:
            # NOTE(review): as for the IBM, the 'noise' branch cannot be
            # reached because 'noise' was deleted above.
            if 'noise' in feats and 'clean' in feats:
                feats['irm'] = feats['clean'] / (feats['clean'] +
                                                 feats['noise'])
            elif 'noisy' in feats and 'clean' in feats:
                if np.min(feats['clean']) < 0:
                    # Negative values are present: shift both signals by the
                    # common minimum before taking the ratio.
                    minimum = min(np.min(feats['clean']),
                                  np.min(feats['noisy']))
                    feats['irm'] = (feats['clean'] - minimum +
                                    1e-6) / (feats['noisy'] - minimum + 1e-6)
                else:
                    feats['irm'] = np.sqrt(feats['clean']) / np.sqrt(
                        feats['noisy'])
                # Clip the mask to at most 1.
                feats['irm'][feats['irm'] > 1] = 1
            else:
                raise ValueError(
                    "To compute IRM, clean and noise or noisy signals are required"
                )

        if 'trans' in self.flists:
            # The transcription becomes an (indices, values, shape) triple
            # with every label placed in row 0.
            trans = self.flists['trans']['data'][uttid]
            indices = np.array([(0, i) for i in range(len(trans))],
                               dtype=np.int32)
            values = np.array(trans, dtype=np.int32)
            shape = np.array((1, len(trans)), dtype=np.int32)
            feats['trans'] = (indices, values, shape)

        return feats