def process_audio(self, wav_path, label_file, char_file, h5_file):
    """Extract spectrogram features for every utterance and archive them in h5.

    Args:
        wav_path   : text file with one "<utt> <wav-path>" pair per line
        label_file : label file consumed by process_label_file
        char_file  : map file consumed by process_map_file
        h5_file    : output path of the h5py archive

    Side effects:
        sets self.char_map, self.int2phone and self.features_label.
    """
    # read map file
    self.char_map, self.int2phone = process_map_file(char_file)
    # read the label file
    label_dict = process_label_file(label_file, self.out_type, self.char_map)

    # extract spectrum for each listed utterance
    spec_dict = dict()
    with open(wav_path, 'r') as f:
        for line in f:
            utt, path = line.strip().split()
            spect = parse_audio(path, audio_conf, windows,
                                normalize=self.normalize)
            spec_dict[utt] = spect.numpy()

    assert len(spec_dict) == len(label_dict)

    self.features_label = []
    # save the data as h5 file; dict views must be materialized with list(),
    # otherwise h5py raises a TypeError under Python 3. Close the file in a
    # finally block — the original leaked the handle.
    f = h5py.File(h5_file, 'w')
    try:
        f.create_dataset("phone_map_key", data=list(self.char_map.keys()))
        f.create_dataset("phone_map_value", data=list(self.char_map.values()))
        for utt in spec_dict:
            grp = f.create_group(utt)
            self.features_label.append(
                (torch.FloatTensor(spec_dict[utt]), label_dict[utt].tolist()))
            grp.create_dataset('data', data=spec_dict[utt])
            grp.create_dataset('label', data=label_dict[utt])
    finally:
        f.close()
    print("Saved the %s data to h5py file" % self.data_set)
def process_txt(self, mfcc_file, label_file, char_file, h5_file):
    """Read Kaldi-style MFCC features plus labels and archive them in h5.

    Args:
        mfcc_file  : Kaldi text feature file parsed by process_kaldi_feat
        label_file : label file consumed by process_label_file
        char_file  : map file consumed by process_map_file
        h5_file    : output path of the h5py archive

    Exits with status 1 when feature and label counts disagree.
    """
    # read map file
    self.char_map, self.int2phone = process_map_file(char_file)
    # read the label file
    label_dict = process_label_file(label_file, self.out_type, self.char_map)
    # read the mfcc file
    mfcc_dict = process_kaldi_feat(mfcc_file, self.n_feats)

    if len(mfcc_dict) != len(label_dict):
        print("%s data: The num of wav and text are not the same!" %
              self.data_set)
        sys.exit(1)

    self.features_label = []
    # save the data as h5 file; dict views must be materialized with list(),
    # otherwise h5py raises a TypeError under Python 3. Close the file in a
    # finally block — the original leaked the handle.
    f = h5py.File(h5_file, 'w')
    try:
        f.create_dataset("phone_map_key", data=list(self.char_map.keys()))
        f.create_dataset("phone_map_value", data=list(self.char_map.values()))
        for utt in mfcc_dict:
            grp = f.create_group(utt)
            self.features_label.append(
                (torch.FloatTensor(np.array(mfcc_dict[utt])),
                 label_dict[utt].tolist()))
            grp.create_dataset('data', data=np.array(mfcc_dict[utt]))
            grp.create_dataset('label', data=label_dict[utt])
    finally:
        f.close()
    print("Saved the %s data to h5py file" % self.data_set)
def __init__(self, data_dir, data_set='train', feature_type='spectrum',
             out_type='phone', n_feats=39, mel=True):
    """Build the dataset index for one split.

    Args:
        data_dir     : root directory of the prepared data
        data_set     : split name ('train', 'dev', 'test', ...)
        feature_type : 'spectrum', 'waveform', ... (selects <type>.scp)
        out_type     : label unit, looked up through out_map
        n_feats      : feature dimension, kept for feature readers
        mel          : whether mel filterbanks are used downstream
    """
    self.data_set = data_set
    self.out_type = out_type
    self.feature_type = feature_type
    self.mel = mel
    # was accepted but silently dropped in the original — keep it around
    self.n_feats = n_feats

    scp_file = os.path.join(data_dir, data_set, feature_type + '.scp')
    label_file = os.path.join(data_dir, data_set, out_map[out_type] + '_text')
    class_file = os.path.join(data_dir, out_type + '_list.txt')
    self.class2int, self.int2class = process_map_file(class_file)

    if feature_type == "waveform":
        self.label_dict = process_label_file(label_file, self.out_type,
                                             self.class2int)
        self.item = []
        # BUG FIX: the original opened undefined name `wav_path` (NameError);
        # the waveform list is the scp file constructed above.
        with open(scp_file, 'r') as f:
            for line in f:
                utt, path = line.strip().split('\t')
                self.item.append((path, self.label_dict[utt]))
    else:
        self.process_scp_label(scp_file, label_file)
def process_audio(self, wav_path, label_file):
    """Load the label table and the aligned list of wav paths.

    wav_path is a text file with one "<utt> <path>" pair per line.
    Sets self.label and self.path; asserts their lengths agree.
    """
    # labels keyed by utterance id
    self.label = process_label_file(label_file, self.char2int)
    # collect the wav path of each listed utterance
    self.path = []
    with open(wav_path, 'r') as rf:
        for entry in rf:
            _, wav = entry.strip().split()
            self.path.append(wav)
    # input and label sample counts must match
    assert len(self.label) == len(self.path)
def process_audio(self, wav_path, label_file):
    """Read labels and wav paths; verify the two collections line up."""
    # labels keyed by utterance id
    self.label = process_label_file(label_file, self.char2int)
    # each line of wav_path is "<utt> <path>"; keep only the path column
    with open(wav_path, 'r') as f:
        pairs = (line.strip().split() for line in f.readlines())
        self.path = [wav for _, wav in pairs]
    # the number of inputs must equal the number of labels
    assert len(self.label) == len(self.path)
def process_audio(self, wav_path, label_file, char_file, h5_file):
    """Extract spectrograms, optionally mean/std-normalize them, save to h5.

    Args:
        wav_path   : text file with one "<utt> <wav-path>" pair per line
        label_file : label file consumed by process_label_file
        char_file  : map file consumed by process_map_file
        h5_file    : output path of the h5py archive

    Exits with status 1 when feature and label counts disagree.
    Side effects: sets self.char_map, self.int2phone, self.features_label.
    """
    # read map file
    self.char_map, self.int2phone = process_map_file(char_file)
    # read the label file
    label_dict = process_label_file(label_file, self.out_type, self.char_map)

    # extract spectrum for each listed utterance
    spec_dict = dict()
    with open(wav_path, 'r') as f:
        for line in f:
            utt, path = line.strip().split()
            spec_dict[utt] = self.parse_audio(path).numpy()

    if self.normalize:
        # Global per-dimension mean/std over all frames. Build the tensor list
        # once and concatenate in a single call — the original re-concatenated
        # inside the loop, which is quadratic in the number of frames.
        all_frames = torch.cat(
            [torch.FloatTensor(spec_dict[utt]) for utt in spec_dict], 0)
        mean = torch.mean(all_frames, 0, True)
        std = torch.std(all_frames, 0, True)
        for utt in spec_dict:
            # (x - mean) / std; replaces the deprecated torch.add(x, -1, mean)
            # value-overload with plain tensor arithmetic
            normed = (torch.FloatTensor(spec_dict[utt]) - mean) / std
            spec_dict[utt] = normed.numpy()

    if len(spec_dict) != len(label_dict):
        print("%s data: The num of wav and text are not the same!" %
              self.data_set)
        sys.exit(1)

    self.features_label = []
    # save the data as h5 file; dict views must be materialized with list(),
    # otherwise h5py raises a TypeError under Python 3. Close the file in a
    # finally block — the original leaked the handle.
    f = h5py.File(h5_file, 'w')
    try:
        f.create_dataset("phone_map_key", data=list(self.char_map.keys()))
        f.create_dataset("phone_map_value", data=list(self.char_map.values()))
        for utt in spec_dict:
            grp = f.create_group(utt)
            self.features_label.append(
                (torch.FloatTensor(spec_dict[utt]), label_dict[utt].tolist()))
            grp.create_dataset('data', data=spec_dict[utt])
            grp.create_dataset('label', data=label_dict[utt])
    finally:
        f.close()
    print("Saved the %s data to h5py file" % self.data_set)
def process_scp_label(self, scp_file, label_file):
    """Pair each utterance's feature path with its label sequence.

    Fills self.item with (path, label) tuples; asserts that the scp file
    and the label file describe the same number of utterances.
    """
    # labels keyed by utterance id
    label_dict = process_label_file(label_file, self.out_type, self.class2int)
    # scp file: one "<utt> <path>" pair per line
    path_dict = {}
    with open(scp_file, 'r') as scp:
        for row in scp:
            key, feat_path = row.strip().split()
            path_dict[key] = feat_path
    assert len(path_dict) == len(label_dict)
    self.item = [(path_dict[u], label_dict[u]) for u in path_dict]
def preprocessing(basedir, split_train=True, split_ratio=0.1):
    """ Pre-processing raw data files. One should download the MNIST data
    beforehand using the bash scripts in `run_all.sh`.
    If no data is found, prompt error info and return without writing anything.

    Args:
        basedir (str): folder where the raw data files located
        split_train: if True split training set into training set and
            validation set
        split_ratio (float): ratio of the validation set
    """
    train_file_set = {
        'image': 'train-images-idx3-ubyte',
        'label': 'train-labels-idx1-ubyte'
    }
    test_file_set = {
        'image': 't10k-images-idx3-ubyte',
        'label': 't10k-labels-idx1-ubyte'
    }
    training_file = 'training.pt'
    test_file = 'testing.pt'

    # process and save as torch files
    print('Processing...')

    def _all_present(file_set):
        # report the first missing raw file, if any; factored out of the two
        # duplicated check loops in the original
        for val in file_set.values():
            full = os.path.join(basedir, val)
            if not os.path.exists(full):
                print('%s does not exist. Check the dataset folder.' % full)
                return False
        return True

    # short-circuits exactly like the original: a missing training file
    # aborts before the test files are even checked
    if not (_all_present(train_file_set) and _all_present(test_file_set)):
        return

    # training labels and images -> torch tensors
    length, labels = process_label_file(
        os.path.join(basedir, train_file_set['label']))
    train_labels_pt = torch.from_numpy(labels).view(length).long()
    length, num_rows, num_cols, images = process_image_file(
        os.path.join(basedir, train_file_set['image']))
    train_image_pt = torch.from_numpy(images).view(length, num_rows, num_cols)
    # dataset statistics on the raw pixel values scaled to [0, 1]
    mean_train, std_train = np.mean(images) / 255.0, np.std(images) / 255.0

    # test labels and images -> torch tensors
    length, labels = process_label_file(
        os.path.join(basedir, test_file_set['label']))
    test_labels_pt = torch.from_numpy(labels).view(length).long()
    length, num_rows, num_cols, images = process_image_file(
        os.path.join(basedir, test_file_set['image']))
    test_image_pt = torch.from_numpy(images).view(length, num_rows, num_cols)

    np.save(os.path.join(basedir, 'stats'), [mean_train, std_train])
    with open(os.path.join(basedir, training_file), 'wb') as f:
        torch.save((train_image_pt, train_labels_pt), f)
    with open(os.path.join(basedir, test_file), 'wb') as f:
        torch.save((test_image_pt, test_labels_pt), f)

    if split_train:
        print('Spliting training set...')
        # random permutation; the first val_len indices form the validation set
        idx = np.random.permutation(len(train_labels_pt))
        val_len = int(len(train_labels_pt) * split_ratio)
        label_validation_split = train_labels_pt[idx[:val_len]]
        label_train_split = train_labels_pt[idx[val_len:]]
        image_validation_split = train_image_pt[idx[:val_len]]
        image_train_split = train_image_pt[idx[val_len:]]
        with open(os.path.join(basedir, 'validation_split.pt'), 'wb') as f:
            torch.save((image_validation_split, label_validation_split), f)
        with open(os.path.join(basedir, 'training_split.pt'), 'wb') as f:
            torch.save((image_train_split, label_train_split), f)
    print('Done!')