def __getitem__(self, idx):
        """Return one dataset example.

        For ``feature_type == "waveform"``: a ``(waveform, label)`` pair.
        Otherwise: loads a Kaldi feature matrix, optionally applies
        SpecAugment and transcript enhancement during training, stacks
        left/right context frames, skips frames, zero-pads the length to
        a multiple of ``self.n_downsample``, and returns either a mel
        spectrogram pair or ``(feat, label, trans, utt)`` tensors.
        """
        if self.feature_type == "waveform":
            path, label = self.item[idx]
            return (load_wave(path), label)
        else:
            path, label, trans, utt = self.item[idx]

            feat = kaldiio.load_mat(path)

            # Data augmentation, training only.  (The original guarded
            # this with a constant-true `tmp == "aug"` check; that dead
            # code has been removed.)
            if self.train:
                feat = spec_augment(feat)
                # Enhance each transcript, then flatten the per-item
                # lists into one flat token sequence.
                trans = [data_enhancement(tran) for tran in trans]
                trans = sum(trans, [])
            feat = skip_feat(make_context(feat, self.left_ctx, self.right_ctx),
                             self.n_skip_frame)
            seq_len, dim = feat.shape
            if seq_len % self.n_downsample != 0:
                # Zero-pad to a multiple of n_downsample.  Match the
                # feature dtype so the resulting tensor dtype is the
                # same whether or not padding was needed (np.zeros
                # defaults to float64).
                pad_len = self.n_downsample - seq_len % self.n_downsample
                feat = np.vstack([feat,
                                  np.zeros((pad_len, dim), dtype=feat.dtype)])
            if self.mel:
                return (F_Mel(torch.from_numpy(feat), audio_conf), label)
            else:
                return (torch.from_numpy(feat), torch.LongTensor(label),
                        torch.LongTensor(trans), utt)
예제 #2
0
 def __getitem__(self, idx):
     """Load the idx-th utterance.

     Returns a mel spectrogram when ``self.mel`` is set, otherwise a
     ``(feature_tensor, utt_id)`` pair.
     """
     feat_path, utt_id = self.item[idx]
     mat = kaldiio.load_mat(feat_path)
     mat = make_context(mat, self.left_ctx, self.right_ctx)
     mat = skip_feat(mat, self.n_skip_frame)
     n_frames, n_dims = mat.shape
     remainder = n_frames % self.n_downsample
     if remainder:
         # Zero-pad so the frame count divides n_downsample evenly.
         padding = np.zeros((self.n_downsample - remainder, n_dims))
         mat = np.vstack([mat, padding])
     tensor = torch.from_numpy(mat)
     if self.mel:
         return F_Mel(tensor, audio_conf)
     return (tensor, utt_id)
예제 #3
0
 def __getitem__(self, idx):
     """Fetch one example.

     For "waveform" features: a ``(waveform, label)`` pair.  Otherwise:
     a processed Kaldi feature matrix with its label and utterance id
     (or a mel spectrogram pair when ``self.mel`` is set).
     """
     if self.feature_type == "waveform":
         wav_path, label = self.item[idx]
         return (load_wave(wav_path), label)

     feat_path, label, utt_id = self.item[idx]
     mat = kaldiio.load_mat(feat_path)
     mat = skip_feat(make_context(mat, self.left_ctx, self.right_ctx),
                     self.n_skip_frame)
     n_frames, n_dims = mat.shape
     remainder = n_frames % self.n_downsample
     if remainder:
         # Zero-pad the frame axis up to a multiple of n_downsample.
         padding = np.zeros((self.n_downsample - remainder, n_dims))
         mat = np.vstack([mat, padding])
     tensor = torch.from_numpy(mat)
     if self.mel:
         return (F_Mel(tensor, audio_conf), label)
     return (tensor, torch.LongTensor(label), utt_id)
예제 #4
0
 def __getitem__(self, idx):
     """Return one dataset example.

     For ``feature_type == "waveform"``: a ``(waveform, label)`` pair.
     Otherwise: loads a Kaldi feature matrix, applies SpecAugment and
     per-phoneme transcript enhancement while training, stacks context
     frames, skips frames, and zero-pads the length to a multiple of
     ``self.n_downsample`` before returning the tensors.
     """
     if self.feature_type == "waveform":
         path, label = self.item[idx]
         return (load_wave(path), label)
     else:
         path, label, trans, utt = self.item[idx]
         feat = kaldiio.load_mat(path)
         # Data augmentation, training only.  (Original compared
         # `self.is_training == True`; plain truthiness is idiomatic
         # and also correct for truthy non-bool flags.)
         if self.is_training:
             feat = spec_augment(feat)
             trans = [data_enhance(phoneme) for phoneme in trans]
         feat = skip_feat(make_context(feat, self.left_ctx, self.right_ctx),
                          self.n_skip_frame)
         seq_len, dim = feat.shape
         if seq_len % self.n_downsample != 0:
             # Zero-pad to a multiple of n_downsample, matching feat's
             # dtype so padding doesn't silently upcast to float64.
             pad_len = self.n_downsample - seq_len % self.n_downsample
             feat = np.vstack([feat,
                               np.zeros((pad_len, dim), dtype=feat.dtype)])
         if self.mel:
             return (F_Mel(torch.from_numpy(feat), audio_conf), label)
         else:
             return (torch.from_numpy(feat), torch.LongTensor(label),
                     torch.LongTensor(trans), utt)