def _compute_mvn_and_save(self, path):
    """Accumulate mean/std statistics over all features and pickle them.

    Every matrix read from ``self._feat_rspec`` is passed through
    ``unflatten_channel(..., self._n_chan)``.  Sums and sums of squares are
    accumulated over axis 1 of the result (kept as a singleton axis), and
    the element count is taken from the size of that axis.  The mean and
    standard deviation derived from those totals are stored in
    ``self._mvn_params`` as ``{"mean": ..., "std": ...}`` and dumped to
    ``path`` with pickle.

    Args:
        path: destination file for the pickled parameters.
            ``check_and_makedirs`` is called on its parent directory; the
            call is skipped when ``path`` has no directory component.

    Raises:
        ZeroDivisionError: if nothing is read from ``self._feat_rspec``.
    """
    count = 0.
    feat_sum = 0.
    feat_sqsum = 0.
    n_utts = 0
    for _, utt_feats in kio.read_mat_scp(self._feat_rspec):
        utt_feats = unflatten_channel(utt_feats, self._n_chan)
        feat_sum += np.sum(utt_feats, axis=1, keepdims=True)
        feat_sqsum += np.sum(utt_feats ** 2, axis=1, keepdims=True)
        count += utt_feats.shape[1]
        n_utts += 1
        if n_utts % 500 == 0:
            info("accumulated %s utts" % n_utts)
    mean = feat_sum / count
    # E[x^2] - E[x]^2 can come out slightly negative through floating-point
    # cancellation when a feature is (almost) constant; clamp at zero so the
    # square root gives 0 instead of NaN.
    variance = np.maximum(feat_sqsum / count - mean ** 2, 0.)
    std = np.sqrt(variance)
    info("mean shape is %s, value is\n%s" % (mean.shape, mean))
    info("std shape is %s, value is\n%s" % (std.shape, std))
    self._mvn_params = {"mean": mean, "std": std}
    # dirname() is "" for a bare file name; there is no directory to prepare
    # in that case.
    out_dir = os.path.dirname(path)
    if out_dir:
        check_and_makedirs(out_dir)
    with open(path, "wb") as f:
        info("dumping mvn params to %s" % path)
        pickle.dump(self._mvn_params, f)
 def _load_kaldi_feat_list(self):
     """Scan the feature scp once, index usable utterances and cache all matrices.

     Populates:
       * ``self._utt_list`` / ``self._utt_len_list``: ids and lengths of the
         utterances whose length is at least ``self._seg_len``; shorter
         ones are logged and left out.
       * ``self._utt2rawfeat``: reset to an empty dict.
       * ``self._feat_shape``: ``(shape[0], self._seg_len, shape[2])`` of the
         last matrix read, after ``unflatten_channel`` and indexing with
         ``self._use_chan`` / ``self._use_fbin``.
       * ``self._feat_dim``: product of ``self._feat_shape``.
       * ``self._kaldi_reader``: dict mapping every utterance id in the scp
         (short ones included) to a copy of its matrix.

     Raises:
         ValueError: if nothing can be read from ``self._feat_rspec``.
     """
     start_time = time.time()
     self._utt_list = []
     self._utt_len_list = []
     self._utt2rawfeat = dict()
     # Filled during the scan so the scp only has to be read once.
     feat_table = dict()
     utt_feats = None
     for utt_id, utt_feats in kio.read_mat_scp(self._feat_rspec):
         feat_table[utt_id] = np.array(utt_feats)
         if len(utt_feats) < self._seg_len:
             info("%s len (%s) shorter than seg_len (%s), discarded" % (
                 utt_id, len(utt_feats), self._seg_len))
             continue
         self._utt_list.append(utt_id)
         self._utt_len_list.append(len(utt_feats))
         # Report progress only when an utterance was actually kept, so a
         # run of discarded utterances does not repeat the same message.
         if len(self._utt_list) % 500 == 0:
             info("scanned %s utts" % len(self._utt_list))
     if utt_feats is None:
         raise ValueError(
             "no features could be read from %s" % self._feat_rspec)
     # The last matrix read (even one discarded as too short) serves as the
     # template for the feature shape.
     print("Original feats shape for last utterance", np.shape(utt_feats))
     utt_feats = unflatten_channel(utt_feats, self._n_chan)
     print("Feats shape after unflatten", np.shape(utt_feats))
     print(self._use_chan, self._use_fbin)
     utt_feats = utt_feats[self._use_chan, :, self._use_fbin]
     print("Feats shape after conc with chan,fbin", np.shape(utt_feats))
     self._feat_shape = (utt_feats.shape[0], self._seg_len, utt_feats.shape[2])
     print("Feats shape with segments", self._feat_shape)
     self._feat_dim = np.prod(self._feat_shape)
     self._kaldi_reader = feat_table
     info("scanning kaldi feat takes %.2f s, #utt=%s, #frames=%s, feat shape is %s, dim is %s" % (
             time.time() - start_time, len(self._utt_list), 
             sum(self._utt_len_list), self._feat_shape, self._feat_dim))
示例#3
0
    def _load_kaldi_feat_list(self):
        """Scan the feature archive, index usable utterances and open a reader.

        Populates:
          * ``self._utt_list`` / ``self._utt_len_list``: ids and lengths of
            the utterances whose length is at least ``self._seg_len``;
            shorter ones are logged and left out.
          * ``self._utt2rawfeat``: reset to an empty dict.
          * ``self._feat_shape``: ``(shape[0], self._seg_len, shape[2])`` of
            the last matrix read, after ``unflatten_channel`` and indexing
            with ``self._use_chan`` / ``self._use_fbin``.
          * ``self._feat_dim``: product of ``self._feat_shape``.
          * ``self._kaldi_reader``: ``RABFMReader(self._feat_rspec)``.

        Raises:
            ValueError: if nothing can be read from ``self._feat_rspec``.
        """
        start_time = time.time()
        self._utt_list = []
        self._utt_len_list = []
        self._utt2rawfeat = dict()
        utt_feats = None
        with SBFMReader(self._feat_rspec) as f:
            while not f.done():
                utt_id, utt_feats = f.next()
                if len(utt_feats) < self._seg_len:
                    info("%s len (%s) shorter than seg_len (%s), discarded" % (
                            utt_id, len(utt_feats), self._seg_len))
                    continue
                self._utt_list.append(utt_id)
                self._utt_len_list.append(len(utt_feats))
                # Report progress only when an utterance was actually kept,
                # so a run of discarded utterances does not repeat the same
                # message.
                if len(self._utt_list) % 500 == 0:
                    info("scanned %s utts" % len(self._utt_list))

        if utt_feats is None:
            raise ValueError(
                "no features could be read from %s" % self._feat_rspec)

        # The last matrix read (even one discarded as too short) serves as
        # the template for the feature shape.
        utt_feats = unflatten_channel(utt_feats, self._n_chan)
        utt_feats = utt_feats[self._use_chan, :, self._use_fbin]
        self._feat_shape = (utt_feats.shape[0], self._seg_len, utt_feats.shape[2])
        self._feat_dim = np.prod(self._feat_shape)

        self._kaldi_reader = RABFMReader(self._feat_rspec)
        info("scanning kaldi feat takes %.2f s, #utt=%s, #frames=%s, feat shape is %s, dim is %s" % (
                time.time() - start_time, len(self._utt_list), 
                sum(self._utt_len_list), self._feat_shape, self._feat_dim))
def plot_kaldi_feat_fac(rspec, img_dir, feat_type="fbank_raw"):
    """Plot the factorization grid and the two marginal grids read from ``rspec``.

    Utterance ids are expected in the format
    "%s_%s_%s_%s" % (i, j, utt_id_i, utt_id_j); they are split on "_" and
    the first two tokens are parsed as integers ``i`` and ``j``.  Three
    figures are drawn through ``plot_grids``:

      * ``fac.png``: entries with ``i > -1`` and ``j > -1``, reshaped to an
        ``(n_i, n_j)`` grid, where ``n_i`` / ``n_j`` are the largest ``i`` /
        ``j`` among those entries plus one.
      * ``X1.png``: entries with ``j == -1``, reshaped to ``(n_i, 2)``.
      * ``X2.png``: entries with ``i == -1``, reshaped to ``(n_j, 2)`` and
        then transposed so the first two axes are swapped.

    Args:
        rspec: specifier handed to ``SBFMReader``.
        img_dir: output directory; ``None`` selects mode "show", anything
            else selects mode "save" and is passed to ``check_and_makedirs``.
        feat_type: forwarded to ``plot_grids``.
    """
    print("plot factorization:")
    print("\tfeat_rspec %s, save images to %s" % (rspec, repr(img_dir)))
    if img_dir is None:
        mode = "show"
    else:
        mode = "save"
        check_and_makedirs(img_dir)

    # (id tokens, features with a leading singleton axis) for every entry
    entries = []
    for item in SBFMReader(rspec):
        id_toks = item[0].split("_")
        feats = unflatten_channel(item[1], 1)[np.newaxis, ...]
        entries.append((id_toks, feats))

    def _select(keep):
        """Split the entries accepted by ``keep`` into (token lists, features)."""
        return zip(*[(toks, feats) for toks, feats in entries if keep(toks)])

    def _draw(grid, file_name, size):
        """Hand one grid to ``plot_grids`` with the shared settings."""
        plot_grids(grid,
                   feat_type=feat_type,
                   mode=mode,
                   name="%s/%s" % (img_dir, file_name),
                   figsize=size)

    # full (i, j) grid
    fac_toks, fac_feats = _select(
        lambda toks: int(toks[0]) > -1 and int(toks[1]) > -1)
    n_i = 1 + max(int(toks[0]) for toks in fac_toks)
    n_j = 1 + max(int(toks[1]) for toks in fac_toks)
    feats_shape = fac_feats[0].shape
    tail_dims = list(feats_shape)
    fac_grid = np.asarray(fac_feats).reshape([n_i, n_j] + tail_dims)
    _draw(fac_grid, "fac.png", (3.5 * n_i, 5. * n_j))

    # entries whose second index is -1
    _, x1_feats = _select(lambda toks: int(toks[1]) == -1)
    x1_grid = np.asarray(x1_feats).reshape([n_i, 2] + tail_dims)
    _draw(x1_grid, "X1.png", (3.5 * n_i, 5. * 2))

    # entries whose first index is -1; the first two grid axes are swapped
    _, x2_feats = _select(lambda toks: int(toks[0]) == -1)
    x2_grid = np.asarray(x2_feats).reshape([n_j, 2] + tail_dims)
    x2_grid = x2_grid.transpose((1, 0, 2, 3, 4, 5))
    _draw(x2_grid, "X2.png", (3.5 * 2, 5. * n_j))
示例#5
0
 def _load_kaldi_feat_by_utt_id(self, utt_id_list):
     """Make sure the raw features of the requested utterances are cached.

     When ``self._max_to_load`` is positive, the cache ``self._utt2rawfeat``
     is emptied first.  Each id in ``utt_id_list`` that is not cached is
     then looked up in ``self._kaldi_reader``, passed through
     ``unflatten_channel(..., self._n_chan)`` and stored in the cache.
     Nothing is loaded when the cache already has as many entries as
     ``self._utt_list``.
     """
     if self._max_to_load > 0:
         # drop the previous cache and start from an empty one
         del self._utt2rawfeat
         self._utt2rawfeat = {}
     # quick exit: cache size already matches the utterance list
     if len(self._utt_list) == len(self._utt2rawfeat):
         return
     cache = self._utt2rawfeat
     for utt_id in utt_id_list:
         if utt_id in cache:
             # already in memory
             continue
         raw = self._kaldi_reader[utt_id]
         cache[utt_id] = unflatten_channel(raw, self._n_chan)
示例#6
0
def plot_kaldi_feat(wspec, img_dir, feat_type="fbank_raw"):
    """Plot every feature matrix read from ``wspec``, one figure per utterance.

    Each matrix is passed through ``unflatten_channel(..., 1)``, given a
    leading singleton axis and handed to ``plot_rows`` together with its
    utterance id.  The figure height is fixed at 4 and the width is scaled
    by ``shape[0] / shape[1]`` of the matrix as read.

    Args:
        wspec: specifier handed to ``SBFMReader`` (it is read, despite the
            name).
        img_dir: output directory; ``None`` selects mode "show", anything
            else selects mode "save" and is passed to ``check_and_makedirs``.
        feat_type: forwarded to ``plot_rows``.
    """
    print("plotting wspec %s, save images to %s" % (wspec, repr(img_dir)))
    mode = "show" if img_dir is None else "save"
    if img_dir is not None:
        check_and_makedirs(img_dir)
    img_h = 4.
    with SBFMReader(wspec) as f:
        while not f.done():
            utt_id, utt_feats = f.next()
            img_w = img_h / utt_feats.shape[1] * utt_feats.shape[0]
            utt_feats = unflatten_channel(utt_feats, 1)[np.newaxis, ...]
            # os.path.join(None, ...) raises TypeError, which made "show"
            # mode unusable; only join when a directory was given.
            img_name = "%s.png" % utt_id
            if img_dir is not None:
                img_name = os.path.join(img_dir, img_name)
            plot_rows([utt_feats],
                      utt_id,
                      feat_type=feat_type,
                      mode=mode,
                      name=img_name,
                      figsize=(img_w, img_h))