def _compute_mvn_and_save(self, path):
    """Accumulate mean/std statistics over all features and pickle them.

    Every matrix read from ``self._feat_rspec`` is passed through
    ``unflatten_channel`` and reduced along axis 1 with ``keepdims=True``;
    the size of that axis is the per-utterance sample count (presumably the
    frame axis -- confirm against ``unflatten_channel``).  The statistics are
    stored in ``self._mvn_params`` as ``{"mean": ..., "std": ...}`` and the
    same dict is pickled to ``path``.

    Args:
        path: destination file for the pickled statistics.

    Raises:
        ValueError: if no samples were accumulated from ``self._feat_rspec``.
    """
    feat_sum = 0.
    feat_sq_sum = 0.
    n_frames = 0.
    n_utts = 0
    for _, utt_feats in kio.read_mat_scp(self._feat_rspec):
        utt_feats = unflatten_channel(utt_feats, self._n_chan)
        feat_sum += np.sum(utt_feats, axis=1, keepdims=True)
        feat_sq_sum += np.sum(utt_feats ** 2, axis=1, keepdims=True)
        n_frames += utt_feats.shape[1]
        n_utts += 1
        if n_utts % 500 == 0:
            info("accumulated %s utts" % n_utts)

    # Fail with a readable message instead of a bare division-by-zero error.
    if n_frames == 0:
        raise ValueError(
            "no frames read from %s; cannot compute mvn params"
            % self._feat_rspec)

    mean = feat_sum / n_frames
    # E[x^2] - E[x]^2 can come out slightly negative through rounding when a
    # feature is (almost) constant; clamp so the square root is never NaN.
    variance = np.maximum(feat_sq_sum / n_frames - mean ** 2, 0.)
    std = np.sqrt(variance)
    info("mean shape is %s, value is\n%s" % (mean.shape, mean))
    info("std shape is %s, value is\n%s" % (std.shape, std))
    self._mvn_params = {"mean": mean, "std": std}

    # dirname() is "" for a bare file name; there is no directory to create.
    out_dir = os.path.dirname(path)
    if out_dir:
        check_and_makedirs(out_dir)
    with open(path, "wb") as f:
        info("dumping mvn params to %s" % path)
        pickle.dump(self._mvn_params, f)
def _load_kaldi_feat_list(self):
    """Scan the feature scp once and build the in-memory lookup table.

    Populates:
        self._utt_list, self._utt_len_list: id and ``len()`` of every
            utterance whose matrix has at least ``self._seg_len`` rows.
        self._utt2rawfeat: reset to an empty dict.
        self._feat_shape, self._feat_dim: segment shape derived from the
            last matrix read, and the product of its entries.
        self._kaldi_reader: dict mapping every utterance id in the scp
            (discarded ones included) to a copy of its feature matrix.

    Raises:
        ValueError: if ``self._feat_rspec`` yields no utterances.
    """
    start_time = time.time()
    self._utt_list = []
    self._utt_len_list = []
    self._utt2rawfeat = dict()

    # Fill the random-access table during the scan itself so the scp is
    # read from disk only once.
    feat_table = {}
    last_feats = None
    for utt_id, utt_feats in kio.read_mat_scp(self._feat_rspec):
        feat_table[utt_id] = np.array(utt_feats)
        last_feats = utt_feats
        n_rows = len(utt_feats)
        if n_rows < self._seg_len:
            info("%s len (%s) shorter than seg_len (%s), discarded" % (
                utt_id, n_rows, self._seg_len))
        else:
            self._utt_list.append(utt_id)
            self._utt_len_list.append(n_rows)
            if len(self._utt_list) % 500 == 0:
                info("scanned %s utts" % len(self._utt_list))

    # Without at least one matrix there is nothing to derive a shape from.
    if last_feats is None:
        raise ValueError("no utterances found in %s" % self._feat_rspec)

    # The segment shape is probed on the last matrix read; the prints below
    # are the existing debug output.
    print("Original feats shape for last utterance", np.shape(last_feats))
    probe = unflatten_channel(last_feats, self._n_chan)
    print("Feats shape after unflatten", np.shape(probe))
    print(self._use_chan, self._use_fbin)
    probe = probe[self._use_chan, :, self._use_fbin]
    print("Feats shape after conc with chan,fbin", np.shape(probe))
    self._feat_shape = (probe.shape[0], self._seg_len, probe.shape[2])
    print("Feats shape with segments", self._feat_shape)
    self._feat_dim = np.prod(self._feat_shape)

    self._kaldi_reader = feat_table
    info("scanning kaldi feat takes %.2f s, #utt=%s, #frames=%s, feat shape is %s, dim is %s" % (
        time.time() - start_time, len(self._utt_list),
        sum(self._utt_len_list), self._feat_shape, self._feat_dim))
def _load_kaldi_feat_list(self):
    """Scan the feature archive and open a random-access reader on it.

    Populates:
        self._utt_list, self._utt_len_list: id and ``len()`` of every
            utterance whose matrix has at least ``self._seg_len`` rows.
        self._utt2rawfeat: reset to an empty dict.
        self._feat_shape, self._feat_dim: segment shape derived from the
            last matrix read, and the product of its entries.
        self._kaldi_reader: ``RABFMReader`` opened on ``self._feat_rspec``.

    Raises:
        ValueError: if ``self._feat_rspec`` yields no utterances.
    """
    start_time = time.time()
    self._utt_list = []
    self._utt_len_list = []
    self._utt2rawfeat = dict()

    last_feats = None
    with SBFMReader(self._feat_rspec) as f:
        while not f.done():
            utt_id, last_feats = f.next()
            n_rows = len(last_feats)
            if n_rows < self._seg_len:
                info("%s len (%s) shorter than seg_len (%s), discarded" % (
                    utt_id, n_rows, self._seg_len))
            else:
                self._utt_list.append(utt_id)
                self._utt_len_list.append(n_rows)
                if len(self._utt_list) % 500 == 0:
                    info("scanned %s utts" % len(self._utt_list))

    # Without at least one matrix there is nothing to derive a shape from.
    if last_feats is None:
        raise ValueError("no utterances found in %s" % self._feat_rspec)

    # The segment shape is probed on the last matrix read.
    probe = unflatten_channel(last_feats, self._n_chan)
    probe = probe[self._use_chan, :, self._use_fbin]
    self._feat_shape = (probe.shape[0], self._seg_len, probe.shape[2])
    self._feat_dim = np.prod(self._feat_shape)

    self._kaldi_reader = RABFMReader(self._feat_rspec)
    info("scanning kaldi feat takes %.2f s, #utt=%s, #frames=%s, feat shape is %s, dim is %s" % (
        time.time() - start_time, len(self._utt_list),
        sum(self._utt_len_list), self._feat_shape, self._feat_dim))
def plot_kaldi_feat_fac(rspec, img_dir, feat_type="fbank_raw"):
    """Visualize a factorization as three image grids (fac, X1, X2).

    Utterance ids are expected in the format
    ``"%s_%s_%s_%s" % (i, j, utt_id_i, utt_id_j)`` for ``i in range(n_i)``
    and ``j in range(n_j)``.  Entries are grouped by their first two tokens:

    * ``i > -1 and j > -1`` -> ``fac.png``, reshaped to ``[n_i, n_j, ...]``
    * ``j == -1``           -> ``X1.png``,  reshaped to ``[n_i, 2, ...]``
    * ``i == -1``           -> ``X2.png``,  reshaped to ``[n_j, 2, ...]`` and
      then transposed so the axis of length 2 comes first

    Args:
        rspec: specifier handed to ``SBFMReader``.
        img_dir: output directory; ``None`` selects "show" mode instead of
            "save".
        feat_type: forwarded to ``plot_grids``.

    Raises:
        ValueError: if no entry has both ``i > -1`` and ``j > -1``.
    """
    print("plot factorization:")
    print("\tfeat_rspec %s, save images to %s" % (rspec, repr(img_dir)))
    mode = "show" if img_dir is None else "save"
    if img_dir is not None:
        check_and_makedirs(img_dir)

    # Close the reader deterministically instead of leaving it to the GC.
    with SBFMReader(rspec) as reader:
        toks_feats_list = [
            (item[0].split("_"), unflatten_channel(item[1], 1)[np.newaxis, ...])
            for item in reader]

    # Parse the two leading index tokens once instead of per filter.
    entries = [(int(toks[0]), int(toks[1]), feats)
               for toks, feats in toks_feats_list]

    fac_entries = [ent for ent in entries if ent[0] > -1 and ent[1] > -1]
    if not fac_entries:
        raise ValueError("no factorized entries (i > -1 and j > -1) in %s" % rspec)
    n_i = max(ent[0] for ent in fac_entries) + 1
    n_j = max(ent[1] for ent in fac_entries) + 1
    feats_shape = list(fac_entries[0][2].shape)

    fac_feats = np.asarray([ent[2] for ent in fac_entries])
    fac_feats = fac_feats.reshape([n_i, n_j] + feats_shape)
    plot_grids(fac_feats, feat_type=feat_type, mode=mode,
               name="%s/fac.png" % img_dir, figsize=(3.5 * n_i, 5. * n_j))

    x1_feats = np.asarray([ent[2] for ent in entries if ent[1] == -1])
    x1_feats = x1_feats.reshape([n_i, 2] + feats_shape)
    plot_grids(x1_feats, feat_type=feat_type, mode=mode,
               name="%s/X1.png" % img_dir, figsize=(3.5 * n_i, 5. * 2))

    x2_feats = np.asarray([ent[2] for ent in entries if ent[0] == -1])
    x2_feats = x2_feats.reshape([n_j, 2] + feats_shape)
    # Swap the two grid axes so X2 is laid out as 2 x n_j.
    x2_feats = x2_feats.transpose((1, 0, 2, 3, 4, 5))
    plot_grids(x2_feats, feat_type=feat_type, mode=mode,
               name="%s/X2.png" % img_dir, figsize=(3.5 * 2, 5. * n_j))
def _load_kaldi_feat_by_utt_id(self, utt_id_list): """load raw features to memory if self._max_to_load, clean cached; else check and load """ if self._max_to_load > 0: del self._utt2rawfeat self._utt2rawfeat = dict() # fast check if having loaded all utterances if len(self._utt2rawfeat) == len(self._utt_list): return for utt_id in utt_id_list: # only load those which are not in memory if not utt_id in self._utt2rawfeat: feats = self._kaldi_reader[utt_id] self._utt2rawfeat[utt_id] = unflatten_channel( feats, self._n_chan)
def plot_kaldi_feat(wspec, img_dir, feat_type="fbank_raw"):
    """Plot every feature matrix read from ``wspec``, one figure each.

    Args:
        wspec: specifier handed to ``SBFMReader``.
        img_dir: directory that receives ``<utt_id>.png``; ``None`` selects
            "show" mode instead of "save".
        feat_type: forwarded to ``plot_rows``.
    """
    print("plotting wspec %s, save images to %s" % (wspec, repr(img_dir)))
    save_images = img_dir is not None
    mode = "save" if save_images else "show"
    if save_images:
        check_and_makedirs(img_dir)

    img_h = 4.
    with SBFMReader(wspec) as f:
        while not f.done():
            utt_id, utt_feats = f.next()
            # Fixed height; width scales with shape[0] / shape[1] of the
            # matrix as read.
            img_w = img_h / utt_feats.shape[1] * utt_feats.shape[0]
            utt_feats = unflatten_channel(utt_feats, 1)[np.newaxis, ...]

            # os.path.join(None, ...) raises TypeError, so only build a full
            # path when a directory was given.  In "show" mode the bare file
            # name is passed -- assumes plot_rows does not write a file in
            # that mode; TODO confirm.
            img_name = "%s.png" % utt_id
            if save_images:
                img_name = os.path.join(img_dir, img_name)

            plot_rows([utt_feats], utt_id, feat_type=feat_type, mode=mode,
                      name=img_name, figsize=(img_w, img_h))