def prepare_data(training=True):
    """Build the shuffled frame-level (feature, pdf-ID) dataset for one split.

    Settings are read from ``args``, which is defined outside this function:
    ``root``, ``feat``, ``cmn``, ``delta`` and ``splice``.

    Args:
        training: When true the "train_clean_5" split is used, otherwise
            "dev_clean_2".

    Returns:
        A ``(featDim, dataset)`` pair: ``featDim`` is the ``dim`` of the
        features after CMVN/delta/splice were applied, and ``dataset`` is the
        shuffled result of ``exkaldi.tuple_dataset(..., frameLevel=True)``.
    """
    flag = "train_clean_5" if training else "dev_clean_2"

    print(f"{flag}: Load feature...")
    featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
    feats = exkaldi.load_feat(featsFile)

    if args.cmn:
        print(f"{flag}: Use cmvn...")
        cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
        cmvn = exkaldi.load_cmvn(cmvnFile)
        feats = exkaldi.use_cmvn(
            feats, cmvn, utt2spk=f"{args.root}/data/{flag}/utt2spk"
        )
        # The statistics are no longer needed once they have been applied.
        del cmvn

    if args.delta > 0:
        print(f"{flag}: Add delta...")
        feats = feats.add_delta(args.delta)

    if args.splice > 0:
        print(f"{flag}: Splice feature...")
        feats = feats.splice(args.splice)

    feats = feats.to_numpy()
    featDim = feats.dim

    # exp/ is resolved under args.root like every other input path here;
    # args.feat is only a feature name and is not a usable base directory.
    aliDir = f"{args.root}/exp/tri3b_ali_{flag}"

    print(f"{flag}: Load alignment...")
    ali = exkaldi.load_ali(f"{aliDir}/ali.*.gz")

    print(f"{flag}: Get pdf alignment...")
    pdfAli = ali.to_numpy(aliType="pdfID", hmm=f"{aliDir}/final.mdl")
    # Drop the source alignment; only the converted pdf-ID form is used below.
    del ali

    feats.rename("feat")
    pdfAli.rename("pdfID")

    print(f"{flag}: Tuple dataset...")
    dataset = exkaldi.tuple_dataset([feats, pdfAli], frameLevel=True)
    # Shuffle in place so the returned dataset is in random order.
    random.shuffle(dataset)

    return featDim, dataset
def tuple_dataset(feat, pdfAli, phoneAli, cutLength=None):
    """Combine features and both alignments into one utterance-level dataset.

    Each input is optionally cut with ``cutLength``, sorted by frame, and
    renamed ("feat", "pdfID", "phoneID") before being handed to
    ``exkaldi.tuple_dataset`` with ``frameLevel=False``.

    Args:
        feat: Feature object providing ``cut``, ``sort`` and ``rename``.
        pdfAli: Pdf-ID alignment object with the same methods.
        phoneAli: Phone-ID alignment object with the same methods.
        cutLength: If not ``None``, passed to ``cut`` on every input before
            sorting.

    Returns:
        Whatever ``exkaldi.tuple_dataset`` returns for the three streams.
    """

    def _cut_and_sort(stream):
        # Cutting is skipped entirely when no cut length was requested.
        if cutLength is None:
            return stream.sort(by="frame")
        return stream.cut(cutLength).sort(by="frame")

    streams = [_cut_and_sort(item) for item in (feat, pdfAli, phoneAli)]

    # Rename only after all three streams have been prepared.
    for stream, label in zip(streams, ("feat", "pdfID", "phoneID")):
        stream.rename(label)

    return exkaldi.tuple_dataset(streams, frameLevel=False)
def process_feat_ali(training=True):
    """Load fMLLR features plus pdf/phone alignments as a shuffled dataset.

    Settings are read from ``args``, which is defined outside this function:
    ``expDir``, ``useCMVN``, ``delta``, ``splice`` and ``normalizeFeat``.

    Args:
        training: When true the "train" data directory is used, otherwise
            "dev".

    Returns:
        The shuffled result of ``exkaldi.tuple_dataset`` built with
        ``frameLevel=True`` from the features and both alignments.
    """
    subset = "train" if training else "dev"
    # Every input of this split is read from the same directory.
    dataDir = f"{args.expDir}/train_dnn/data/{subset}"

    feat = exkaldi.load_feat(f"{dataDir}/fmllr.ark")

    if args.useCMVN:
        stats = exkaldi.load_cmvn(f"{dataDir}/cmvn_of_fmllr.ark")
        feat = exkaldi.use_cmvn(feat, stats, f"{dataDir}/utt2spk")
        # The statistics are no longer needed once they have been applied.
        del stats

    if args.delta > 0:
        feat = feat.add_delta(args.delta)

    if args.splice > 0:
        feat = feat.splice(args.splice)

    feat = feat.to_numpy()

    if args.normalizeFeat:
        feat = feat.normalize(std=True)

    pdfAli = exkaldi.load_ali(f"{dataDir}/pdfID.npy")
    phoneAli = exkaldi.load_ali(f"{dataDir}/phoneID.npy")

    # Name each stream before the three are combined.
    for stream, label in ((feat, "feat"), (pdfAli, "pdfID"), (phoneAli, "phoneID")):
        stream.rename(label)

    dataset = exkaldi.tuple_dataset([feat, pdfAli, phoneAli], frameLevel=True)
    # Shuffle in place so the returned dataset is in random order.
    random.shuffle(dataset)
    return dataset