def add_delta(feat,order=2,outFile=None):
    '''
    Append n-order delta coefficients to feature.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature object.
        <order>: the delta order. It must be a positive int.
        <outFile>: output file name.

    Return:
        exkaldi feature or index table object.
    '''
    featList,orderList,outFileList = check_multiple_resources(feat,order,outFile=outFile)

    archiveNames = []
    for oneFeat,oneOrder in zip(featList,orderList):
        # validate each (feature,order) pair before any command is built
        declare.is_feature("feat",oneFeat)
        declare.is_positive_int("order",oneOrder)
        archiveNames.append(f"add_delta({oneFeat.name},{oneOrder})")

    # keys of <resources> match the placeholders used in the command pattern
    resources = {
        "feat": featList,
        "order": orderList,
        "outFile": outFileList,
    }
    return run_kaldi_commands_parallel(
        resources,
        "add-deltas --delta-order={order} {feat} ark:{outFile}",
        analyzeResult=True,
        generateArchive="feat",
        archiveNames=archiveNames,
    )
def compute_cmvn_stats(feat,spk2utt=None,name="cmvn",outFile=None):
    '''
    Compute CMVN statistics from feature.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature object or index table object.
        <spk2utt>: spk2utt file or exkaldi ListTable object. Optional.
        <name>: name of the output CMVN object.
        <outFile>: output file name.

    Return:
        exkaldi CMVN statistics or index table object.
    '''
    featList,spk2uttList,nameList,outFileList = check_multiple_resources(feat,spk2utt,name,outFile=outFile)

    for oneFeat,oneSpk2utt in zip(featList,spk2uttList):
        declare.is_feature("feat",oneFeat)
        # spk2utt is optional, so it is only validated when provided
        if oneSpk2utt is None:
            continue
        declare.is_potential_list_table("spk2utt",oneSpk2utt)

    # The first spk2utt entry alone decides which command pattern is used.
    resources = {"feat":featList}
    if spk2uttList[0] is None:
        cmdPattern = 'compute-cmvn-stats {feat} ark:{outFile}'
    else:
        cmdPattern = 'compute-cmvn-stats --spk2utt=ark:{spk2utt} {feat} ark:{outFile}'
        resources["spk2utt"] = spk2uttList
    resources["outFile"] = outFileList

    return run_kaldi_commands_parallel(
        resources,
        cmdPattern,
        analyzeResult=True,
        generateArchive="cmvn",
        archiveNames=nameList,
    )
def use_fmllr(feat,fmllrMat,utt2spk,outFile=None):
    '''
    Transform feature to fMLLR feature.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature or index table object.
        <fmllrMat>: exkaldi fMLLR transform matrix or index table object.
        <utt2spk>: file name or ListTable object.
        <outFile>: output file name.

    Return:
        exkaldi feature or index table object.
    '''
    featList,matList,utt2spkList,outFileList = check_multiple_resources(feat,fmllrMat,utt2spk,outFile=outFile)

    archiveNames = []
    # <outFileList> takes part in the zip only so that iteration stops at the shortest list
    for _,oneFeat,oneMat,oneUtt2spk in zip(outFileList,featList,matList,utt2spkList):
        declare.is_feature("feat",oneFeat)
        declare.is_fmllr_matrix("fmllrMat",oneMat)
        declare.is_potential_list_table("utt2spk",oneUtt2spk)
        archiveNames.append(f"fmllr({oneFeat.name},{oneMat.name})")

    # the fMLLR matrix is passed under the "transMat" key to match the {transMat} placeholder
    resources = {
        "feat": featList,
        "transMat": matList,
        "utt2spk": utt2spkList,
        "outFile": outFileList,
    }
    return run_kaldi_commands_parallel(
        resources,
        'transform-feats --utt2spk=ark:{utt2spk} {transMat} {feat} ark:{outFile}',
        analyzeResult=True,
        generateArchive="feat",
        archiveNames=archiveNames,
    )
def transform_feat(feat,matFile,outFile=None):
    '''
    Transform feature with a transform matrix, typically an LDA or MLLT matrix.
    Note that if you want to apply an fMLLR transform, use the exkaldi.use_fmllr() function instead.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature or index table object.
        <matFile>: file name of the transform matrix.
        <outFile>: output file name.

    Return:
        exkaldi feature or index table object.
    '''
    featList,matFileList,outFileList = check_multiple_resources(feat,matFile,outFile=outFile)

    archiveNames = []
    for oneFeat,oneMatFile in zip(featList,matFileList):
        declare.is_feature("feat",oneFeat)
        declare.is_file("matFile",oneMatFile)
        # NOTE(review): "tansform" looks like a typo of "transform"; it is kept
        # unchanged because the archive name is visible at runtime.
        archiveNames.append( f"tansform({oneFeat.name})" )

    resources = {
        "feat": featList,
        "matFile": matFileList,
        "outFile": outFileList,
    }
    return run_kaldi_commands_parallel(
        resources,
        'transform-feats {matFile} {feat} ark:{outFile}',
        analyzeResult=True,
        generateArchive="feat",
        archiveNames=archiveNames,
    )
def paste_feature(feats):
    '''
    Paste features along the feature dimension.

    Each feature is sorted by utterance ID and saved through a file handle
    created by FileHandleManager. The "paste-feats" command then reads all
    of them and writes the result to its standard output.

    Args:
        <feats>: a non-empty list or tuple of feature objects.

    Return:
        a new bytes feature object named "paste(<name1>,<name2>,...)".

    Raise:
        AssertionError if <feats> is not a non-empty list or tuple.
        KaldiProcessError if the command fails or produces no output.
    '''
    declare.kaldi_existed()
    assert isinstance(feats,(list,tuple)) and len(feats) > 0
    for item in feats:
        declare.is_feature("feats", item)

    specifiers = []
    sourceNames = []

    with FileHandleManager() as fhm:
        for item in feats:
            # choose the file mode, the table kind, and whether a bytes conversion is needed
            if isinstance(item,BytesFeat):
                mode,kind,needBytes = "wb+","ark",False
            elif isinstance(item,NumpyFeat):
                mode,kind,needBytes = "wb+","ark",True
            else:
                mode,kind,needBytes = "w+","scp",False

            handle = fhm.create(mode,suffix=f".{kind}")
            ordered = item.sort(by="utt")
            if needBytes:
                ordered = ordered.to_bytes()
            ordered.save(handle)

            specifiers.append( f"{kind}:{handle.name}" )
            sourceNames.append( item.name )

        # the command runs inside the manager so the saved files are still available
        joined = " ".join(specifiers)
        cmd = f"paste-feats {joined} ark:-"
        out,err,cod = run_shell_command(cmd,stdin="PIPE",stdout="PIPE",stderr="PIPE")

        # a non-zero exit code or empty output is treated as a failure
        if cod != 0 or out == b'':
            raise KaldiProcessError("Failed to paste feature.",err.decode())

        pastedName = "paste(" + ",".join(sourceNames) + ")"
        # No index table is passed here; a new one needs to be generated later.
        return BytesFeat(out,name=pastedName,indexTable=None)
def splice_feature(feat,left,right=None,outFile=None):
    '''
    Splice left-right N frames to generate new feature.
    The dimensions will become original-dim * (1 + left + right).

    Share Args:
        Null

    Parallel Args:
        <feat>: feature or index table object.
        <left>: the left N-frames to splice. A non-negative int.
        <right>: the right N-frames to splice. A non-negative int. If None, right = left.
        <outFile>: output file name.

    At least one of <left> and <right> must be non-zero. A one-sided context
    such as left=0,right=3 is accepted.

    Return:
        exkaldi feature object or index table object.

    Raise:
        AssertionError if both the left and the right context are 0.
    '''
    feats,lefts,rights,outFiles = check_multiple_resources(feat,left,right,outFile=outFile)

    names = []
    # <outFiles> takes part in the zip only so that iteration stops at the shortest list
    for index,(oneFeat,oneLeft,oneRight,_) in enumerate(zip(feats,lefts,rights,outFiles)):
        # check feature
        declare.is_feature("feat",oneFeat)
        # check left
        declare.is_non_negative_int("left",oneLeft)
        # check right
        if oneRight is None:
            # <right> falls back to <left>, so <left> alone must give a non-empty context
            assert oneLeft != 0,f"At least one of <left> or <right> is valid but got:{oneLeft},{oneRight}."
            rights[index] = oneLeft
        else:
            declare.is_non_negative_int("right",oneRight)
            # Only the 0/0 combination is rejected. <left> may be 0 as long as <right> is not.
            assert not (oneLeft == 0 and oneRight == 0),f"At least one of <left> or <right> is valid but got:{oneLeft},{oneRight}."

        # the name records <right> as it was passed in (None when it was defaulted)
        names.append( f"splice({oneFeat.name},{oneLeft},{oneRight})" )

    # prepare command pattern and resources
    cmdPattern = "splice-feats --left-context={left} --right-context={right} {feat} ark:{outFile}"
    resources = {"feat":feats,"left":lefts,"right":rights,"outFile":outFiles}
    # run
    return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
def use_cmvn(feat,cmvn,utt2spk=None,std=False,outFile=None):
    '''
    Apply CMVN statistics to feature.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature or index table object.
        <cmvn>: exkaldi CMVN statistics or index table object.
        <utt2spk>: file path or ListTable object. Optional.
        <std>: a bool value. If true, apply std normalization.
        <outFile>: output file name.

    Return:
        feature or index table object.
    '''
    featList,cmvnList,utt2spkList,stdList,outFileList = check_multiple_resources(feat,cmvn,utt2spk,std,outFile=outFile)

    archiveNames = []
    # <outFileList> takes part in the zip only so that iteration stops at the shortest list
    for _,oneFeat,oneCmvn,oneUtt2spk,oneStd in zip(outFileList,featList,cmvnList,utt2spkList,stdList):
        declare.is_feature("feat",oneFeat)
        declare.is_cmvn("cmvn",oneCmvn)
        # utt2spk is optional, so it is only validated when provided
        if oneUtt2spk is not None:
            declare.is_potential_list_table("utt2spk",oneUtt2spk)
        declare.is_bool("std",oneStd)
        archiveNames.append( f"cmvn({oneFeat.name},{oneCmvn.name})" )

    # The first utt2spk entry alone decides which command pattern is used.
    resources = {"feat":featList,"cmvn":cmvnList}
    if utt2spkList[0] is None:
        cmdPattern = 'apply-cmvn --norm-vars={std} {cmvn} {feat} ark:{outFile}'
    else:
        cmdPattern = 'apply-cmvn --norm-vars={std} --utt2spk=ark:{utt2spk} {cmvn} {feat} ark:{outFile}'
        resources["utt2spk"] = utt2spkList
    resources["std"] = stdList
    resources["outFile"] = outFileList

    return run_kaldi_commands_parallel(
        resources,
        cmdPattern,
        analyzeResult=True,
        generateArchive="feat",
        archiveNames=archiveNames,
    )