Exemplo n.º 1
0
	def add_penalty(self, penalty=0):
		'''
		Add a word insertion penalty to the lattice.

		Args:
			<penalty>: a non-negative int or float value, passed to Kaldi as --word-ins-penalty.
		Return:
			A new Lattice object named "add_penalty(<original name>)".
		Raise:
			WrongOperation if this lattice is void.
			KaldiProcessError if the Kaldi command fails or produces no output.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			# The message used to say "to scale", copied from the scale method.
			raise WrongOperation('No any lattice to add penalty.')

		assert isinstance(penalty, (int,float)) and penalty >= 0, "Expected <penalty> is positive int or float value."

		cmd = f"lattice-add-penalty --word-ins-penalty={penalty} ark:- ark:-"

		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

		# An empty output is treated as a failure even when the exit code is 0.
		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError("Failed to add penalty.")

		newName = f"add_penalty({self.name})"
		return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
Exemplo n.º 2
0
	def determinize(self, acwt=1.0, beam=6):
		'''
		Determinize the lattice with pruning.

		Args:
			<acwt>: acoustic scale, a non-negative float value.
			<beam>: prune beam, a non-negative int value.
		Return:
			A new Lattice object named "determinize(<original name>)".
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		assert isinstance(acwt, float) and acwt >= 0, "Expected <acwt> is positive float value."
		assert isinstance(beam, int) and beam >= 0, "Expected <beam> is positive int value."

		pipe = subprocess.PIPE
		command = " ".join([
			"lattice-determinize-pruned",
			f"--acoustic-scale={acwt}",
			f"--beam={beam}",
			"ark:- ark:-",
		])
		detLat, errMsg, retCode = run_shell_command(command, stdin=pipe, stdout=pipe, stderr=pipe, inputs=self.data)

		# Success requires both a zero exit code and a non-empty output.
		if retCode == 0 and detLat != b'':
			return Lattice(data=detLat, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=f"determinize({self.name})")

		print(errMsg.decode())
		raise KaldiProcessError("Failed to determinize lattice.")
Exemplo n.º 3
0
def use_cmvn_sliding(feat,windowSize=None,std=False):
	'''
	Apply sliding-window CMVN to a feature.

	Args:
		<feat>: exkaldi feature object (BytesFeature or NumpyFeature).
		<windowSize>: window size. If None, the largest frame count in <feat> rounded up to a multiple of 100 is used.
		<std>: a bool value. It is passed to Kaldi as --norm-vars.

	Return:
		A BytesFeature object named "cmvn(<feature name>,<window size>)".
	'''
	declare.is_classes("feat",feat, ["BytesFeature","NumpyFeature"])
	declare.is_bool("std",std)

	if windowSize is not None:
		declare.is_positive_int("windowSize",windowSize)
	else:
		# feat.lens[1] is iterated as (utterance, length) pairs.
		longest = max(frames for _, frames in feat.lens[1])
		windowSize = math.ceil(longest/100)*100

	normVars = 'true' if std else 'false'

	command = f'apply-cmvn-sliding --cmn-window={windowSize} --min-cmn-window=100 --norm-vars={normVars} ark:- ark:-'
	result,errMsg,retCode = run_shell_command(command,stdin="PIPE",stderr="PIPE",stdout="PIPE",inputs=feat.data)
	if retCode != 0:
		print(errMsg.decode())
		raise KaldiProcessError("Failed to compute sliding cmvn.")

	return BytesFeature(result,name=f"cmvn({feat.name},{windowSize})",indexTable=None)
Exemplo n.º 4
0
def compute_postprob_norm(ali, posrProbDim):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    For more help information, look at the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <posrProbDim>: the dimensionality of posterior probability.
    Return:
        A numpy array holding the log of each count divided by the sum of all counts.
    Raise:
        UnsupportedType if <ali> is not one of the supported alignment objects.
        KaldiProcessError if the Kaldi command fails or prints nothing.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(ali) not in ["NumpyAlignmentPhone", "NumpyAlignmentPdf"]:
        # The message used to be truncated and did not name the second accepted type.
        raise UnsupportedType(
            f'Expected exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object but got a {type_name(ali)}.')

    cmd = f"analyze-counts --print-args=False --verbose=0 --binary=false --counts-dim={posrProbDim} ark:- -"
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=ali.data)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        print(err.decode())
        raise KaldiProcessError('Analyze counts defailed.')

    # Drop the surrounding square brackets of the text output, then split on whitespace.
    fields = out.decode().strip().strip("[]").strip().split()
    counts = np.array(fields, dtype=np.int32)
    return np.log(counts / np.sum(counts))
Exemplo n.º 5
0
def use_fmllr(feat, transMatrix, utt2spkFile):
    '''
    Transform a feature with fMLLR transform matrices.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <transMatrix>: exkaldi fMLLR transform matrix object (BytesFmllrMatrix or NumpyFmllrMatrix).
        <utt2spkFile>: utt2spk file name.

    Return:
        a new exkaldi feature object: bytes if <feat> was a BytesFeature, numpy otherwise.
    '''
    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"<feat> should exkaldi feature object but got: {featType}."
        )
    returnBytes = featType == "BytesFeature"
    feat = feat.sort(by="utt")
    if not returnBytes:
        feat = feat.to_bytes()

    matrixType = type_name(transMatrix)
    if matrixType not in ("BytesFmllrMatrix", "NumpyFmllrMatrix"):
        raise UnsupportedType(
            f"<transMatrix> should exkaldi fMLLR transform matrix object but got: {matrixType}."
        )
    transMatrix = transMatrix.sort(by="utt")
    if matrixType == "NumpyFmllrMatrix":
        transMatrix = transMatrix.to_bytes()

    # The matrices are handed to Kaldi through a temporary archive file,
    # which is removed when the "with" block exits.
    with tempfile.NamedTemporaryFile("wb+", suffix="_trans.ark") as matrixFile:
        matrixFile.write(transMatrix.data)
        matrixFile.seek(0)

        pipe = subprocess.PIPE
        command = f'transform-feats --utt2spk=ark:{utt2spkFile} ark:{matrixFile.name} ark:- ark:-'
        result, errMsg, retCode = run_shell_command(command,
                                                    stdin=pipe,
                                                    stdout=pipe,
                                                    stderr=pipe,
                                                    inputs=feat.data)

        if retCode != 0:
            print(errMsg.decode())
            raise KaldiProcessError(
                "Failed to transform feature to fMLLR feature.")

        transformed = BytesFeature(result, name=f"fmllr({feat.name})")
        return transformed if returnBytes else transformed.to_numpy()
Exemplo n.º 6
0
def compute_cmvn_stats(feat, spk2utt=None, name="cmvn"):
    '''
    Compute CMVN statistics of a feature.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <spk2utt>: spk2utt file path or exkaldi ScriptTable object. If None, no --spk2utt option is given to Kaldi.
        <name>: a string, the name of the returned statistics.

    Return:
        An exkaldi BytesCMVNStatistics object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"Expected <feat> is a exkaldi feature object but got {featType}."
        )
    feat = feat.sort("utt")
    if featType == "NumpyFeature":
        feat = feat.to_bytes()

    with tempfile.NamedTemporaryFile("w+", encoding="utf-8") as mapFile:
        if spk2utt is None:
            command = 'compute-cmvn-stats ark:- ark:-'
        else:
            if isinstance(spk2utt, str):
                if not os.path.isfile(spk2utt):
                    raise WrongPath(f"No such file:{spk2utt}.")
                table = ScriptTable(name="spk2utt").load(spk2utt)
            elif isinstance(spk2utt, ScriptTable):
                table = spk2utt
            else:
                raise UnsupportedType(
                    f"<spk2utt> should be a file path or ScriptTable object but got {type_name(spk2utt)}."
                )
            # Kaldi reads the sorted table from the temporary file.
            table.sort().save(mapFile)
            mapFile.seek(0)
            command = f'compute-cmvn-stats --spk2utt=ark:{mapFile.name} ark:- ark:-'

        pipe = subprocess.PIPE
        stats, errMsg, retCode = run_shell_command(command,
                                                   stdin=pipe,
                                                   stdout=pipe,
                                                   stderr=pipe,
                                                   inputs=feat.data)

        if (isinstance(retCode, int) and retCode != 0) or stats == b'':
            print(errMsg.decode())
            raise KaldiProcessError('Failed to compute CMVN statistics.')

        return BytesCMVNStatistics(stats, name, indexTable=None)
Exemplo n.º 7
0
	def am_rescore(self, hmm, feat):
		'''
		Replace the acoustic scores of the lattice with those of a new HMM-GMM model.

		Args:
			<hmm>: exkaldi HMM object or file path.
			<feat>: exkaldi feature object (BytesFeature or NumpyFeature).

		Return:
			A new Lattice object named "am_rescore(<original name>)".
		Raise:
			WrongOperation if this lattice is void.
			UnsupportedType if <hmm> or <feat> has an unsupported type.
			KaldiProcessError if the Kaldi command fails or produces no output.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		# The feature is written as an archive, so use an archive suffix (it used to be ".mdl").
		featTemp = tempfile.NamedTemporaryFile("wb+", suffix=".ark")
		try:
			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
				hmmTemp.write(hmm.data)
				hmmTemp.seek(0)
				hmmFile = hmmTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")

			if type_name(feat) == "BytesFeature":
				feat = feat.sort(by="utt")
			elif type_name(feat) == "NumpyFeature":
				# The data is written to a binary temp file below, so it must be
				# converted to bytes (the original called to_numpy() here).
				feat = feat.sort(by="utt").to_bytes()
			else:
				raise UnsupportedType(f"<feat> should be exkaldi feature object but got: {type_name(feat)}.")

			featTemp.write(feat.data)
			featTemp.seek(0)
			featFile = featTemp.name

			cmd = f"gmm-rescore-lattice {hmmFile} ark:- ark:{featFile} ark:-"

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError("Failed to rescore lattice.")

			newName = f"am_rescore({self.name})"
			return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
		finally:
			hmmTemp.close()
			featTemp.close()
Exemplo n.º 8
0
def use_cmvn_sliding(feat, windowsSize=None, std=False):
    '''
    Apply sliding-window CMVN to a feature.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <windowsSize>: window size, an int value. If None, the largest frame count in <feat> rounded up to a multiple of 100 is used.
        <std>: a bool value. It is passed to Kaldi as --norm-vars.

    Return:
        An exkaldi BytesFeature object named "cmvn(<feature name>,<window size>)".
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if not isinstance(feat, BytesFeature):
        if type_name(feat) != "NumpyFeature":
            raise UnsupportedType(
                f"Expected <feat> is an exkaldi feature object but got {type_name(feat)}."
            )
        feat = feat.to_bytes()

    if windowsSize is None:
        # feat.lens[1] is iterated as (utterance, length) pairs.
        longest = max(frames for _, frames in feat.lens[1])
        windowsSize = math.ceil(longest / 100) * 100
    else:
        assert isinstance(windowsSize,
                          int), "Expected <windowsSize> is an int value."

    # Only a value equal to True switches variance normalization on.
    normVars = 'true' if std == True else 'false'

    pipe = subprocess.PIPE
    command = f'apply-cmvn-sliding --cmn-window={windowsSize} --min-cmn-window=100 --norm-vars={normVars} ark:- ark:-'
    result, errMsg, retCode = run_shell_command(command,
                                                stdin=pipe,
                                                stdout=pipe,
                                                stderr=pipe,
                                                inputs=feat.data)
    if (isinstance(retCode, int) and retCode != 0) or result == b'':
        print(errMsg.decode())
        raise KaldiProcessError('Failed to use sliding CMVN.')

    return BytesFeature(result, f"cmvn({feat.name},{windowsSize})", indexTable=None)
Exemplo n.º 9
0
    def loadArkScpFile(fileName, suffix):
        '''
        Read an archive table through the Kaldi copy-feats command.

        Args:
            <fileName>: the file to read.
            <suffix>: "ark" to read it as an archive; any other value reads it as an scp table.

        Return:
            The bytes printed by copy-feats.
        '''
        declare.kaldi_existed()

        specifier = 'ark' if suffix == "ark" else 'scp'
        command = f'copy-feats {specifier}:{fileName} ark:-'

        data, errMsg, retCode = run_shell_command(command, stdout="PIPE", stderr="PIPE")
        if (isinstance(retCode, int) and retCode != 0) or data == b'':
            print(errMsg.decode())
            raise KaldiProcessError('Failed to read archive table.')

        return data
Exemplo n.º 10
0
def transform_feat(feat, matrixFile):
    '''
    Transform a feature with a transform matrix file. Typically, LDA or MLLT matrices.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <matrixFile>: file name of the transform matrix.

    Return:
        a new exkaldi feature object: bytes if <feat> was a BytesFeature, numpy otherwise.
    '''
    assert isinstance(matrixFile, str), \
        f"<transformMatrix> should be a file path but got: {type_name(matrixFile)}."
    if not os.path.isfile(matrixFile):
        raise WrongPath(f"No such file: {matrixFile}.")

    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"<feat> should exkaldi feature object but got: {featType}."
        )
    returnBytes = featType == "BytesFeature"
    if not returnBytes:
        feat = feat.to_bytes()

    pipe = subprocess.PIPE
    result, errMsg, retCode = run_shell_command(
        f'transform-feats {matrixFile} ark:- ark:-',
        stdin=pipe,
        stdout=pipe,
        stderr=pipe,
        inputs=feat.data)

    if retCode != 0:
        print(errMsg.decode())
        raise KaldiProcessError("Failed to transform feature.")

    transformed = BytesFeature(result, name=f"tansform({feat.name})")
    return transformed if returnBytes else transformed.to_numpy()
Exemplo n.º 11
0
 def transform(data, cmd):
     '''
     Run a shell command on alignment data and parse its text output.

     Args:
         <data>: the data sent to the standard input of the command.
         <cmd>: the shell command. Each output line is parsed as an utterance ID followed by integers.

     Return:
         A dict mapping each utterance ID to a numpy int32 array.
     Raise:
         KaldiProcessError if the command returns a non-zero code and prints nothing.
     '''
     out, err, cod = run_shell_command(cmd,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       inputs=data)
     # NOTE(review): sibling functions use "or" here; "and" is kept so that partial
     # output of a failing command is still parsed - confirm this is intended.
     if (isinstance(cod, int) and cod != 0) and out == b'':
         print(err.decode())
         raise KaldiProcessError('Failed to transform alignment.')

     result = {}
     for rawLine in BytesIO(out).readlines():
         fields = rawLine.decode().strip().split()
         if not fields:
             # A blank line has no utterance ID; skip it instead of failing on fields[0].
             continue
         result[fields[0]] = np.array(fields[1:], dtype=np.int32)
     # The original returned the undefined name "results".
     return result
Exemplo n.º 12
0
    def loadArkScpFile(fileName, suffix):
        '''
        Read an archive table through the Kaldi copy-feats command.

        Args:
            <fileName>: the file to read.
            <suffix>: "ark" to read it as an archive; any other value reads it as an scp table.

        Return:
            A BytesMatrix object built from the bytes printed by copy-feats.
        '''
        ExkaldiInfo.vertify_kaldi_existed()

        specifier = 'ark' if suffix == "ark" else 'scp'
        command = f'copy-feats {specifier}:{fileName} ark:-'

        data, errMsg, retCode = run_shell_command(command,
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)
        if (isinstance(retCode, int) and retCode != 0) or data == b'':
            print(errMsg.decode())
            raise KaldiProcessError('Copy feat defeated.')

        return BytesMatrix(data)
Exemplo n.º 13
0
def compute_postprob_norm(ali, probDims):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    For more help information, look at the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentTrans, NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <probDims>: the dimensionality of posterior probability, a positive int value.

    Return:
        A numpy array holding the log of each count divided by the sum of all counts.
    '''
    declare.kaldi_existed()
    declare.is_classes(
        "ali", ali,
        ["NumpyAlignmentTrans", "NumpyAlignmentPhone", "NumpyAlignmentPdf"])
    declare.is_positive_int("probDims", probDims)

    # Serialize the alignment as text: one "<key> <id> <id> ..." line per item.
    records = [
        key + " " + " ".join(map(str, ids.tolist()))
        for key, ids in ali.items()
    ]
    text = "\n".join(records)

    command = f"analyze-counts --binary=false --counts-dim={probDims} ark:- -"
    result, errMsg, retCode = run_shell_command(command,
                                                stdin="PIPE",
                                                stdout="PIPE",
                                                stderr="PIPE",
                                                inputs=text)
    if (isinstance(retCode, int) and retCode != 0) or result == b"":
        print(errMsg.decode())
        raise KaldiProcessError('Analyze counts defailed.')

    # Drop the surrounding square brackets of the text output, then split on whitespace.
    fields = result.decode().strip().strip("[]").strip().split()
    counts = np.array(fields, dtype=np.float32)
    return np.log(counts / np.sum(counts))
Exemplo n.º 14
0
	def scale(self, acwt=1, invAcwt=1, ac2lm=0, lmwt=1, lm2ac=0):
		'''
		Scale the scores of the lattice.

		Args:
			<acwt>: acoustic scale.
			<invAcwt>: inverse acoustic scale.
			<ac2lm>: acoustic to lm scale.
			<lmwt>: lm scale.
			<lm2ac>: lm to acoustic scale.
			All of them are expected to be non-negative.
		Return:
			A new Lattice object named "scale(<original name>)".
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice to scale.')

		assert all(value >= 0 for value in (acwt, invAcwt, ac2lm, lmwt, lm2ac)), "Expected scale is positive value."

		options = [
			f'--acoustic-scale={acwt}',
			f'--acoustic2lm-scale={ac2lm}',
			f'--inv-acoustic-scale={invAcwt}',
			f'--lm-scale={lmwt}',
			f'--lm2acoustic-scale={lm2ac}',
		]
		command = 'lattice-scale ' + ' '.join(options) + ' ark:- ark:-'

		pipe = subprocess.PIPE
		scaled, errMsg, retCode = run_shell_command(command, stdin=pipe, stdout=pipe, stderr=pipe, inputs=self.data)

		# Success requires both a zero exit code and a non-empty output.
		if retCode == 0 and scaled != b'':
			return Lattice(data=scaled,wordSymbolTable=self.wordSymbolTable,hmm=self.hmm,name=f"scale({self.name})")

		print(errMsg.decode())
		raise KaldiProcessError("Failed to scale lattice.")
Exemplo n.º 15
0
def nn_align(hmm, prob, trainGraphFile, transitionScale=1.0, acousticScale=0.1,
				selfloopScale=0.1, beam=10, retry_beam=40, name="ali"):
	'''
	Align the neural network acoustic output probability.

	Args:
		<hmm>: exkaldi HMM object or model file path.
		<prob>: exkaldi probability object (BytesProbability or NumpyProbability).
		<trainGraphFile>: training graph file path.
		<transitionScale>,<acousticScale>,<selfloopScale>: scales passed to the Kaldi command.
		<beam>,<retry_beam>: beam sizes passed to the Kaldi command.
		<name>: a string, the name of the returned alignment.

	Return:
		An exkaldi BytesAlignmentTrans object.
	'''
	probType = type_name(prob)
	if probType == "NumpyProbability":
		prob = prob.to_bytes()
	elif probType != "BytesProbability":
		raise UnsupportedType(f"Expected <prob> is an exkaldi probability object but got: {probType}.")

	# The temporary model file is only written when an HMM object is given.
	with tempfile.NamedTemporaryFile("wb+", suffix=".mdl") as modelFile:
		if isinstance(hmm,str):
			assert os.path.isfile(hmm), f"No such file: {hmm}."
			modelPath = hmm
		else:
			assert type_name(hmm) in ["BaseHMM","MonophoneHMM","TriphoneHMM"], f"<hmm> should be exkaldi HMM object but got: {hmm}."
			modelFile.write(hmm.data)
			modelFile.seek(0)
			modelPath = modelFile.name

		command = (
			f"align-compiled-mapped --transition-scale={transitionScale} --acoustic-scale={acousticScale} --self-loop-scale={selfloopScale} "
			f"--beam={beam} --retry-beam={retry_beam} {modelPath} ark:{trainGraphFile} ark:- ark:-"
		)

		pipe = subprocess.PIPE
		result,errMsg,retCode = run_shell_command(command, stdin=pipe, stdout=pipe, stderr=pipe, inputs=prob.data)

		if retCode != 0:
			print(errMsg.decode())
			raise KaldiProcessError("Failed to align probability.")

		return BytesAlignmentTrans(result,name=name)
Exemplo n.º 16
0
def wer(ref, hyp, ignore=None, mode='all'):
    '''
    Compute WER (word error rate) between <ref> and <hyp>.

    Args:
        <ref>,<hyp>: exkaldi transcription object or file path.
        <ignore>: a symbol removed from both <ref> and <hyp> before scoring.
        <mode>: "all" or "present".

    Return:
        a namedtuple of score information with the fields:
        WER, words, insErr, delErr, subErr, SER, sentences, wrongSentences, missedSentences.
    Raise:
        WrongDataFormat if removing the ignored symbol fails or leaves nothing.
        KaldiProcessError if compute-wer prints nothing.
    '''
    import os  # function-scope import: only used to check the size of the filtered files

    declare.is_potential_transcription("ref", ref)
    declare.is_potential_transcription("hyp", hyp)
    declare.is_instances("mode", mode, ['all', 'present'])
    declare.kaldi_existed()

    if ignore is not None:
        declare.is_valid_string("ignore", ignore)

    with FileHandleManager() as fhm:

        if ignore is None:

            if type_name(hyp) == "Transcription":
                hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                hyp.save(hypTemp)
                hyp = hypTemp.name

            if type_name(ref) == "Transcription":
                refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                ref.save(refTemp)
                ref = refTemp.name

            cmd = f'compute-wer --text --mode={mode} ark:{ref} ark,p:{hyp}'

        else:

            def remove_ignored(target, argName):
                # Write <target> without the ignored symbol to a temp file and return its handle.
                if type_name(target) == "Transcription":
                    # presumably save() without a file returns the text - verify against Transcription
                    text = target.save()
                else:
                    with open(target, "r", encoding="utf-8") as fr:
                        text = fr.read()
                filtered = fhm.create("w+", suffix=".txt", encoding="utf-8")
                sedCmd = f'sed "s/{ignore} //g" > {filtered.name}'
                _, err, cod = run_shell_command(sedCmd,
                                                stdin="PIPE",
                                                stdout="PIPE",
                                                stderr="PIPE",
                                                inputs=text)
                # sed's output is redirected into the temp file, so the captured stdout
                # is always empty. Judge success by the exit code and the file size
                # (the original tested the captured stdout and therefore always raised).
                if cod != 0 or os.path.getsize(filtered.name) == 0:
                    print(err.decode())
                    raise WrongDataFormat(f"<{argName}> has wrong data formation.")
                return filtered

            hypTemp = remove_ignored(hyp, "hyp")
            refTemp = remove_ignored(ref, "ref")
            cmd = f'compute-wer --text --mode={mode} ark:{refTemp.name} ark,p:{hypTemp.name}'

        # Score while the temp files are still managed by the file handle manager.
        scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                  stdout="PIPE",
                                                  stderr="PIPE")

    if len(scoreOut) == 0:
        print(scoreErr.decode())
        raise KaldiProcessError("Failed to compute WER.")

    out = scoreOut.decode().split("\n")
    # Raw strings hold the same patterns without invalid-escape warnings.
    pattern1 = r'%WER (.*) \[ (.*) \/ (.*),(.*) ins,(.*) del,(.*) sub \]'
    pattern2 = r"%SER (.*) \[ (.*) \/ (.*) \]"
    pattern3 = r"Scored (.*) sentences,(.*) not present in hyp."
    s1 = re.findall(pattern1, out[0])[0]
    s2 = re.findall(pattern2, out[1])[0]
    s3 = re.findall(pattern3, out[2])[0]

    Score = namedtuple("Score", [
        "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
        "wrongSentences", "missedSentences"
    ])
    # Kaldi prints "%SER <percent> [ <wrong sentences> / <sentences> ]", so the
    # total is the third group and the error count the second (they were swapped).
    return Score(
        float(s1[0]),  #WER
        int(s1[2]),  #words
        int(s1[3]),  #ins
        int(s1[4]),  #del
        int(s1[5]),  #sub
        float(s2[0]),  #SER
        int(s2[2]),  #sentences
        int(s2[1]),  #wrong sentences
        int(s3[1])  #missed sentences
    )
Exemplo n.º 17
0
def use_cmvn(feat, cmvn, utt2spk=None, std=False):
    '''
    Apply CMVN statistics to feature.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <cmvn>: exkaldi CMVN statistics object (BytesCMVNStatistics or NumpyCMVNStatistics).
        <utt2spk>: utt2spk file path or ScriptTable object.
        <std>: If True, apply std normalization.

    Return:
        A new feature object: numpy if <feat> was a NumpyFeature, bytes otherwise.
    Raise:
        UnsupportedType if <feat>, <cmvn> or <utt2spk> has an unsupported type.
        WrongPath if the <utt2spk> file does not exist.
        KaldiProcessError if the Kaldi command fails or produces no output.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    # Remember the caller's feature type before it is converted to bytes.
    # The original tested type_name(feat) after the conversion, so a
    # NumpyFeature input was never converted back.
    featType = type_name(feat)
    if featType == "BytesFeature":
        feat = feat.sort(by="utt")
    elif featType == "NumpyFeature":
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi feature but got {featType}.")

    if type_name(cmvn) == "BytesCMVNStatistics":
        cmvn = cmvn.sort(by="utt")
    elif type_name(cmvn) == "NumpyCMVNStatistics":
        cmvn = cmvn.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi CMVN statistics but got {type_name(cmvn)}.")

    cmvnTemp = tempfile.NamedTemporaryFile('wb+', suffix='_cmvn.ark')
    utt2spkTemp = tempfile.NamedTemporaryFile('w+',
                                              suffix="_utt2spk",
                                              encoding="utf-8")
    try:
        cmvnTemp.write(cmvn.data)
        cmvnTemp.seek(0)

        if std is True:
            stdOption = " --norm-vars true"
        else:
            stdOption = ""

        if utt2spk is None:
            cmd = f'apply-cmvn{stdOption} ark:{cmvnTemp.name} ark:- ark:-'
        else:
            if isinstance(utt2spk, str):
                if not os.path.isfile(utt2spk):
                    raise WrongPath(f"No such file:{utt2spk}.")
                utt2spkSorted = ScriptTable(
                    name="utt2spk").load(utt2spk).sort()
            elif isinstance(utt2spk, ScriptTable):
                utt2spkSorted = utt2spk.sort()
            else:
                raise UnsupportedType(
                    f"<utt2spk> should be a file path or ScriptTable object but got {type_name(utt2spk)}."
                )
            utt2spkSorted.save(utt2spkTemp)
            utt2spkTemp.seek(0)

            cmd = f'apply-cmvn{stdOption} --utt2spk=ark:{utt2spkTemp.name} ark:{cmvnTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to apply CMVN statistics.')

        newName = f"cmvn({feat.name},{cmvn.name})"
        result = BytesFeature(out, newName, indexTable=None)
        if featType == "NumpyFeature":
            return result.to_numpy()
        return result
    finally:
        cmvnTemp.close()
        utt2spkTemp.close()
Exemplo n.º 18
0
def __compute_feature(wavFile, kaldiTool, useSuffix=None, name="feat"):
    '''
    Compute features from wav file(s) or wav scp table(s) with a Kaldi command.

    Args:
        <wavFile>: a file path, which is listed with the shell "ls" command and may therefore match several files, or a ScriptTable object.
        <kaldiTool>: the Kaldi command (with its options) that reads an scp table and writes an archive.
        <useSuffix>: "wav" or "scp". It declares the file type when the file name ends with neither.
        <name>: a string, the name given to the returned feature.

    Return:
        The features of all files merged into one feature object and renamed to <name>.
    Raise:
        WrongOperation if <wavFile> is a directory or no feature was computed.
        WrongPath if no file matches <wavFile>.
        UnsupportedType if <wavFile> has an unsupported type or the file type is unknown.
        KaldiProcessError if the Kaldi command fails or produces no output.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""
    assert useSuffix in ["", "scp",
                         "wav"], 'Expected <useSuffix> is "scp" or "wav".'

    ExkaldiInfo.vertify_kaldi_existed()

    wavFileTemp = tempfile.NamedTemporaryFile("w+",
                                              suffix=".scp",
                                              encoding="utf-8")
    try:
        if isinstance(wavFile, str):
            if os.path.isdir(wavFile):
                raise WrongOperation(
                    f'Expected <wavFile> is file path but got a directory:{wavFile}.'
                )
            # "ls" lets the shell expand the path into the list of matching files.
            out, err, cod = run_shell_command(f'ls {wavFile}',
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            if out == b'':
                raise WrongPath(f"No such file:{wavFile}.")
            allFiles = out.decode().strip().split('\n')
        elif isinstance(wavFile, ScriptTable):
            wavFile = wavFile.sort()
            wavFile.save(wavFileTemp)
            allFiles = [
                wavFileTemp.name,
            ]
        else:
            raise UnsupportedType(
                f'Expected filename-like string but got a {type_name(wavFile)}.'
            )

        results = []
        for path in allFiles:
            path = os.path.abspath(path)

            # The file name suffix decides the file type; <useSuffix> is only a fallback.
            fileSuffix = path[-3:].lower()
            if fileSuffix in ("wav", "scp"):
                fileType = fileSuffix
            elif useSuffix in ("wav", "scp"):
                fileType = useSuffix
            else:
                raise UnsupportedType(
                    'Unknown file suffix. You can declare it by making <useSuffix> "wav" or "scp".'
                )

            if fileType == "wav":
                # The utterance ID is the file name without its last four characters and without dots.
                uttID = os.path.basename(path)[0:-4].replace(".", "")
                cmd = f"echo {uttID} {path} | {kaldiTool} scp,p:- ark:-"
            else:
                cmd = f"{kaldiTool} scp,p:{path} ark:-"

            out, err, cod = run_shell_command(cmd,
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            # The original tested isinstance(out, int), which is never true,
            # so a non-zero return code was silently ignored.
            if (isinstance(cod, int) and cod != 0) or out == b'':
                print(err.decode())
                raise KaldiProcessError(f'Failed to compute feature:{name}.')
            results.append(BytesFeature(out))
    finally:
        wavFileTemp.close()

    if len(results) == 0:
        raise WrongOperation("No any feature date in file path.")

    result = results[0]
    for other in results[1:]:
        result += other
    result.rename(name)
    return result
Exemplo n.º 19
0
def gmm_decode(feat, hmm, HCLGFile, wordSymbolTable, beam=10, latBeam=8, acwt=1,
				minActive=200, maxActive=7000, maxMem=50000000, config=None, maxThreads=1):
	'''
	Generate a lattice by decoding features with a GMM-HMM model and an HCLG graph.

	Args:
		<feat>: an exkaldi BytesFeature or NumpyFeature object.
		<hmm>: an exkaldi MonophoneHMM or TriphoneHMM object, or a model file path.
		<HCLGFile>: HCLG graph file path.
		<wordSymbolTable>: words.txt file path, exkaldi LexiconBank object or exkaldi ListTable object.
		<beam>: value given to --beam.
		<latBeam>: value given to --lattice-beam.
		<acwt>: value given to --acoustic-scale.
		<minActive>: value given to --min-active.
		<maxActive>: value given to --max-active.
		<maxMem>: value given to --max_mem.
		<config>: extra options, a Python dict object. They are appended behind the options listed above.
		<maxThreads>: if larger than 1, "gmm-latgen-faster-parallel" is run with this number of threads.

		You can use .check_config('gmm_decode') function to get the options that can be set in <config>.
		Also run shell command "gmm-latgen-faster" to look their meaning.
	Return:
		An exkaldi Lattice object named "lat(<name of feat>)".
	'''
	ExkaldiInfo.vertify_kaldi_existed()

	# Only the bytes representation can be piped into the Kaldi tool.
	featType = type_name(feat)
	if featType == "NumpyFeature":
		feat = feat.to_bytes()
	elif featType != "BytesFeature":
		raise UnsupportedType(f"Expected <feat> is an exkaldi feature object but got: {featType}.")

	assert isinstance(HCLGFile, str), "<HCLGFile> should be a file path."
	if not os.path.isfile(HCLGFile):
		raise WrongPath(f"No such file:{HCLGFile}")

	if maxThreads > 1:
		launcher = f"gmm-latgen-faster-parallel --num-threads={maxThreads} "
	else:
		launcher = "gmm-latgen-faster "

	# Every fragment carries its own trailing space, so a plain join gives the command prefix.
	kaldiTool = launcher + "".join([
		'--allow-partial=true ',
		f'--min-active={minActive} ',
		f'--max-active={maxActive} ',
		f'--max_mem={maxMem} ',
		f'--beam={beam} ',
		f'--lattice-beam={latBeam} ',
		f'--acoustic-scale={acwt} ',
	])

	with tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8") as wordsTemp, \
			tempfile.NamedTemporaryFile("wb+", suffix=".mdl") as modelTemp:

		# Resolve the word symbol table to a file the tool can open.
		tableType = type_name(wordSymbolTable)
		if tableType == "LexiconBank":
			wordSymbolTable.dump_dict("words", wordsTemp)
			wordsFile = wordsTemp.name
		elif tableType == "ListTable":
			wordSymbolTable.save(wordsTemp)
			wordsTemp.seek(0)
			wordsFile = wordsTemp.name
		elif isinstance(wordSymbolTable, str):
			if not os.path.isfile(wordSymbolTable):
				raise WrongPath(f"No such file:{wordSymbolTable}.")
			wordsFile = wordSymbolTable
		else:
			raise UnsupportedType(f"<wordSymbolTable> should be a file path or exkaldi LexiconBank object but got {tableType}.")

		kaldiTool += f'--word-symbol-table={wordsFile} '

		# Boolean options become bare flags (only when True); the others become key=value pairs.
		if config is not None and check_config(name='gmm_decode', config=config):
			for key, value in config.items():
				if not isinstance(value, bool):
					kaldiTool += f" {key}={value}"
				elif value:
					kaldiTool += f"{key} "

		# Resolve the acoustic model to a file the tool can open.
		if type_name(hmm) in ("MonophoneHMM", "TriphoneHMM"):
			modelTemp.write(hmm.data)
			modelTemp.seek(0)
			hmmFile = modelTemp.name
		elif isinstance(hmm, str):
			if not os.path.isfile(hmm):
				raise WrongPath(f"No such file:{hmm}.")
			hmmFile = hmm
		else:
			raise UnsupportedType(f"<hmm> should be exkaldi HMM object or file path but got {type_name(hmm)}.")

		# Features go in through stdin, the lattice archive comes back through stdout.
		cmd = f'{kaldiTool} {hmmFile} {HCLGFile} ark:- ark:-'
		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=feat.data)

		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError('Failed to generate lattice.')

		return Lattice(data=out, name=f"lat({feat.name})")
Exemplo n.º 20
0
def train_ngrams_srilm(lexicons, order, textFile, outFile, config=None):
    '''
    Train n-grams language model with the SRILM toolkit ("ngram-count").
    If no discounting flag is specified in <config>, "-kndiscount" is used.

    Args:
        <lexicons>: an exkaldi LexiconBank object.
        <order>: the maximum order of n-grams, a positive int smaller than 10.
        <textFile>: text corpus file whose words are separated by spaces.
        <outFile>: ARPA out file name. ".arpa" is appended if it is missing.
        <config>: extra configures, a Python dict object.

    You can use .check_config("train_ngrams_srilm") function to get configure information that you can set.
    Also you can run shell command "ngram-count" to look their meaning.

    Return:
        the absolute path of the generated ARPA file.
    '''
    assert isinstance(
        order, int
    ) and order > 0 and order < 10, "Expected <order> is a positive int value and it must be smaller than 10."
    assert isinstance(textFile,
                      str), "Expected <textFile> is name-like string."
    assert isinstance(outFile, str), "Expected <outFile> is name-like string."
    assert type_name(
        lexicons
    ) == "LexiconBank", f"Expected <lexicons> is exkaldi LexiconBank object but got {type_name(lexicons)}."

    ExkaldiInfo.prepare_srilm()

    if not os.path.isfile(textFile):
        raise WrongPath(f"No such file:{textFile}")

    # Look at a random sample of at most 100 lines to make sure the corpus
    # looks like space separated text before the expensive training starts.
    cmd = f"shuf {textFile} -n 100"
    out, err, cod = run_shell_command(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
    if isinstance(cod, int) and cod != 0:
        print(err.decode())
        raise ShellProcessError("Failed to sample from text file.")
    if out == b'':
        raise WrongDataFormat("Void text file.")

    sampled = out.decode().strip().split("\n")
    spaceCount = sum(line.count(" ") for line in sampled)
    if spaceCount < len(sampled) // 2:
        raise WrongDataFormat(
            "The text file doesn't seem to be separated by spaces or extremely short."
        )

    wordlist = tempfile.NamedTemporaryFile("w+",
                                           encoding='utf-8',
                                           suffix=".txt")
    try:
        unkSymbol = lexicons("oov")

        # The vocabulary handed to ngram-count is the first item of every
        # "lexiconp" key (presumably the word itself; verify against LexiconBank).
        lexiconp = lexicons("lexiconp")
        words = [x[0] for x in lexiconp.keys()]
        wordlist.write("\n".join(words))
        # seek() also flushes the buffer so that ngram-count sees the whole list.
        wordlist.seek(0)

        # Boolean options become bare flags, the others "key value" pairs.
        extraConfig = " "
        specifyDiscount = False
        if config is not None:
            if check_config(name='train_ngrams_srilm', config=config):
                for key, value in config.items():
                    if isinstance(value, bool):
                        if value is True:
                            extraConfig += f"{key} "
                        if key.endswith("discount"):
                            specifyDiscount = True
                    else:
                        extraConfig += f" {key} {value}"

        # The OOV symbol is quoted: a symbol such as <UNK> would otherwise be
        # taken as shell redirections.
        cmd = f'ngram-count -text {textFile} -order {order} -limit-vocab -vocab {wordlist.name} -unk -map-unk "{unkSymbol}" '
        if not specifyDiscount:
            cmd += "-kndiscount "
        cmd += "-interpolate "
        # Without this line the options given in <config> never reach the tool.
        cmd += extraConfig

        if not outFile.rstrip().endswith(".arpa"):
            outFile += ".arpa"
        make_dependent_dirs(outFile, pathIsFile=True)

        cmd += f" -lm {outFile}"

        out, err, cod = run_shell_command(cmd, stderr=subprocess.PIPE)

        failed = (isinstance(cod, int) and cod != 0) or (
            not os.path.isfile(outFile)) or os.path.getsize(outFile) == 0
        if failed:
            print(err.decode())
            # Do not leave a broken or empty model behind.
            if os.path.isfile(outFile):
                os.remove(outFile)
            raise KaldiProcessError(
                'Failed to generate ngrams language model.')

        return os.path.abspath(outFile)

    finally:
        wordlist.close()
Exemplo n.º 21
0
	def get_nbest(self, n, wordSymbolTable=None, hmm=None, acwt=1, phoneLevel=False, requireAli=False, requireCost=False):
		'''
		Get N best results from the lattice.

		Args:
			<n>: n best results, a positive int value.
			<wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
			                   If None, the table bound to this lattice is used.
			<hmm>: None or file path or HMM object. If None, the model bound to this lattice is used.
			       It is only mandatory when <phoneLevel> is True.
			<acwt>: acoustic weight, a positive int or float value.
			<phoneLevel>: If True, return phone results.
			<requireAli>: If True, return alignment simultaneously.
			<requireCost>: If True, return acoustic model and language model cost simultaneously.

		Return:
			If neither <requireAli> nor <requireCost> is True, a list of Transcription objects ("1-best", "2-best", ...).
			Otherwise a list: [ transcriptions, (AM Metric objects, LM Metric objects,) (NumpyAlignmentTrans objects) ].
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		assert isinstance(n, int) and n > 0, "Expected <n> is a positive int value."
		assert isinstance(acwt, (int,float)) and acwt > 0, "Expected <acwt> is a positive int or float value."

		if self.is_void:
			raise WrongOperation('No any data in lattice.')

		if wordSymbolTable is None:
			assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no wordSymbol table is avaliable."
			wordSymbolTable = self.wordSymbolTable

		if hmm is None:
			# The model is only consumed by lattice-align-phones, so it is mandatory for phone level results only.
			assert (not phoneLevel) or self.hmm is not None, "<hmm> is necessary because no HMM model is avaliable."
			hmm = self.hmm

		wordSymbolTemp = tempfile.NamedTemporaryFile('w+', suffix="_words.txt", encoding='utf-8')
		modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		outAliTemp = tempfile.NamedTemporaryFile('w+', suffix=".ali", encoding='utf-8')
		outCostFile_LM = tempfile.NamedTemporaryFile('w+', suffix=".cost", encoding='utf-8')
		outCostFile_AM = tempfile.NamedTemporaryFile('w+', suffix=".cost", encoding='utf-8')

		try:
			# NOTE(review): the symbol table is validated and dumped here but it is not passed to any
			# command below, so the transcriptions hold whatever nbest-to-linear prints
			# (presumably integer symbol IDs) - confirm that callers convert them.
			if isinstance(wordSymbolTable, str):
				assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
			elif type_name(wordSymbolTable) == "LexiconBank":
				if phoneLevel:
					wordSymbolTable.dump_dict("phones", wordSymbolTemp)
				else:
					wordSymbolTable.dump_dict("words", wordSymbolTemp)
			elif type_name(wordSymbolTable) == "ListTable":
				wordSymbolTable.save(wordSymbolTemp)
				wordSymbolTemp.seek(0)
			else:
				raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

			hmmFile = None
			if hmm is not None:
				if isinstance(hmm, str):
					assert os.path.isfile(hmm), f"No such file: {hmm}."
					hmmFile = hmm
				elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
					hmm.save(modelTemp)
					hmmFile = modelTemp.name
				else:
					raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

			if phoneLevel:
				cmd = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
			else:
				cmd = ""

			cmd += f'lattice-to-nbest --acoustic-scale={acwt} --n={n} ark:- ark:- |'
			# Alignments go to a temp file, transcriptions to stdout.
			cmd += f'nbest-to-linear ark:- ark,t:{outAliTemp.name} ark,t:-'

			if requireCost:
				cmd += f' ark,t:{outCostFile_LM.name} ark,t:{outCostFile_AM.name}'

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError('Failed to get N best results.')

			def sperate_n_bests(lines):
				# Group "<uttID>-<rank> <content>" lines by utterance, in input order.
				grouped = {}
				for line in lines:
					fields = line.strip().split(maxsplit=1)
					if len(fields) == 0:
						continue
					# Strip the "-<rank>" suffix; a key without "-" is kept whole.
					uttID = fields[0].rsplit("-", 1)[0]
					content = fields[1] if len(fields) > 1 else " "
					grouped.setdefault(uttID, []).append(content)

				# Transpose: ranked[k] maps every utterance to its (k+1)-th entry.
				ranked = []
				for uttID, entries in grouped.items():
					for rank, content in enumerate(entries):
						if rank >= len(ranked):
							ranked.append({})
						ranked[rank][uttID] = content
				return ranked

			def read_n_bests(handle):
				# Read a text archive written by the command and split it per rank.
				handle.seek(0)
				return sperate_n_bests(handle.read().strip().split("\n"))

			NBEST = [
				Transcription(one, name=f"{index}-best")
				for index, one in enumerate(sperate_n_bests(out.decode().strip().split("\n")), start=1)
			]
			finalResult = [NBEST,]

			if requireCost:
				AMSCORE = [
					Metric(one, name=f"AM-{index}-best")
					for index, one in enumerate(read_n_bests(outCostFile_AM), start=1)
				]
				LMSCORE = [
					Metric(one, name=f"LM-{index}-best")
					for index, one in enumerate(read_n_bests(outCostFile_LM), start=1)
				]
				finalResult = [NBEST,AMSCORE,LMSCORE]

			if requireAli:
				ALIGNMENT = []
				for index, one in enumerate(read_n_bests(outAliTemp), start=1):
					temp = {}
					for key, value in one.items():
						temp[key] = np.array(value.strip().split(), dtype=np.int32)
					ALIGNMENT.append( NumpyAlignmentTrans(temp, name=f"{index}-best") )
				finalResult.append(ALIGNMENT)

			# Only the transcriptions were asked for: return them without the wrapping list.
			if len(finalResult) == 1:
				finalResult = finalResult[0]

			return finalResult

		finally:
			wordSymbolTemp.close()
			modelTemp.close()
			outAliTemp.close()
			outCostFile_LM.close()
			outCostFile_AM.close()
Exemplo n.º 22
0
def train_ngrams_srilm(lexicons, order, text, outFile, config=None):
    '''
    Train N-Grams language model with SriLM toolkit.
    If you don't specify the discount by the <config> option, "-kndiscount" is used by default.

    Args:
        <lexicons>: an exkaldi LexiconBank object.
        <order>: the maximum order of N-Grams, not larger than 9.
        <text>: a text corpus file or an exkaldi transcription object.
        <outFile>: output file name of arpa LM. ".arpa" is appended if it is missing.
        <config>: extra configurations, a Python dict object.

    You can use .check_config("train_ngrams_srilm") function to get a reference of extra configurations.
    Also you can run shell command "ngram-count" to look their usage.

    Return:
        the name of the generated ARPA file.
    '''
    declare.is_lexicon_bank("lexicons", lexicons)
    declare.is_positive_int("order", order)
    declare.is_potential_transcription("text", text)
    declare.is_valid_file_name("outFile", outFile)
    # verify the max order
    declare.less_equal("order", order, "max order", 9)
    # prepare srilm tool
    ExkaldiInfo.prepare_srilm()

    with FileHandleManager() as fhm:
        # check whether this is a reasonable text corpus that should be splited by space.
        if isinstance(text, str):
            cmd = f"shuf {text} -n 100"
            out, err, cod = run_shell_command(cmd,
                                              stdout="PIPE",
                                              stderr="PIPE")
            if (isinstance(cod, int) and cod != 0):
                print(err.decode())
                raise ShellProcessError(
                    f"Failed to sample from text file:{text}.")
            if out == b'':
                raise WrongDataFormat(f"Void text file:{text}.")

            sampled = out.decode().strip().split("\n")
            spaceCount = sum(line.count(" ") for line in sampled)
            if spaceCount < len(sampled) // 2:
                raise WrongDataFormat(
                    "The text file doesn't seem to be separated by spaces or sentences are extremely short."
                )

        else:
            sampleText = text.subset(nRandom=100)
            spaceCount = 0
            for key, value in sampleText.items():
                assert isinstance(
                    value, str
                ), f"Transcription must be string but got: {type_name(value)}."
                spaceCount += value.count(" ")
            if spaceCount < len(sampleText) // 2:
                raise WrongDataFormat(
                    "The text file doesn't seem to be separated by spaces or sentences are extremely short."
                )
            # ngram-count reads the corpus from a file: dump the object without utterance IDs.
            textTemp = fhm.create("a+", suffix=".txt", encoding="utf-8")
            text.save(textTemp, discardUttID=True)
            text = textTemp.name

        unkSymbol = lexicons("oov")

        # The vocabulary handed to ngram-count is made of the keys of the "words" lexicon.
        wordlistTemp = fhm.create("w+", encoding='utf-8', suffix=".txt")
        words = lexicons("words")
        words = "\n".join(words.keys())
        wordlistTemp.write(words)
        # seek() also flushes the buffer so that ngram-count sees the whole list.
        wordlistTemp.seek(0)

        # Boolean options become bare flags, the others "key value" pairs.
        extraConfig = " "
        specifyDiscount = False
        if config is not None:
            if check_config(name='train_ngrams_srilm', config=config):
                for key, value in config.items():
                    if isinstance(value, bool):
                        if value is True:
                            extraConfig += f"{key} "
                        if key.endswith("discount"):
                            specifyDiscount = True
                    else:
                        extraConfig += f" {key} {value}"

        cmd = f'ngram-count -text {text} -order {order} -limit-vocab -vocab {wordlistTemp.name} -unk -map-unk "{unkSymbol}" '
        if not specifyDiscount:
            cmd += "-kndiscount "
        cmd += "-interpolate "
        # Without this line the options given in <config> never reach the tool.
        cmd += extraConfig

        if not outFile.rstrip().endswith(".arpa"):
            outFile += ".arpa"
        make_dependent_dirs(outFile, pathIsFile=True)
        cmd += f" -lm {outFile}"

        out, err, cod = run_shell_command(cmd, stderr="PIPE")

        failed = (isinstance(cod, int) and cod != 0) or (
            not os.path.isfile(outFile)) or os.path.getsize(outFile) == 0
        if failed:
            print(err.decode())
            # Do not leave a broken or empty model behind.
            if os.path.isfile(outFile):
                os.remove(outFile)
            raise KaldiProcessError(
                'Failed to generate N-Grams language model.')

        return outFile
Exemplo n.º 23
0
	def get_1best(self, wordSymbolTable=None, hmm=None, lmwt=1, acwt=1.0, phoneLevel=False):
		'''
		Get 1 best result with text formation.

		Args:
			<wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
			                   If None, the table bound to this lattice is used.
			<hmm>: None or file path or HMM object. If None, the model bound to this lattice is used.
			       It is only mandatory when <phoneLevel> is True.
			<lmwt>: language model weight, a non-negative int value.
			<acwt>: acoustic model weight.
			<phoneLevel>: If True, return phone results.
		Return:
			An exkaldi Transcription object named "1-best-words" or "1-best-phones".
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any data in lattice.')

		assert isinstance(lmwt, int) and lmwt >=0, "Expected <lmwt> is a non-negative int number."

		if wordSymbolTable is None:
			assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no wordSymbol table is avaliable."
			wordSymbolTable = self.wordSymbolTable

		if hmm is None:
			# The model is only consumed by lattice-align-phones, so it is mandatory for phone level results only.
			assert (not phoneLevel) or self.hmm is not None, "<hmm> is necessary because no HMM model is avaliable."
			hmm = self.hmm

		modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		wordSymbolTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")

		try:
			# Resolve the symbol table to a file path that lattice-best-path can open.
			if isinstance(wordSymbolTable, str):
				assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
				wordsFile = wordSymbolTable
			elif type_name(wordSymbolTable) == "LexiconBank":
				if phoneLevel:
					wordSymbolTable.dump_dict("phones", wordSymbolTemp)
				else:
					wordSymbolTable.dump_dict("words", wordSymbolTemp)
				wordsFile = wordSymbolTemp.name
			elif type_name(wordSymbolTable) == "ListTable":
				wordSymbolTable.save(wordSymbolTemp)
				wordSymbolTemp.seek(0)
				wordsFile = wordSymbolTemp.name
			else:
				raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

			hmmFile = None
			if hmm is not None:
				if isinstance(hmm, str):
					assert os.path.isfile(hmm), f"No such file: {hmm}."
					hmmFile = hmm
				elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
					hmm.save(modelTemp)
					hmmFile = modelTemp.name
				else:
					raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

			if phoneLevel:
				cmd0 = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
			else:
				cmd0 = ""

			cmd1 = f"lattice-best-path --lm-scale={lmwt} --acoustic-scale={acwt} --word-symbol-table={wordsFile} --verbose=2 ark:- ark,t:- "
			cmd = cmd0 + cmd1

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)
			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError('Failed to get 1-best from lattice.')

			newName = "1-best-phones" if phoneLevel else "1-best-words"
			results = Transcription(name=newName)
			# Every output line is "<uttID> <content>"; an utterance without content maps to " ".
			for line in out.decode().strip().split("\n"):
				record = line.strip().split(maxsplit=1)
				if len(record) == 0:
					continue
				elif len(record) == 1:
					results[record[0]] = " "
				else:
					results[record[0]] = record[1]
			return results

		finally:
			modelTemp.close()
			wordSymbolTemp.close()
Exemplo n.º 24
0
def wer(ref, hyp, ignore=None, mode='all'):
    '''
    Compute WER (word error rate) between <ref> and <hyp> with Kaldi "compute-wer".

    Args:
        <ref>, <hyp>: exkaldi transcription object or file path.
        <ignore>: ignore a symbol. It is removed from both texts with sed 's/<ignore> //g'
                  before scoring, so it is interpreted as a sed pattern.
        <mode>: "all" or "present".
    Return:
        a namedtuple of score information with the fields: WER, words, insErr, delErr, subErr,
        SER, sentences, wrongSentences, missedSentences.
    Raise:
        WrongPath: a file path does not exist.
        UnsupportedType: <ref> or <hyp> is neither a Transcription object nor a file path.
        WrongDataFormat: filtering <ignore> failed or left nothing to score.
        KaldiProcessError: compute-wer printed nothing, or something that cannot be parsed.
    '''
    assert mode in ['all',
                    'present'], 'Expected <mode> to be "present" or "all".'
    ExkaldiInfo.vertify_kaldi_existed()

    def plain_source(target, label, temp):
        # Return a file path holding <target> as it is.
        if type_name(target) == "Transcription":
            target.save(temp)
            temp.seek(0)
            return temp.name
        elif isinstance(target, str):
            if not os.path.isfile(target):
                raise WrongPath(f"No such file:{target}.")
            return target
        else:
            raise UnsupportedType(
                f'<{label}> should be exkaldi Transcription object or file path.'
            )

    def filtered_source(target, label, temp):
        # Return a file path holding <target> with the <ignore> symbol removed.
        if type_name(target) == "Transcription":
            text = target.save()
        elif isinstance(target, str):
            if not os.path.isfile(target):
                raise WrongPath(f"No such file:{target}.")
            with open(target, "r", encoding="utf-8") as fr:
                text = fr.read()
        else:
            raise UnsupportedType(
                f'<{label}> should be exkaldi Transcription object or file path.'
            )

        cmd = f'sed "s/{ignore} //g" > {temp.name}'
        _, err, cod = run_shell_command(cmd,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        inputs=text.encode())
        # sed writes into the temp file, so its stdout is always empty:
        # success is judged from the exit code and the size of that file.
        if cod != 0 or os.path.getsize(temp.name) == 0:
            print(err.decode())
            raise WrongDataFormat(f"<{label}> has wrong data formation.")
        return temp.name

    hypTemp = tempfile.NamedTemporaryFile("w+",
                                          suffix=".txt",
                                          encoding="utf-8")
    refTemp = tempfile.NamedTemporaryFile("w+",
                                          suffix=".txt",
                                          encoding="utf-8")
    try:
        prepare = plain_source if ignore is None else filtered_source
        hypFileName = prepare(hyp, "hyp", hypTemp)
        refFileName = prepare(ref, "ref", refTemp)

        cmd = f'compute-wer --text --mode={mode} ark:{refFileName} ark,p:{hypFileName}'
        scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)
    finally:
        hypTemp.close()
        refTemp.close()

    if len(scoreOut) == 0:
        print(scoreErr.decode())
        raise KaldiProcessError("Failed to compute WER.")

    out = scoreOut.decode().split("\n")
    pattern1 = r'%WER (.*) \[ (.*) \/ (.*), (.*) ins, (.*) del, (.*) sub \]'
    pattern2 = r"%SER (.*) \[ (.*) \/ (.*) \]"
    pattern3 = r"Scored (.*) sentences, (.*) not present in hyp."
    try:
        s1 = re.findall(pattern1, out[0])[0]
        s2 = re.findall(pattern2, out[1])[0]
        s3 = re.findall(pattern3, out[2])[0]
    except IndexError:
        # The report does not have the three expected lines.
        print(scoreErr.decode())
        raise KaldiProcessError("Failed to compute WER.")

    # compute-wer prints "[ errors / total ]" on both the %WER and the %SER line.
    return namedtuple("Score", [
        "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
        "wrongSentences", "missedSentences"
    ])(
        float(s1[0]),  #WER
        int(s1[2]),  #words
        int(s1[3]),  #ins
        int(s1[4]),  #del
        int(s1[5]),  #sub
        float(s2[0]),  #SER
        int(s2[2]),  #sentences
        int(s2[1]),  #wrong sentences
        int(s3[1])  #missed sentences
    )