Пример #1
0
def use_fmllr(feat,fmllrMat,utt2spk,outFile=None):
	'''
	Apply fMLLR transform matrices to features through the Kaldi "transform-feats" command.

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature or index table object.
		<fmllrMat>: exkaldi fMLLR transform matrix or index table object.
		<utt2spk>: file name or ListTable object.
		<outFile>: output file name.

	Return:
		exkaldi feature or index table object.
	'''
	# NOTE(review): check_multiple_resources presumably expands the arguments into
	# parallel lists of equal length -- confirm against its definition.
	feats,fmllrMats,utt2spks,outFiles = check_multiple_resources(feat,fmllrMat,utt2spk,outFile=outFile)

	# Validate each group of resources and build the name of the archive it yields.
	archiveNames = []
	for _,oneFeat,oneMat,oneMap in zip(range(len(outFiles)),feats,fmllrMats,utt2spks):
		declare.is_feature("feat",oneFeat)
		declare.is_fmllr_matrix("fmllrMat",oneMat)
		declare.is_potential_list_table("utt2spk",oneMap)
		archiveNames.append(f"fmllr({oneFeat.name},{oneMat.name})")

	return run_kaldi_commands_parallel(
		{"feat":feats,"transMat":fmllrMats,"utt2spk":utt2spks,"outFile":outFiles},
		'transform-feats --utt2spk=ark:{utt2spk} {transMat} {feat} ark:{outFile}',
		analyzeResult=True,
		generateArchive="feat",
		archiveNames=archiveNames,
	)
Пример #2
0
def compute_cmvn_stats(feat,spk2utt=None,name="cmvn",outFile=None):
	'''
	Compute CMVN statistics through the Kaldi "compute-cmvn-stats" command.

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature object or index table object.
		<spk2utt>: spk2utt file or exkaldi ListTable object. May be None.
		<name>: name of output CMVN object.
		<outFile>: output file name.

	Return:
		exkaldi CMVN statistics or index table object.
	'''
	feats,spk2utts,names,outFiles = check_multiple_resources(feat,spk2utt,name,outFile=outFile)

	for oneFeat,oneMap in zip(feats,spk2utts):
		declare.is_feature("feat",oneFeat)
		# A missing spk2utt map is allowed and needs no further check.
		if oneMap is None:
			continue
		declare.is_potential_list_table("spk2utt",oneMap)

	# Only the first spk2utt entry decides whether the --spk2utt option is passed.
	if spk2utts[0] is not None:
		cmdPattern = 'compute-cmvn-stats --spk2utt=ark:{spk2utt} {feat} ark:{outFile}'
		resources = {"feat":feats,"spk2utt":spk2utts,"outFile":outFiles}
	else:
		cmdPattern = 'compute-cmvn-stats {feat} ark:{outFile}'
		resources = {"feat":feats,"outFile":outFiles}

	return run_kaldi_commands_parallel(
		resources,
		cmdPattern,
		analyzeResult=True,
		generateArchive="cmvn",
		archiveNames=names,
	)
Пример #3
0
def utt_to_spk(utts,utt2spk):
	'''
	Accept one or more utterance IDs and return the speaker IDs they map to.

	Args:
		<utts>: a string or list or tuple of utterance IDs.
		<utt2spk>: utt2spk file or ListTable object.
	
	Return:
		a sorted list of the distinct speaker IDs. Duplicates are removed,so the
		result is not aligned one-to-one with <utts>.

	Raise:
		WrongOperation if an utterance ID is missing from the utt2spk map.
		AssertionError if a speaker entry contains more than one token.
	'''
	declare.is_classes("utterance IDs",utts,(str,tuple,list))
	if isinstance(utts,str):
		# A single ID is handled as a one-element list.
		utts = [utts,]
	else:
		declare.members_are_valid_strings("utterance IDs",utts)

	declare.is_potential_list_table("utt2spk",utt2spk)
	if isinstance(utt2spk,str):
		utt2spk = load_list_table(utt2spk)
	
	spks = set()
	for utt in utts:
		try:
			spk = utt2spk[utt]
		except KeyError:
			raise WrongOperation(f"Miss utterance ID {utt} in utt2spk map.")
		# Validate the looked-up value (the speaker entry),not the key: the key was
		# already checked above,while an invalid value would otherwise only fail
		# later inside .strip().
		declare.is_valid_string("The value of utt2spk",spk)
		spktemp = spk.strip().split(maxsplit=1)
		assert len(spktemp) == 1,f"speaker ID in utt2spk has unexpected space: {spk}."
		spks.add(spktemp[0])
	
	return sorted(spks)
Пример #4
0
def spk_to_utt(spks,spk2utt):
	'''
	Accept one or more speaker IDs and return the utterance IDs they map to.

	Args:
		<spks>: a string or list or tuple of speaker IDs.
		<spk2utt>: spk2utt file or ListTable object.
	
	Return:
		a sorted list of the distinct utterance IDs.
	'''
	declare.is_classes("speaker IDs",spks,(str,tuple,list))

	if isinstance(spks,str):
		# A single ID is handled as a one-element list.
		spks = [spks,]
	else:
		declare.members_are_valid_strings("speaker IDs",spks)

	declare.is_potential_list_table("spk2utt",spk2utt)
	if isinstance(spk2utt,str):
		spk2utt = load_list_table(spk2utt)

	uniqueUtts = set()
	for spk in spks:
		try:
			uttLine = spk2utt[spk]
		except KeyError:
			raise WrongOperation(f"Miss speaker ID {spk} in spk2utt map.")
		declare.is_valid_string("The value of spk2utt",uttLine)
		# The value holds whitespace-separated utterance IDs.
		uniqueUtts.update(uttLine.strip().split())

	return sorted(uniqueUtts)
Пример #5
0
def utt2spk_to_spk2utt(utt2spk,outFile=None):
	'''
	Invert an utt2spk map into a spk2utt map.

	Args:
		<utt2spk>: file name or exkaldi ListTable object.
		<outFile>: file name or None.
	
	Return:
		<outFile> if it was given (the map is saved there),otherwise an exkaldi ListTable object.
	'''
	declare.is_potential_list_table("utt2spk",utt2spk)
	if outFile is not None:
		# NOTE(review): other declare.* calls here pass a label first -- confirm this
		# one-argument call matches the signature of is_valid_file_name.
		declare.is_valid_file_name(outFile)

	uttMap = load_list_table(utt2spk) if isinstance(utt2spk,str) else utt2spk

	spk2utt = ListTable(name="spk2utt")
	for utt,spk in uttMap.items():
		declare.is_valid_string("utterance ID",utt)
		declare.is_valid_string("speaker ID",spk)
		assert utt.count(" ") == 0,f"<utterance ID> is not a continuous string but spaces existed: {utt}."
		assert spk.count(" ") == 0,f"<speaker ID> is not a continuous string but spaces existed: {spk}."

		try:
			# Append to the space-separated utterance list of a known speaker.
			spk2utt[spk] += f" {utt}"
		except KeyError:
			# First utterance seen for this speaker.
			spk2utt[spk] = utt

	if outFile is not None:
		spk2utt.save(outFile)
		return outFile
	return spk2utt
Пример #6
0
def use_cmvn(feat,cmvn,utt2spk=None,std=False,outFile=None):
	'''
	Apply CMVN statistics to features through the Kaldi "apply-cmvn" command.

	Share Args:
		Null

	Parallel Args:
		<feat>: exkaldi feature or index table object.
		<cmvn>: exkaldi CMVN statistics or index object.
		<utt2spk>: file path or ListTable object. May be None.
		<std>: a bool value passed to the --norm-vars option. If true,apply std normalization.
		<outFile>: out file name.

	Return:
		feature or index table object.
	'''
	feats,cmvns,utt2spks,stds,outFiles = check_multiple_resources(feat,cmvn,utt2spk,std,outFile=outFile)

	# Validate each group of resources and build the name of the archive it yields.
	archiveNames = []
	for _,oneFeat,oneCmvn,oneMap,oneStd in zip(range(len(outFiles)),feats,cmvns,utt2spks,stds):
		declare.is_feature("feat",oneFeat)
		declare.is_cmvn("cmvn",oneCmvn)
		if oneMap is not None:
			declare.is_potential_list_table("utt2spk",oneMap)
		declare.is_bool("std",oneStd)
		archiveNames.append( f"cmvn({oneFeat.name},{oneCmvn.name})" )

	# Only the first utt2spk entry decides whether the --utt2spk option is passed.
	if utt2spks[0] is not None:
		cmdPattern = 'apply-cmvn --norm-vars={std} --utt2spk=ark:{utt2spk} {cmvn} {feat} ark:{outFile}'
		resources = {"feat":feats,"cmvn":cmvns,"utt2spk":utt2spks,"std":stds,"outFile":outFiles}
	else:
		cmdPattern = 'apply-cmvn --norm-vars={std} {cmvn} {feat} ark:{outFile}'
		resources = {"feat":feats,"cmvn":cmvns,"std":stds,"outFile":outFiles}

	return run_kaldi_commands_parallel(
		resources,
		cmdPattern,
		analyzeResult=True,
		generateArchive="feat",
		archiveNames=archiveNames,
	)
Пример #7
0
def spk2utt_to_utt2spk(spk2utt,outFile=None):
	'''
	Invert a spk2utt map into an utt2spk map.

	Args:
		<spk2utt>: file name or exkaldi ListTable object.
		<outFile>: file name or None.

	Return:
		<outFile> if it was given (the map is saved there),otherwise an exkaldi ListTable object.
	'''
	declare.is_potential_list_table("spk2utt",spk2utt)
	if outFile is not None:
		# NOTE(review): other declare.* calls here pass a label first -- confirm this
		# one-argument call matches the signature of is_valid_file_name.
		declare.is_valid_file_name(outFile)

	spkMap = load_list_table(spk2utt) if isinstance(spk2utt,str) else spk2utt

	utt2spk = ListTable(name="utt2spk")
	for spk,utts in spkMap.items():
		declare.is_valid_string("utterance IDs",utts)
		declare.is_valid_string("speaker ID",spk)
		assert spk.count(" ") == 0,f"<speaker ID> is not a continuous string but spaces existed: {spk}."

		# The value holds whitespace-separated utterance IDs.
		for utt in utts.split():
			try:
				utt2spk[utt]
			except KeyError:
				# Not recorded yet: bind the utterance to this speaker.
				utt2spk[utt] = spk
				continue
			# The lookup succeeded,so the utterance already belongs to a speaker.
			raise WrongDataFormat(f"utterance ID:{utt} has existed toward multiple speakers.")

	if outFile is not None:
		utt2spk.save(outFile)
		return outFile
	return utt2spk