Exemplo n.º 1
0
def get_ceps_true_speaker(speaker_samples_ceps='combined'):
    """Return MFCC features for the true speaker's recordings.

    With speaker_samples_ceps == 'combined', all true-speaker audio is
    fetched as one signal and a single MFCC matrix is returned.  Otherwise
    the first three '*true_speaker*.wav' files in the current directory are
    processed individually and three MFCC matrices are returned.
    NOTE(review): 'nceps' is taken from module scope — confirm it is defined
    at import time.
    """
    nfft = 512
    t_frame = 20 * 10**(-3)  # frame duration in seconds (20 ms)

    if speaker_samples_ceps == 'combined':
        fs, combined_audio = get_np_audiofiles(UBM=False,
                                               TS=True,
                                               normalize=True)
        # Samples per frame: 320 for t_frame=20 ms and fs=16 kHz.
        win_len = t_frame * fs
        ceps, _mspec, _spec = mfcc(combined_audio, win_len, nfft, fs, nceps)
        return ceps

    # 'separated' case: one MFCC matrix per true-speaker wav file.
    candidates = os.listdir(os.getcwd())
    true_speaker_wavs = [name for name in candidates
                         if '.wav' in name and 'true_speaker' in name]

    per_file_ceps = []
    for idx in range(3):
        fs, samples = read_audiofile(true_speaker_wavs[idx], normalize=True)
        win_len = t_frame * fs
        feats, _mspec, _spec = mfcc(samples, win_len, nfft, fs, nceps)
        per_file_ceps.append(feats)

    return per_file_ceps[0], per_file_ceps[1], per_file_ceps[2]
Exemplo n.º 2
0
def get_ceps_UBM(str_gender='all', exclude_speaker=None):
	"""Compute MFCC features for the UBM training audio.

	Parameters:
		str_gender: 'male', 'female', or anything else for all speakers.
		exclude_speaker: speaker to leave out of the UBM audio; forwarded
			to get_np_audiofiles (previously it was silently ignored).

	Returns the MFCC matrix ('ceps').  NOTE(review): 'nceps' is taken from
	module scope — confirm it is defined at import time.

	Fixes over the previous revision:
	  * exclude_speaker is now actually forwarded instead of hard-coded None;
	  * the gender dispatch uses 'elif', so the 'all' branch no longer
	    overwrites the result computed for str_gender == 'male'.
	"""
	# -------------------
	# Getting audio files in Numpy array format
	# -------------------
	[fs, np_audio_male, np_audio_female, np_audio_all] = get_np_audiofiles(UBM=True, exclude_speaker=exclude_speaker)

	# ----------------------------------------------------
	# Computing MFCC
	# ----------------------------------------------------
	t_frame = 20*10**(-3)  # Duration in seconds of each frame
	nwin = t_frame*fs
	# nwin is the number of samples per frame.
	# For t_frame=20ms and fs=16kHz, nwin=320
	nfft = 512

	if str_gender == 'male':
		ceps, mspec, spec = mfcc(np_audio_male, nwin, nfft, fs, nceps)
	elif str_gender == 'female':
		ceps, mspec, spec = mfcc(np_audio_female, nwin, nfft, fs, nceps)
	else:
		ceps, mspec, spec = mfcc(np_audio_all, nwin, nfft, fs, nceps)
	return ceps
Exemplo n.º 3
0
def get_ceps_UBM(str_gender='all', exclude_speaker=None):
    """Compute MFCC features for the UBM training audio.

    Parameters:
        str_gender: 'male', 'female', or anything else for all speakers.
        exclude_speaker: speaker to leave out of the UBM audio; forwarded
            to get_np_audiofiles (previously it was silently ignored).

    Returns the MFCC matrix ('ceps').  NOTE(review): 'nceps' is taken from
    module scope — confirm it is defined at import time.

    Fixes over the previous revision:
      * exclude_speaker is now actually forwarded instead of hard-coded None;
      * the gender dispatch uses 'elif', so the 'all' branch no longer
        overwrites the result computed for str_gender == 'male'.
    """
    # -------------------
    # Getting audio files in Numpy array format
    # -------------------
    [fs, np_audio_male, np_audio_female,
     np_audio_all] = get_np_audiofiles(UBM=True,
                                       exclude_speaker=exclude_speaker)

    # ----------------------------------------------------
    # Computing MFCC
    # ----------------------------------------------------
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nwin = t_frame * fs
    # nwin is the number of samples per frame.
    # For t_frame=20ms and fs=16kHz, nwin=320
    nfft = 512

    if str_gender == 'male':
        ceps, mspec, spec = mfcc(np_audio_male, nwin, nfft, fs, nceps)
    elif str_gender == 'female':
        ceps, mspec, spec = mfcc(np_audio_female, nwin, nfft, fs, nceps)
    else:
        ceps, mspec, spec = mfcc(np_audio_all, nwin, nfft, fs, nceps)
    return ceps
Exemplo n.º 4
0
def get_ceps_true_speaker(speaker_samples_ceps='combined'):
    """Return MFCC features for the true speaker's recordings.

    'combined' mode returns one MFCC matrix over all true-speaker audio;
    any other value processes the first three '*true_speaker*.wav' files in
    the current directory and returns three MFCC matrices.
    NOTE(review): 'nceps' comes from module scope — confirm it is defined.
    """
    nfft = 512
    t_frame = 20 * 10**(-3)  # 20 ms frame duration

    def _features(signal, rate):
        # MFCC for one signal; t_frame*rate = samples per frame
        # (320 for 20 ms at 16 kHz).
        feats, _unused_mspec, _unused_spec = mfcc(signal, t_frame * rate,
                                                  nfft, rate, nceps)
        return feats

    if speaker_samples_ceps == 'combined':
        rate, combined = get_np_audiofiles(UBM=False, TS=True, normalize=True)
        return _features(combined, rate)
    else:
        listing = os.listdir(os.getcwd())  # files in current directory
        speaker_wavs = [entry for entry in listing
                        if ('.wav' in entry) and ('true_speaker' in entry)]

        rate_a, audio_a = read_audiofile(speaker_wavs[0], normalize=True)
        rate_b, audio_b = read_audiofile(speaker_wavs[1], normalize=True)
        rate_c, audio_c = read_audiofile(speaker_wavs[2], normalize=True)

        return (_features(audio_a, rate_a),
                _features(audio_b, rate_b),
                _features(audio_c, rate_c))
Exemplo n.º 5
0
def audio2ceps(filename, flag_normalize=True):
    '''
	Reads an audio file and creates a text file with its MFCC.
	'''

    base_name = filename.split('.')[0]  # file name without extension

    # --------------------------------------------
    # Load the audio into a numpy array
    fs, samples = read_audiofile(filename, normalize=flag_normalize)

    # --------------------------------------------
    # MFCC extraction
    nfft = 512
    t_frame = 20 * 10**(-3)  # seconds per frame (20 ms)
    # Samples per frame: 320 for t_frame=20 ms and fs=16 kHz.
    frame_len = t_frame * fs

    ceps, _mspec, _spec = mfcc(samples, frame_len, nfft, fs, nceps)
    nframes, _ncolumns = ceps.shape

    # --------------------------------------------
    # Write the feature matrix with a descriptive header.
    # NOTE(review): the header reports the module-level RATE, not the file's
    # fs — confirm they always agree.
    header_text = ('MFCC from file ' + filename + '.\nInfo:\n\tSample rate: '
                   + str(RATE) + '\n\tNumber of MFCC per frame: ' + str(nceps)
                   + '\n\tNumber of frames (samples): ' + str(nframes)
                   + '\n\n')
    np.savetxt(base_name + '_ceps.txt', ceps, header=header_text)
Exemplo n.º 6
0
def audio2ceps(filename, flag_normalize=True):
	'''
	Reads an audio file and creates a text file with its MFCC.
	'''
	stem = filename.split('.')[0]  # file name without its extension

	# --------------------------------------------
	# Read the audio into a numpy array
	fs, signal = read_audiofile(filename, normalize=flag_normalize)

	# --------------------------------------------
	# MFCC extraction; t_frame*fs = samples per frame
	# (320 for 20 ms frames at 16 kHz).
	t_frame = 20*10**(-3)
	nfft = 512
	ceps, _mspec, _spec = mfcc(signal, t_frame*fs, nfft, fs, nceps)
	nframes, _ncols = ceps.shape

	# --------------------------------------------
	# Write the features with a descriptive header.
	# NOTE(review): header reports module-level RATE, not this file's fs —
	# confirm they always agree.
	header = ('MFCC from file %s.\nInfo:\n\tSample rate: %s'
	          '\n\tNumber of MFCC per frame: %s'
	          '\n\tNumber of frames (samples): %s\n\n'
	          % (filename, RATE, nceps, nframes))
	np.savetxt(stem + '_ceps.txt', ceps, header=header)
Exemplo n.º 7
0
def audio2ceps(filename, flag_normalize=True):
	'''
	Reads an audio file and creates a text file with its MFCC.

	Known platform quirk: on the Raspberry (sys.platform == 'linux2') the
	installed numpy's savetxt does not accept the 'header' keyword, so the
	header is skipped there.
	'''
	stem = filename.split('.')[0]  # file name without its extension

	# --------------------------------------------
	# Read the audio into a numpy array
	fs, signal = read_audiofile(filename, normalize=flag_normalize)

	# --------------------------------------------
	# MFCC extraction; t_frame*fs = samples per frame
	# (320 for 20 ms frames at 16 kHz).
	t_frame = 20*10**(-3)
	nfft = 512
	ceps, _mspec, _spec = mfcc(signal, t_frame*fs, nfft, fs, nceps)
	nframes, _ncols = ceps.shape

	# --------------------------------------------
	# Write the features; include the header only where savetxt supports it.
	str_header = 'MFCC from file ' + filename + '.\nInfo:\n\tSample rate: ' + str(RATE) + '\n\tNumber of MFCC per frame: ' + str(nceps) + '\n\tNumber of frames (samples): ' + str(nframes) + '\n\n'
	out_path = stem + '_ceps.txt'
	if sys.platform == 'linux2':  # in Raspbian
		np.savetxt(out_path, ceps)
	else:
		np.savetxt(out_path, ceps, header=str_header)
Exemplo n.º 8
0
def audio2ceps(filename, flag_normalize=True):
    '''
	Reads an audio file and creates a text file with its MFCC.

	Known platform quirk: on the Raspberry (sys.platform == 'linux2') the
	installed numpy's savetxt does not accept the 'header' keyword, so the
	header is skipped there.
	'''

    stem = filename.split('.')[0]  # file name without its extension

    # --------------------------------------------
    # Read the audio into a numpy array
    fs, signal = read_audiofile(filename, normalize=flag_normalize)

    # --------------------------------------------
    # MFCC extraction
    t_frame = 20 * 10**(-3)  # seconds per frame (20 ms)
    nfft = 512
    # t_frame*fs = samples per frame (320 for 20 ms at 16 kHz).
    ceps, _mspec, _spec = mfcc(signal, t_frame * fs, nfft, fs, nceps)
    nframes, _ncols = ceps.shape

    # --------------------------------------------
    # Write the features; pass 'header' only where savetxt supports it.
    str_header = 'MFCC from file ' + filename + '.\nInfo:\n\tSample rate: ' + str(
        RATE) + '\n\tNumber of MFCC per frame: ' + str(
            nceps) + '\n\tNumber of frames (samples): ' + str(nframes) + '\n\n'
    save_kwargs = {}
    if sys.platform != 'linux2':  # header unsupported in Raspbian's numpy
        save_kwargs['header'] = str_header
    np.savetxt(stem + '_ceps.txt', ceps, **save_kwargs)
Exemplo n.º 9
0
def get_ceps_threshold_files():
	"""Compute MFCC for the first three '*threshold_audio.wav' files in cwd.

	Returns a 3-tuple (ceps_01, ceps_02, ceps_03), one MFCC matrix per file.
	Raises IndexError if fewer than three threshold files are present
	(same failure mode as the previous copy-pasted revision, which this
	loop replaces).  NOTE(review): 'nceps' is a module-level global.
	"""
	files_in_folder = os.listdir(os.getcwd())  # files in current directory
	threshold_files = [x for x in files_in_folder if ('threshold_audio.wav' in x)]
	t_frame = 20*10**(-3)  # Duration in seconds of each frame
	nfft = 512

	ceps_list = []
	for i in range(3):  # exactly three files, as before
		fs, np_threshold = read_audiofile(threshold_files[i], normalize=True)
		# nwin: samples per frame; 320 for t_frame=20ms and fs=16kHz
		nwin = t_frame*fs
		ceps, mspec, spec = mfcc(np_threshold, nwin, nfft, fs, nceps)
		ceps_list.append(ceps)

	return ceps_list[0], ceps_list[1], ceps_list[2]
Exemplo n.º 10
0
def get_ceps_test_speaker():
	"""Return the MFCC matrix of the '*test_speaker.wav' file in cwd.

	NOTE(review): 'nceps' is taken from module scope.
	"""
	matches = [entry for entry in os.listdir(os.getcwd())
	           if 'test_speaker.wav' in entry]
	fs, signal = read_audiofile(matches[0], normalize=True)

	nfft = 512
	t_frame = 20*10**(-3)  # 20 ms frames
	# t_frame*fs = samples per frame (320 for 20 ms at 16 kHz).
	features, _mspec, _spec = mfcc(signal, t_frame*fs, nfft, fs, nceps)

	return features
Exemplo n.º 11
0
def get_ceps_threshold_files():
    """Compute MFCC for the first three '*threshold_audio.wav' files in cwd.

    Returns a 3-tuple (ceps_01, ceps_02, ceps_03), one MFCC matrix per file.
    Raises IndexError if fewer than three threshold files are present
    (same failure mode as the previous copy-pasted revision, which this
    loop replaces).  NOTE(review): 'nceps' is a module-level global.
    """
    files_in_folder = os.listdir(
        os.getcwd())  # List of files in current directory
    threshold_files = [
        x for x in files_in_folder if ('threshold_audio.wav' in x)
    ]
    t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
    nfft = 512

    ceps_list = []
    for i in range(3):  # exactly three files, as before
        fs, np_threshold = read_audiofile(threshold_files[i], normalize=True)
        # nwin: samples per frame; 320 for t_frame=20ms and fs=16kHz
        nwin = t_frame * fs
        ceps, mspec, spec = mfcc(np_threshold, nwin, nfft, fs, nceps)
        ceps_list.append(ceps)

    return ceps_list[0], ceps_list[1], ceps_list[2]
Exemplo n.º 12
0
def get_ceps_test_speaker():
    """Return the MFCC matrix of the '*test_speaker.wav' file in cwd.

    NOTE(review): 'nceps' is taken from module scope.
    """
    listing = os.listdir(os.getcwd())  # files in current directory
    candidates = [name for name in listing if 'test_speaker.wav' in name]
    fs, test_signal = read_audiofile(candidates[0], normalize=True)

    t_frame = 20 * 10**(-3)  # 20 ms frames
    nfft = 512
    # t_frame*fs = samples per frame (320 for 20 ms at 16 kHz).
    nwin = t_frame * fs

    features, _mspec, _spec = mfcc(test_signal, nwin, nfft, fs, nceps)
    return features
Exemplo n.º 13
0
def evaluate_all_vs_single_true(flag_speaker_samples='combined'):
	
	# ----------------------------------------
	# Model of True Speaker
	if flag_speaker_samples=='combined':
		model_true = get_GMM_true_speaker(speaker_samples=flag_speaker_samples)
	else: # Neste caso flag_speaker_samples=='separated'
		[model_true_01, model_true_02, model_true_03] = get_GMM_true_speaker(speaker_samples=flag_speaker_samples)
	
	# ----------------------------------------
	# Setting up text file
	
	files_in_folder = os.listdir(os.getcwd()) # List of files in current directory
	text_output_files = [x for x in files_in_folder if ('Verification_Test_04_true' in x) and (speaker_samples in x)]

	number = len(text_output_files) + 1
	f = open('Verification_Test_04_true_' + speaker_samples + '_0' + str(number) + '.txt','w')
	f.write('# ' + text_dependency + ' ASV\n# Header: a single ASV test is carried out for each test speaker, with the true speaker fixed, but having three elocutions.\n# GENDER\tINDEX\tAGE\tSCORE\tDECISION\n')

	n_accepted = 0
	n_false_acceptance = 0
	n_rejected = 0
	n_false_rejection = 0


	'''
	----------------------------------------------------
	Training UBM for all UBM-speakers except the True one
	----------------------------------------------------
	'''
	cov_type = 'full'
	UBM_all = get_UBM_all(cov_type='full')

	'''
	----------------------------------------------------
	Getting threshold
	----------------------------------------------------
	'''
	threshold = get_score_threshold(speaker_samples_threshold = speaker_samples)


	files_in_folder = os.listdir(os.getcwd()) # List of files in current directory
	audiofiles_male = [x for x in files_in_folder if ('.wav' in x) and ('M' in x) and ('UBM' not in x) and ('_test' not in x) and ('_true' not in x)]
	audiofiles_female = [x for x in files_in_folder if ('.wav' in x) and ('F' in x) and ('UBM' not in x) and ('_test' not in x) and ('_true' not in x)]
	audiofiles_S = [x for x in files_in_folder if ('.wav' in x) and ('S' in x) and ('UBM' not in x) and ('_test' not in x) and ('_true' not in x)]

	# ------------------------------------------------------------------------
	# Running through the lists of utterances from non-True speakers
	for gender_file_list in [audiofiles_male, audiofiles_female]:
		for current_test_speaker in gender_file_list:
			print '\nTEST 04'
			print 'For file ' + current_test_speaker + ':'

			[gender,index,aux] = current_test_speaker.split('_') # "aux" is just a disposable variable
			[age,aux] = aux.split('.')

			'''
			----------------------------------------------------
			Training test speaker
			----------------------------------------------------
			'''
			[fs, audio_test] = read_audiofile(current_test_speaker,normalize=True)
	
			t_frame = 20*10**(-3) # Duration in seconds of each frame
			nwin = t_frame*fs # nwin is the number of samples per frame. Para t_frame=20ms e fs=16kHz, nwin=320
			nfft = 512

			ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

			'''
			----------------------------------------------------
			Scoring
			----------------------------------------------------
			'''
			if speaker_samples=='combined':
				
				score_UBM = UBM_all.score(ceps_test)
				score_true = model_true.score(ceps_test)

				score = np.sum(score_true - score_UBM)
				print '\nScore: ', score
			else: # Neste caso speaker_samples=='separated'
				
				score_UBM = UBM_all.score(ceps_test)
				score01 = model_true_01.score(ceps_test)
				score02 = model_true_02.score(ceps_test)
				score03 = model_true_03.score(ceps_test)

				score_true = np.max(np.array([score01, score02, score03]))

				score = np.sum(score_true - score_UBM)
				print '\nScore: ', score
	
			print "Difference between sum(log) probabilites (True VS. UBM_all): ", score
	
			# ----------------------------------
			# DECISION
			# ----------------------------------
			if score >= threshold:
				decision = 'ACCEPT'
				n_accepted += 1
				n_false_acceptance += 1
			else:
				decision = 'Reject'
				n_rejected += 1
		
			f.write(gender + '\t' + index + '\t' + age + '\t' + str(score) + '\t' + decision + '\n')
	
	# ------------------------------------------------------------------------
	# Running through the utterances of true speaker (prefix = S)
	for current_test_speaker in audiofiles_S:
		print '\nTEST 04'
		print 'For file ' + current_test_speaker + ':'

		[gender,index,aux] = current_test_speaker.split('_') # "aux" is just a disposable variable
		[age,aux] = aux.split('.')

		'''
		----------------------------------------------------
		Training test speaker
		----------------------------------------------------
		'''
		[fs, audio_test] = read_audiofile(current_test_speaker,normalize=True)

		t_frame = 20*10**(-3) # Duration in seconds of each frame
		nwin = t_frame*fs # nwin is the number of samples per frame. Para t_frame=20ms e fs=16kHz, nwin=320
		nfft = 512

		ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

		'''
		----------------------------------------------------
		Scoring
		----------------------------------------------------
		'''
		if speaker_samples=='combined':
			
			score_UBM = UBM_all.score(ceps_test)
			score_true = model_true.score(ceps_test)

			score = np.sum(score_true - score_UBM)
			print '\nScore: ', score
		else: # Neste caso speaker_samples=='separated'
			
			score_UBM = UBM_all.score(ceps_test)
			score01 = model_true_01.score(ceps_test)
			score02 = model_true_02.score(ceps_test)
			score03 = model_true_03.score(ceps_test)

			score_true = np.max(np.array([score01, score02, score03]))

			score = np.sum(score_true - score_UBM)
			print '\nScore: ', score

		print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

		# ----------------------------------
		# DECISION
		# ----------------------------------
		if score >= threshold:
			decision = 'ACCEPT'
			n_accepted += 1
		else:
			decision = 'Reject'
			n_rejected += 1
			n_false_rejection += 1
	
		f.write(gender + '\t' + index + '\t' + age + '\t' + str(score) + '\t' + decision + '\n')
	
	
	f.write('# Number of accepted: ' + str(n_accepted) + '\n# Number of rejected: ' + str(n_rejected) + '\n# Number of false-acceptance: ' + str(n_false_acceptance) + '\n# Number of false-rejection: ' + str(n_false_rejection) + '\n# Threshold: ' + str(threshold))
	f.close()
Exemplo n.º 14
0
def evaluate_all_vs_true():
    """
	# ======================================================================
	# TESTING WITH ALL SPEAKERS AGAINST A SINGLE TRUE SPEAKER
	"""
    files_in_folder = os.listdir(os.getcwd())  # List of files in current directory
    audiofiles_male = [
        x
        for x in files_in_folder
        if (".wav" in x) and ("M" in x) and ("UBM" not in x) and ("_true" not in x) and ("_test" not in x)
    ]
    audiofiles_female = [
        x
        for x in files_in_folder
        if (".wav" in x) and ("F" in x) and ("UBM" not in x) and ("_true" not in x) and ("_test" not in x)
    ]

    print "len(audiofiles_male): ", len(audiofiles_male)
    print "len(audiofiles_female): ", len(audiofiles_female)

    true_speakers_list = (
        audiofiles_male + audiofiles_female + ["01_true_speaker.wav", "02_true_speaker.wav", "03_true_speaker.wav"]
    )  # Concatenation of both lists

    for true_speaker in true_speakers_list:

        """
		----------------------------------------------------
		Training UBM for all training speakers (male & female) except the True one
		----------------------------------------------------
		"""
        cov_type = "full"
        UBM_all = get_UBM_all(cov_type="full", exclude_speaker=true_speaker)

        """
		----------------------------------------------------
		Setting up .txt files and lists
		----------------------------------------------------
		"""
        f = open("Verification_Test_02_" + true_speaker.split(".")[0] + ".txt", "w")
        f.write(
            "# Header: a single ASV test is carried out for each speaker S, with the speaker "
            + true_speaker.split(".")[0]
            + " being the True Speaker in all tests.\n# GENDER\tINDEX\tAGE\tSCORE\n"
        )

        for gender_file_list in [audiofiles_male, audiofiles_female]:
            for current_test_speaker in gender_file_list:
                print "\nTEST 02 (all against one)"
                print "For file " + current_test_speaker + ":"

                [gender, index, aux] = current_test_speaker.split("_")  # "aux" is just a disposable variable
                [age, aux] = aux.split(".")

                """
				----------------------------------------------------
				Training true speaker
				----------------------------------------------------
				"""
                fs, audio_true = spwave.read(true_speaker)

                t_frame = 20 * 10 ** (-3)  # Duration in seconds of each frame
                nwin = t_frame * fs
                # nwin is the number of samples per frame.
                # Para t_frame=20ms e fs=16kHz, nwin=320
                nfft = 512

                ceps_true, mspec, spec = mfcc(audio_true, nwin, nfft, fs, nceps)

                ngaussians = 10
                cov_type = "full"

                gmm = GMM(n_components=ngaussians, covariance_type=cov_type)
                model_true = gmm.fit(ceps_true)
                print "model_true converged? ", model_true.converged_

                """
				----------------------------------------------------
				Training test speaker
				----------------------------------------------------
				"""
                fs, audio_test = spwave.read(current_test_speaker)

                nwin = t_frame * fs
                # nwin is the number of samples per frame.
                # Para t_frame=20ms e fs=16kHz, nwin=320

                ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

                """
				----------------------------------------------------
				Scoring
				----------------------------------------------------
				"""
                log_prob_true = model_true.score(ceps_test)
                log_prob_UBM = UBM_all.score(ceps_test)

                print np.sum(log_prob_true)
                print np.sum(log_prob_UBM)
                score = np.sum(log_prob_true) - np.sum(log_prob_UBM)
                print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

                f.write(gender + "\t" + index + "\t" + age + "\t" + str(score) + "\n")
        f.close()
Exemplo n.º 15
0
def evaluate_all_vs_single_true(speaker_samples="combined"):
    current_folder = os.getcwd()
    text_dependency = current_folder.split("/")[
        -1
    ]  # Takes the name of the enclosing folder, if it is TD (text dependent) or TI (text independent)

    files_in_folder = os.listdir(os.getcwd())  # List of files in current directory
    text_output_files = [x for x in files_in_folder if ("Verification_Test_04_true" in x) and (speaker_samples in x)]

    number = len(text_output_files) + 1
    f = open("Verification_Test_04_true_" + speaker_samples + "_0" + str(number) + ".txt", "w")
    f.write(
        "# "
        + text_dependency
        + " ASV\n# Header: a single ASV test is carried out for each test speaker, with the true speaker fixed, but having three elocutions.\n# GENDER\tINDEX\tAGE\tSCORE\tDECISION\n"
    )

    n_accepted = 0
    n_false_acceptance = 0
    n_rejected = 0
    n_false_rejection = 0

    """
	----------------------------------------------------
	Training UBM for all UBM-speakers except the True one
	----------------------------------------------------
	"""
    cov_type = "full"
    UBM_all = get_UBM_all(cov_type="full")

    """
	----------------------------------------------------
	Getting threshold
	----------------------------------------------------
	"""
    threshold = get_score_threshold(speaker_samples_threshold=speaker_samples)

    files_in_folder = os.listdir(os.getcwd())  # List of files in current directory
    audiofiles_male = [
        x
        for x in files_in_folder
        if (".wav" in x) and ("M" in x) and ("UBM" not in x) and ("_test" not in x) and ("_true" not in x)
    ]
    audiofiles_female = [
        x
        for x in files_in_folder
        if (".wav" in x) and ("F" in x) and ("UBM" not in x) and ("_test" not in x) and ("_true" not in x)
    ]
    audiofiles_S = [
        x
        for x in files_in_folder
        if (".wav" in x) and ("S" in x) and ("UBM" not in x) and ("_test" not in x) and ("_true" not in x)
    ]

    # Running through the lists of utterances from non-True speakers
    for gender_file_list in [audiofiles_male, audiofiles_female]:
        for current_test_speaker in gender_file_list:
            print "\nTEST 04"
            print "For file " + current_test_speaker + ":"

            [gender, index, aux] = current_test_speaker.split("_")  # "aux" is just a disposable variable
            [age, aux] = aux.split(".")

            """
			----------------------------------------------------
			Training test speaker
			----------------------------------------------------
			"""
            [fs, audio_test] = read_audiofile(current_test_speaker, normalize=True)

            t_frame = 20 * 10 ** (-3)  # Duration in seconds of each frame
            nwin = t_frame * fs  # nwin is the number of samples per frame. Para t_frame=20ms e fs=16kHz, nwin=320
            nfft = 512

            ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

            """
			----------------------------------------------------
			Scoring
			----------------------------------------------------
			"""
            if speaker_samples == "combined":
                model_true = get_GMM_true_speaker(speaker_samples="combined")

                score_UBM = UBM_all.score(ceps_test)
                score_true = model_true.score(ceps_test)

                score = np.sum(score_true - score_UBM)
                print "\nScore: ", score
            else:  # Neste caso speaker_samples=='separated'
                [model_true_01, model_true_02, model_true_03] = get_GMM_true_speaker(speaker_samples="separated")

                score_UBM = UBM_all.score(ceps_test)
                score01 = model_true_01.score(ceps_test)
                score02 = model_true_02.score(ceps_test)
                score03 = model_true_03.score(ceps_test)

                score_true = np.max(np.array([score01, score02, score03]))

                score = np.sum(score_true - score_UBM)
                print "\nScore: ", score

            print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

            # ----------------------------------
            # DECISION
            # ----------------------------------
            if score >= threshold:
                decision = "ACCEPT"
                n_accepted += 1
                n_false_acceptance += 1
            else:
                decision = "Reject"
                n_rejected += 1

            f.write(gender + "\t" + index + "\t" + age + "\t" + str(score) + "\t" + decision + "\n")

            # Running through the utterances of true speaker (prefix = S)
    for current_test_speaker in audiofiles_S:
        print "\nTEST 04"
        print "For file " + current_test_speaker + ":"

        [gender, index, aux] = current_test_speaker.split("_")  # "aux" is just a disposable variable
        [age, aux] = aux.split(".")

        """
		----------------------------------------------------
		Training test speaker
		----------------------------------------------------
		"""
        [fs, audio_test] = read_audiofile(current_test_speaker, normalize=True)

        t_frame = 20 * 10 ** (-3)  # Duration in seconds of each frame
        nwin = t_frame * fs  # nwin is the number of samples per frame. Para t_frame=20ms e fs=16kHz, nwin=320
        nfft = 512

        ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)

        """
		----------------------------------------------------
		Scoring
		----------------------------------------------------
		"""
        if speaker_samples == "combined":
            model_true = get_GMM_true_speaker(speaker_samples="combined")

            score_UBM = UBM_all.score(ceps_test)
            score_true = model_true.score(ceps_test)

            score = np.sum(score_true - score_UBM)
            print "\nScore: ", score
        else:  # Neste caso speaker_samples=='separated'
            [model_true_01, model_true_02, model_true_03] = get_GMM_true_speaker(speaker_samples="separated")

            score_UBM = UBM_all.score(ceps_test)
            score01 = model_true_01.score(ceps_test)
            score02 = model_true_02.score(ceps_test)
            score03 = model_true_03.score(ceps_test)

            score_true = np.max(np.array([score01, score02, score03]))

            score = np.sum(score_true - score_UBM)
            print "\nScore: ", score

        print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

        # ----------------------------------
        # DECISION
        # ----------------------------------
        if score >= threshold:
            decision = "ACCEPT"
            n_accepted += 1
        else:
            decision = "Reject"
            n_rejected += 1
            n_false_rejection += 1

        f.write(gender + "\t" + index + "\t" + age + "\t" + str(score) + "\t" + decision + "\n")

    f.write(
        "# Number of accepted: "
        + str(n_accepted)
        + "\n# Number of rejected: "
        + str(n_rejected)
        + "\n# Number of false-acceptance: "
        + str(n_false_acceptance)
        + "\n# Number of false-rejection: "
        + str(n_false_rejection)
        + "\n# Threshold: "
        + str(threshold)
    )
    f.close()
Exemplo n.º 16
0
def evaluate_all_vs_true():
    '''
	# ======================================================================
	# TESTING WITH ALL SPEAKERS AGAINST A SINGLE TRUE SPEAKER
	'''
    files_in_folder = os.listdir(
        os.getcwd())  # List of files in current directory
    audiofiles_male = [
        x for x in files_in_folder if ('.wav' in x) and ('M' in x) and (
            'UBM' not in x) and ('_true' not in x) and ('_test' not in x)
    ]
    audiofiles_female = [
        x for x in files_in_folder if ('.wav' in x) and ('F' in x) and (
            'UBM' not in x) and ('_true' not in x) and ('_test' not in x)
    ]

    print 'len(audiofiles_male): ', len(audiofiles_male)
    print 'len(audiofiles_female): ', len(audiofiles_female)

    true_speakers_list = audiofiles_male + audiofiles_female + [
        '01_true_speaker.wav', '02_true_speaker.wav', '03_true_speaker.wav'
    ]  # Concatenation of both lists

    for true_speaker in true_speakers_list:
        '''
		----------------------------------------------------
		Training UBM for all training speakers (male & female) except the True one
		----------------------------------------------------
		'''
        cov_type = 'full'
        UBM_all = get_UBM_all(cov_type='full', exclude_speaker=true_speaker)
        '''
		----------------------------------------------------
		Setting up .txt files and lists
		----------------------------------------------------
		'''
        f = open('Verification_Test_02_' + true_speaker.split('.')[0] + '.txt',
                 'w')
        f.write(
            '# Header: a single ASV test is carried out for each speaker S, with the speaker '
            + true_speaker.split('.')[0] +
            ' being the True Speaker in all tests.\n# GENDER\tINDEX\tAGE\tSCORE\n'
        )

        for gender_file_list in [audiofiles_male, audiofiles_female]:
            for current_test_speaker in gender_file_list:
                print '\nTEST 02 (all against one)'
                print 'For file ' + current_test_speaker + ':'

                [gender, index, aux] = current_test_speaker.split(
                    '_')  # "aux" is just a disposable variable
                [age, aux] = aux.split('.')
                '''
				----------------------------------------------------
				Training true speaker
				----------------------------------------------------
				'''
                fs, audio_true = spwave.read(true_speaker)

                t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
                nwin = t_frame * fs
                # nwin is the number of samples per frame.
                # Para t_frame=20ms e fs=16kHz, nwin=320
                nfft = 512

                ceps_true, mspec, spec = mfcc(audio_true, nwin, nfft, fs,
                                              nceps)

                ngaussians = 10
                cov_type = 'full'

                gmm = GMM(n_components=ngaussians, covariance_type=cov_type)
                model_true = gmm.fit(ceps_true)
                print "model_true converged? ", model_true.converged_
                '''
				----------------------------------------------------
				Training test speaker
				----------------------------------------------------
				'''
                fs, audio_test = spwave.read(current_test_speaker)

                nwin = t_frame * fs
                # nwin is the number of samples per frame.
                # Para t_frame=20ms e fs=16kHz, nwin=320

                ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs,
                                              nceps)
                '''
				----------------------------------------------------
				Scoring
				----------------------------------------------------
				'''
                log_prob_true = model_true.score(ceps_test)
                log_prob_UBM = UBM_all.score(ceps_test)

                print np.sum(log_prob_true)
                print np.sum(log_prob_UBM)
                score = np.sum(log_prob_true) - np.sum(log_prob_UBM)
                print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

                f.write(gender + '\t' + index + '\t' + age + '\t' +
                        str(score) + '\n')
        f.close()
Exemplo n.º 17
0
def evaluate_all_vs_single_true(flag_speaker_samples='combined'):

    # ----------------------------------------
    # Model of True Speaker
    if flag_speaker_samples == 'combined':
        model_true = get_GMM_true_speaker(speaker_samples=flag_speaker_samples)
    else:  # Neste caso flag_speaker_samples=='separated'
        [model_true_01, model_true_02, model_true_03
         ] = get_GMM_true_speaker(speaker_samples=flag_speaker_samples)

    # ----------------------------------------
    # Setting up text file

    files_in_folder = os.listdir(
        os.getcwd())  # List of files in current directory
    text_output_files = [
        x for x in files_in_folder
        if ('Verification_Test_04_true' in x) and (speaker_samples in x)
    ]

    number = len(text_output_files) + 1
    f = open(
        'Verification_Test_04_true_' + speaker_samples + '_0' + str(number) +
        '.txt', 'w')
    f.write(
        '# ' + text_dependency +
        ' ASV\n# Header: a single ASV test is carried out for each test speaker, with the true speaker fixed, but having three elocutions.\n# GENDER\tINDEX\tAGE\tSCORE\tDECISION\n'
    )

    n_accepted = 0
    n_false_acceptance = 0
    n_rejected = 0
    n_false_rejection = 0
    '''
	----------------------------------------------------
	Training UBM for all UBM-speakers except the True one
	----------------------------------------------------
	'''
    cov_type = 'full'
    UBM_all = get_UBM_all(cov_type='full')
    '''
	----------------------------------------------------
	Getting threshold
	----------------------------------------------------
	'''
    threshold = get_score_threshold(speaker_samples_threshold=speaker_samples)

    files_in_folder = os.listdir(
        os.getcwd())  # List of files in current directory
    audiofiles_male = [
        x for x in files_in_folder if ('.wav' in x) and ('M' in x) and (
            'UBM' not in x) and ('_test' not in x) and ('_true' not in x)
    ]
    audiofiles_female = [
        x for x in files_in_folder if ('.wav' in x) and ('F' in x) and (
            'UBM' not in x) and ('_test' not in x) and ('_true' not in x)
    ]
    audiofiles_S = [
        x for x in files_in_folder if ('.wav' in x) and ('S' in x) and (
            'UBM' not in x) and ('_test' not in x) and ('_true' not in x)
    ]

    # ------------------------------------------------------------------------
    # Running through the lists of utterances from non-True speakers
    for gender_file_list in [audiofiles_male, audiofiles_female]:
        for current_test_speaker in gender_file_list:
            print '\nTEST 04'
            print 'For file ' + current_test_speaker + ':'

            [gender, index, aux] = current_test_speaker.split(
                '_')  # "aux" is just a disposable variable
            [age, aux] = aux.split('.')
            '''
			----------------------------------------------------
			Training test speaker
			----------------------------------------------------
			'''
            [fs, audio_test] = read_audiofile(current_test_speaker,
                                              normalize=True)

            t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
            nwin = t_frame * fs  # nwin is the number of samples per frame. Para t_frame=20ms e fs=16kHz, nwin=320
            nfft = 512

            ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)
            '''
			----------------------------------------------------
			Scoring
			----------------------------------------------------
			'''
            if speaker_samples == 'combined':

                score_UBM = UBM_all.score(ceps_test)
                score_true = model_true.score(ceps_test)

                score = np.sum(score_true - score_UBM)
                print '\nScore: ', score
            else:  # Neste caso speaker_samples=='separated'

                score_UBM = UBM_all.score(ceps_test)
                score01 = model_true_01.score(ceps_test)
                score02 = model_true_02.score(ceps_test)
                score03 = model_true_03.score(ceps_test)

                score_true = np.max(np.array([score01, score02, score03]))

                score = np.sum(score_true - score_UBM)
                print '\nScore: ', score

            print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

            # ----------------------------------
            # DECISION
            # ----------------------------------
            if score >= threshold:
                decision = 'ACCEPT'
                n_accepted += 1
                n_false_acceptance += 1
            else:
                decision = 'Reject'
                n_rejected += 1

            f.write(gender + '\t' + index + '\t' + age + '\t' + str(score) +
                    '\t' + decision + '\n')

    # ------------------------------------------------------------------------
    # Running through the utterances of true speaker (prefix = S)
    for current_test_speaker in audiofiles_S:
        print '\nTEST 04'
        print 'For file ' + current_test_speaker + ':'

        [gender, index, aux] = current_test_speaker.split(
            '_')  # "aux" is just a disposable variable
        [age, aux] = aux.split('.')
        '''
		----------------------------------------------------
		Training test speaker
		----------------------------------------------------
		'''
        [fs, audio_test] = read_audiofile(current_test_speaker, normalize=True)

        t_frame = 20 * 10**(-3)  # Duration in seconds of each frame
        nwin = t_frame * fs  # nwin is the number of samples per frame. Para t_frame=20ms e fs=16kHz, nwin=320
        nfft = 512

        ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)
        '''
		----------------------------------------------------
		Scoring
		----------------------------------------------------
		'''
        if speaker_samples == 'combined':

            score_UBM = UBM_all.score(ceps_test)
            score_true = model_true.score(ceps_test)

            score = np.sum(score_true - score_UBM)
            print '\nScore: ', score
        else:  # Neste caso speaker_samples=='separated'

            score_UBM = UBM_all.score(ceps_test)
            score01 = model_true_01.score(ceps_test)
            score02 = model_true_02.score(ceps_test)
            score03 = model_true_03.score(ceps_test)

            score_true = np.max(np.array([score01, score02, score03]))

            score = np.sum(score_true - score_UBM)
            print '\nScore: ', score

        print "Difference between sum(log) probabilites (True VS. UBM_all): ", score

        # ----------------------------------
        # DECISION
        # ----------------------------------
        if score >= threshold:
            decision = 'ACCEPT'
            n_accepted += 1
        else:
            decision = 'Reject'
            n_rejected += 1
            n_false_rejection += 1

        f.write(gender + '\t' + index + '\t' + age + '\t' + str(score) + '\t' +
                decision + '\n')

    f.write('# Number of accepted: ' + str(n_accepted) +
            '\n# Number of rejected: ' + str(n_rejected) +
            '\n# Number of false-acceptance: ' + str(n_false_acceptance) +
            '\n# Number of false-rejection: ' + str(n_false_rejection) +
            '\n# Threshold: ' + str(threshold))
    f.close()
Exemplo n.º 18
0
def evaluate_all_vs_true():
	'''
	Test every male/female speaker against every candidate "true" speaker.

	For each candidate true speaker (all regular recordings plus the three
	dedicated true-speaker recordings), a UBM is trained on everyone else,
	a GMM is trained on the true speaker, and each test utterance is scored
	as sum(log P(true)) - sum(log P(UBM)).  One report file
	'Verification_Test_02_<speaker>.txt' is written per true speaker.
	'''
	files_in_folder = os.listdir(os.getcwd()) # List of files in current directory
	# Test pool: '<GENDER>_<INDEX>_<AGE>.wav', excluding UBM/true/test recordings.
	audiofiles_male = [x for x in files_in_folder if ('.wav' in x) and ('M' in x) and ('UBM' not in x) and ('_true' not in x) and ('_test' not in x)]
	audiofiles_female = [x for x in files_in_folder if ('.wav' in x) and ('F' in x) and ('UBM' not in x) and ('_true' not in x) and ('_test' not in x)]
	
	print 'len(audiofiles_male): ', len(audiofiles_male)
	print 'len(audiofiles_female): ', len(audiofiles_female)
	
	true_speakers_list = audiofiles_male + audiofiles_female + ['01_true_speaker.wav','02_true_speaker.wav','03_true_speaker.wav'] # Concatenation of both lists

	for true_speaker in true_speakers_list:
	
		'''
		----------------------------------------------------
		Training UBM for all training speakers (male & female) except the True one
		----------------------------------------------------
		'''
		cov_type = 'full'
		UBM_all = get_UBM_all(cov_type='full',exclude_speaker = true_speaker)
	
		'''
		----------------------------------------------------
		Setting up .txt files and lists
		----------------------------------------------------
		'''
		# One report file per true speaker; closed after all test speakers are scored.
		f = open('Verification_Test_02_' + true_speaker.split('.')[0] + '.txt','w')
		f.write('# Header: a single ASV test is carried out for each speaker S, with the speaker ' + true_speaker.split('.')[0] + ' being the True Speaker in all tests.\n# GENDER\tINDEX\tAGE\tSCORE\n')

		for gender_file_list in [audiofiles_male, audiofiles_female]:
			for current_test_speaker in gender_file_list:
				print '\nTEST 02 (all against one)'
				print 'For file ' + current_test_speaker + ':'
		
				[gender,index,aux] = current_test_speaker.split('_') # "aux" is just a disposable variable
				[age,aux] = aux.split('.')
		
				'''
				----------------------------------------------------
				Training true speaker
				----------------------------------------------------
				'''
				# NOTE(review): this model depends only on true_speaker; retraining
				# it here for every test speaker is redundant work that could be
				# hoisted out of the two inner loops.
				fs, audio_true = spwave.read(true_speaker)

				t_frame = 20*10**(-3) # Duration in seconds of each frame
				nwin = t_frame*fs
				# nwin is the number of samples per frame.
				# For t_frame=20ms and fs=16kHz, nwin=320
				nfft = 512

				ceps_true, mspec, spec = mfcc(audio_true, nwin, nfft, fs, nceps)

				ngaussians = 10
				cov_type = 'full'
			
				gmm = GMM(n_components = ngaussians, covariance_type = cov_type)
				model_true = gmm.fit(ceps_true)
				print "model_true converged? ",model_true.converged_
		
				'''
				----------------------------------------------------
				Training test speaker
				----------------------------------------------------
				'''
				fs, audio_test = spwave.read(current_test_speaker)

				nwin = t_frame*fs
				# nwin is the number of samples per frame.
				# For t_frame=20ms and fs=16kHz, nwin=320

				ceps_test, mspec, spec = mfcc(audio_test, nwin, nfft, fs, nceps)


				'''
				----------------------------------------------------
				Scoring
				----------------------------------------------------
				'''
				# Score = difference of summed per-frame log-probabilities.
				log_prob_true = model_true.score(ceps_test)
				log_prob_UBM = UBM_all.score(ceps_test)

				print np.sum(log_prob_true)
				print np.sum(log_prob_UBM)
				score = np.sum(log_prob_true) - np.sum(log_prob_UBM)
				print "Difference between sum(log) probabilites (True VS. UBM_all): ", score
		
				f.write(gender + '\t' + index + '\t' + age + '\t' + str(score) + '\n')
		f.close()