# Example #1
# 0
def assistant():
    """Run one voice-assistant interaction: play a prompt, record the
    user's reply, transcribe it, and dispatch on keywords in the text
    (set a timer / check doneness / recognize the food).

    Relies on module-level helpers defined elsewhere: texttospeech,
    speechtotext, set_timer, pa (audio playback), ra (audio recorder)
    and pm (probe/vision module).
    """
    name = texttospeech('how can I help you')

    # Play audio "How can I help you?" in user's earphone
    pa.play(name)

    # Let the user record his/her input voice
    filename = ra.record()

    # Recognize and translate the user's input speech to text using IBM Bluemix NLP service
    speech2 = speechtotext(filename)

    # speech2[0] returns the confidence of the text
    # speech2[1] returns the text content
    # We assume that when confidence > 0.25, the user records effective audio
    if speech2[0] > 0.25:  # fixed: was 'Speech2', an undefined name (NameError)
        # If the user wants to set timer
        if "time" in speech2[1]:
            texttospeech('how many minutes')
            pa.play('output.wav')
            filename = ra.record()
            speech2 = speechtotext(filename)
            if speech2[0] > 0.25:
                try:
                    settime = int(speech2[1])
                except ValueError:
                    # Confident transcript, but not a number we can use
                    texttospeech('Sorry I do not understand')
                    pa.play('output.wav')
                else:
                    set_timer(settime)
            else:
                texttospeech('Sorry I do not understand')
                pa.play('output.wav')

        # If the user wants to check the doneness of the food
        elif 'check' in speech2[1]:
            doneness = pm.well_done()
            texttospeech(doneness)
            pa.play('output.wav')

        # If the user wants to recognize the food
        elif 'recognize' in speech2[1]:
            tmp = 'The labels for this object are'
            label_list = pm.what_is_it()
            for label in label_list:
                tmp = tmp + str(label)
            texttospeech(tmp)
            pa.play('output.wav')

        # We ignore all other kinds of input speech
        else:
            texttospeech('Sorry I do not understand')
            pa.play('output.wav')

    # The confidence value is too low, we ignore this input
    else:
        texttospeech('Sorry I do not understand')
        pa.play('output.wav')
        time.sleep(30)
    audi = sys.argv[1]
else: 
	audi=input("Enter a wav file: ")

# Reject empty or single-character input (neither a usable file name
# nor the 'rec' command).
if len(audi) <= 1:
    print("invalid input \n")
    exit()

print("\n")

# "rec <seconds>" records a fresh clip instead of analysing an existing
# file. Guard argv length first — the original indexed sys.argv[2]
# unconditionally, which raised IndexError when 'rec' came from input().
if audi == 'rec' and len(sys.argv) > 2:
    if sys.argv[2].isdigit():
        seconds = int(sys.argv[2])
        tmpa = " recording : " + str(seconds) + "s"
        print(tmpa)  # fixed: stray trailing 'x' was a syntax error
        spinner.start(tmpa)
        record = ra.record(seconds, "audio.wav")
        spinner.stop()
        print(" saved file name : ", record, "\n\n")
        audi = record
    else:
        print("invalid input \n")
        exit()

spinner.start(" analysing audio ...")

filename = audi
n_speakers = -1
# Example #3
# 0
        #This will be the file name the user's attempt will be saved in
        filename = DIR_PATH + "/file" + str(num_files) + ".wav"


        #Prompt user to begin recording
        record_msg = "Click to start recording.\n" + \
                    "Press Control-C while in the command line application to stop recording\n" + \
                    ("Otherwise, recording will stop after %d seconds." % record_audio.RECORD_SECONDS)
        g.msgbox(record_msg, ok_button = "Begin Recording")
    except Exception as e:
        print "Could not create GUI", e
        #Fall back to command line interface
        enter = raw_input("Press Enter to record...")

    print "Recording audio..."
    record_audio.record(filename)
    print "recorded audio"

# A command-line argument overrides the recorded file as analysis input.
if len(sys.argv) >= 2:
    filename = sys.argv[1]

downsample = 1
samplerate = 44100 // downsample

# An optional second argument supplies an explicit sample rate.
if len(sys.argv) > 2:
    samplerate = int(sys.argv[2])

win_s = 4096 // downsample  # FFT window size
hop_s = 512 // downsample   # hop size between analysis frames

# Open the audio source; it reports the sample rate actually in effect.
s = source(filename, samplerate, hop_s)
samplerate = s.samplerate
# Example #4
# 0
import warnings

warnings.filterwarnings("ignore")

print('Recording begins in 3 seconds ...')
time.sleep(3)

# Load the trained classifier from disk.
# NOTE(review): pickle.load executes arbitrary code if the file is
# untrusted — only load model files you produced yourself.
loaded_classifier = pickle.load(open('finalised_model.sav', 'rb'))

from live_extraction import extract_features
from record_audio import record

# fixed: the original literal contained the invalid escape "\D"
# (DeprecationWarning today, SyntaxError in future Pythons); the
# resulting string value is unchanged.
path = "C:\\Users\\Atulya\\Documents\\GitHub\\gender-classifier-using-voice\\ML_final\\"

record(path)
extract_features(path)

# Importing the libraries
import pandas as pd
import numpy as np

# Importing the dataset
original_dataset = pd.read_csv('features.csv')
test_dataset = pd.read_csv('recorded_audio_features.csv')

# Drop the index column and, for the training set, the label column.
original_dataset = original_dataset.iloc[:, 1:-1]
test_dataset = test_dataset.iloc[:, 1:]

# fixed: DataFrame.append was removed in pandas 2.0; pd.concat is the
# supported equivalent (same row-stacking behaviour, indexes preserved).
appended_dataset = pd.concat([original_dataset, test_dataset])
import warnings
warnings.filterwarnings("ignore")
import pickle  # fixed: cPickle does not exist on Python 3
import time
from record_audio import record

modelpath = "speaker_models/"

# Collect every serialized GMM speaker model in the model directory.
gmm_files = [
    os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
    if fname.endswith('.gmm')
]

# Load the Gaussian gender Models.
# fixed: pickled models are binary — the files must be opened in 'rb'
# mode (text mode 'r' fails on Python 3 and corrupts data on Windows).
models = [pickle.load(open(fname, 'rb')) for fname in gmm_files]
speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]

# Record a live sample and extract its feature vector.
sr, audio = record()
vector = extract_features(audio, sr)

log_likelihood = np.zeros(len(models))

# Score the sample against each speaker model one by one.
for i in range(len(models)):
    gmm = models[i]
    scores = np.array(gmm.score(vector))
    log_likelihood[i] = scores.sum()

# The model with the highest total log-likelihood is the detected speaker.
winner = np.argmax(log_likelihood)
print("\nDetected as - ", speakers[winner])  # fixed: was a Python 2 print statement
time.sleep(1.0)