def assistant():
    """Run one voice-assistant interaction cycle.

    Plays a spoken prompt, records the user's reply, transcribes it with
    the IBM Bluemix speech-to-text service, and dispatches on keywords:
    "time" sets a timer, "check" reports the food's doneness, and
    "recognize" speaks the labels detected for the food.  Any other
    input -- or a transcription whose confidence is too low -- gets an
    apology.  Sleeps 30 seconds before returning.

    Relies on module-level collaborators: texttospeech, speechtotext,
    pa (audio playback), ra (audio recording), pm (food analysis),
    set_timer, and time.
    """
    name = texttospeech('how can I help you')
    # Play audio "How can I help you?" in user's earphone
    pa.play(name)
    # Let the user record his/her input voice
    filename = ra.record()
    # Recognize and translate the user's input speech to text using the
    # IBM Bluemix NLP service.
    #   speech2[0] -- the confidence of the transcription
    #   speech2[1] -- the text content
    speech2 = speechtotext(filename)
    # We assume that when confidence > 0.25 the user recorded effective audio.
    # BUG FIX: original tested "Speech2[0]" (capital S) -- an undefined
    # name that raised NameError on every call.
    if speech2[0] > 0.25:
        # If the user wants to set a timer
        if "time" in speech2[1]:
            texttospeech('how many minutes')
            pa.play('output.wav')
            filename = ra.record()
            speech2 = speechtotext(filename)
            if speech2[0] > 0.25:
                settime = int(speech2[1])
                set_timer(settime)
            else:
                texttospeech('Sorry I do not understand')
                pa.play('output.wav')
        # If the user wants to check the doneness of the food
        elif 'check' in speech2[1]:
            doneness = pm.well_done()
            texttospeech(doneness)
            pa.play('output.wav')
        # If the user wants to recognize the food
        elif 'recognize' in speech2[1]:
            tmp = 'The labels for this object are'
            label_list = pm.what_is_it()
            for label in label_list:
                tmp = tmp + str(label)
            texttospeech(tmp)
            pa.play('output.wav')
        # We ignore all other kinds of input speech
        else:
            texttospeech('Sorry I do not understand')
            pa.play('output.wav')
    # The confidence value is too low; we ignore this input
    else:
        texttospeech('Sorry I do not understand')
        pa.play('output.wav')
    time.sleep(30)
# --- Select the audio input for analysis -------------------------------
# BUG FIX: the chunk began with a dangling "else:"; the matching test on
# the command-line argument count has been restored.
if len(sys.argv) > 1:
    audi = sys.argv[1]
else:
    audi = input("Enter a wav file: ")

if len(audi) <= 1:
    print("invalid input \n")
    exit()
print("\n")

# "rec N" mode: record N seconds from the microphone instead of reading
# an existing file.
# BUG FIX: guard on len(sys.argv) > 2 -- the original indexed
# sys.argv[2] unconditionally and crashed with IndexError when the
# second argument was absent.
if audi == 'rec' and len(sys.argv) > 2:
    if sys.argv[2].isdigit():
        seconds = int(sys.argv[2])
        tmpa = " recording : " + str(seconds) + "s"
        # BUG FIX: original had a stray "x" token after this call,
        # which is a syntax error.
        print(tmpa)
        spinner.start(tmpa)
        record = ra.record(seconds, "audio.wav")
        spinner.stop()
        print(" saved file name : ", record, "\n\n")
        audi = record
    else:
        print("invalid input \n")
        exit()

spinner.start(" analysing audio ...")
filename = audi
# -1 means "unknown"; presumably detected later -- TODO confirm against
# the code that consumes n_speakers.
n_speakers = -1
#This will be the file name the user's attempt will be saved in filename = DIR_PATH + "/file" + str(num_files) + ".wav" #Prompt user to begin recording record_msg = "Click to start recording.\n" + \ "Press Control-C while in the command line application to stop recording\n" + \ ("Otherwise, recording will stop after %d seconds." % record_audio.RECORD_SECONDS) g.msgbox(record_msg, ok_button = "Begin Recording") except Exception as e: print "Could not create GUI", e #Fall back to command line interface enter = raw_input("Press Enter to record...") print "Recording audio..." record_audio.record(filename) print "recorded audio" #Allow user to analyze an existing file if len(sys.argv) >= 2: filename = sys.argv[1] downsample = 1 samplerate = 44100 // downsample if len( sys.argv ) > 2: samplerate = int(sys.argv[2]) win_s = 4096 // downsample # fft size hop_s = 512 // downsample # hop size s = source(filename, samplerate, hop_s) samplerate = s.samplerate
import warnings
warnings.filterwarnings("ignore")

print('Recording begins in 3 seconds ...')
time.sleep(3)

# Load the trained classifier from disk.
# BUG FIX: use a context manager so the file handle is closed (the
# original passed an open() result straight to pickle.load and leaked it).
# NOTE(review): pickle.load executes arbitrary code from the file --
# only load model files you created yourself.
with open('finalised_model.sav', 'rb') as model_file:
    loaded_classifier = pickle.load(model_file)

from live_extraction import extract_features
from record_audio import record

# Directory where the recording and its extracted features are written.
# Raw string replaces the original's half-escaped "\\Users\Documents"
# mix; the runtime value is unchanged (a raw string cannot end in a
# backslash, hence the trailing "\\" concatenation).
path = r"C:\Users\Atulya\Documents\GitHub\gender-classifier-using-voice\ML_final" + "\\"
record(path)
extract_features(path)

# Importing the libraries
import pandas as pd
import numpy as np

# Load the training features and the features of the fresh recording.
original_dataset = pd.read_csv('features.csv')
test_dataset = pd.read_csv('recorded_audio_features.csv')
# Drop the index column and the label column from the training set;
# drop only the index column from the recorded sample.
original_dataset = original_dataset.iloc[:, 1:-1]
test_dataset = test_dataset.iloc[:, 1:]
# BUG FIX: DataFrame.append was deprecated and removed in pandas 2.0;
# pd.concat is the supported equivalent and produces the same frame.
appended_dataset = pd.concat([original_dataset, test_dataset])
import warnings warnings.filterwarnings("ignore") import time from record_audio import record modelpath = "speaker_models/" gmm_files = [ os.path.join(modelpath, fname) for fname in os.listdir(modelpath) if fname.endswith('.gmm') ] #Load the Gaussian gender Models models = [cPickle.load(open(fname, 'r')) for fname in gmm_files] speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files] # Read the test directory and get the list of test audio files sr, audio = record() vector = extract_features(audio, sr) log_likelihood = np.zeros(len(models)) for i in range(len(models)): gmm = models[i] #checking with each model one by one scores = np.array(gmm.score(vector)) log_likelihood[i] = scores.sum() winner = np.argmax(log_likelihood) print "\nDetected as - ", speakers[winner] time.sleep(1.0)