def main():
    """Prompt the user for the local CSAI_Voice_Assistant repository path,
    confirm it, and persist it to Utils/PATH.json.

    Returns:
        None
    """
    path = ""       # confirmed repository path ("" until the user says yes)
    confirm = ""    # y/n confirmation input
    path_json = {}  # payload written to PATH.json
    delim = Path_OS_Assist()  # OS-specific path delimiter

    while (path == ""):
        temp = input("Enter the path to the CSAI_Voice_Assistant repository "
                     "in your local machine: ")

        # keep asking until the user answers y or n
        while not (confirm.lower() == "n" or confirm.lower() == "y"):
            print("Please confirm that this is the path you "
                  "would like to add:\n\n Path: %s" % temp)
            print("\n\n(y) for yes | (n) for no")
            confirm = input()

        if (confirm == "n"):
            # BUG FIX: the original `break` exited the outer loop here and
            # saved an empty path; reset the answer and re-prompt instead.
            confirm = ""
            continue

        if (confirm == "y"):
            path = temp

    path_json["PATH"] = path

    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "w") as in_json:
        json.dump(path_json, in_json)

    # BUG FIX: the original message had a %s placeholder with no argument,
    # so the literal text "%s" was printed instead of the path.
    print("Path %s has been added to Utils/PATH.json. If an error has "
          "occurred, you can run the program again and reinsert the path"
          % path)
def load_speech_adaption():
    """Load the speech-adaption entity phrases and print them together with
    the combined phrases built from each consecutive pair of lines."""
    delim = Path_OS_Assist()

    # resolve the repository path recorded in Utils/PATH.json
    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "r") as path_json:
        REPO_PATH = json.load(path_json)["PATH"]

    JSON_PATH = REPO_PATH + "%sData%sWakeWord%sMFCC%s" % \
        (delim, delim, delim, delim)

    # one phrase per line; drop newline characters
    with open(os.getcwd() + "%sUtils%sData%sspeech_adaption_entities.txt" %
              (delim, delim, delim), "r") as spch_ents:
        ents = [line.replace('\n', '') for line in spch_ents.readlines()]

    # Pair each even-indexed entry with the following odd-indexed entry and
    # append the "first second" combination; a trailing unpaired entry is
    # dropped, matching the original parity-based loop.
    combos = [first + " " + second
              for first, second in zip(ents[0::2], ents[1::2])]
    ents.extend(combos)

    print(ents)
def main():
    """Parse command-line arguments and run the dataset split for the
    wake-word and/or not-wake-word audio directories."""
    parse_val = parser()
    args = parse_val[0]
    is_ww = parse_val[1]
    is_nww = parse_val[2]

    delim = Path_OS_Assist()

    # resolve the repository path recorded in Utils/PATH.json
    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "r") as path_json:
        REPO_PATH = json.load(path_json)["PATH"]

    # common root of both audio class directories
    audio_root = os.path.join(REPO_PATH, "Data", "WakeWord", "Audio")

    if is_ww:
        split(args, os.path.join(audio_root, "Wake_Word"), args.p1)
    if is_nww:
        split(args, os.path.join(audio_root, "Not_Wake_Word"), args.p2)
import wave import numpy as np from speechpy.feature import mfcc from Utils.OS_Find import Path_OS_Assist # CONSTANTS RATE = 16000 # sample rate WINDOW = 0.128 # size of window STRIDE = 0.064 # time between each window MFCC = 13 # number of desired MFCCs FILTER_BANKS = 20 # number of filter banks to compute FFT_NUM = 512 # length of fast fourier transform window delim = Path_OS_Assist() with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim), "r") \ as PATH_JSON: REPO_PATH = json.load(PATH_JSON)["PATH"] AUDIO_PATH = "%sData%sWakeWord%sAudio" % (delim, delim, delim) class Feature_Extraction: def __init__(self): self.words = {} def Convert_To_MFCC(self, wf): ''' Converts audio byte streams to MFCCs
def __init__(self):
    """
    Constructor for the Model class

    Loads the repository path from Utils/PATH.json, derives the MFCC data
    directory, and initializes all file names, hyperparameters, and empty
    data containers used during training/testing.

    Args:
        self - The current object

    Returns:
        None
    """
    self.delim = Path_OS_Assist()  # OS-specific path delimiter

    # resolve the repository path recorded in Utils/PATH.json
    with open(
            os.getcwd() + "%sUtils%sPATH.json" % (self.delim, self.delim),
            "r") as path_json:
        self.REPO_PATH = json.load(path_json)["PATH"]

    # directory holding the MFCC json data files
    self.JSON_PATH = self.REPO_PATH + "%sData%sWakeWord%sMFCC%s" % \
        (self.delim, self.delim, self.delim, self.delim)

    # name of json data files
    self.WW_TRAIN = "Wake_Word_Train_data.json"
    self.NWW_TRAIN = "Not_Wake_Word_Train_data.json"
    self.WW_TEST = "Wake_Word_Test_data.json"
    self.NWW_TEST = "Not_Wake_Word_Test_data.json"
    self.WW_DATA = "ww_data.json"
    self.NWW_DATA = "nww_data.json"

    self.CONFIDENCE = 0.6  # prediction confidence
    self.GRU_UNITS = 64  # GRU unit size
    self.DROPOUT = 0.3  # dropout size
    self.ACTIVATIONS = 4  # number of activations for confident activation
    self.EPOCHS = 20  # number of fwd and bckwd props
    self.BATCH_SIZE = 8  # batch size

    # wake word train & test data with shuffled list of keys
    self.ww_test_data = {}
    self.ww_test_data_keys = []
    self.ww_train_data = {}
    self.ww_train_data_keys = []

    # not wake word train & test data with shuffled list of keys
    self.nww_test_data = {}
    self.nww_test_data_keys = []
    self.nww_train_data = {}
    self.nww_train_data_keys = []

    # full wake word / not wake word data with shuffled list of keys.
    # BUG FIX: each of these four attributes was assigned twice in the
    # original; the redundant duplicate assignments were removed.
    self.ww_data = {}
    self.ww_data_keys = []
    self.nww_data = {}
    self.nww_data_keys = []

    # input list of training & test data and labels
    self.train_data = []
    self.train_labels = []
    self.test_data = []
    self.test_labels = []
def __init__(self):
    """Set up the assistant's runtime state.

    Resolves the repository path, loads the wake-word model, points GCP
    credentials at the bundled auth.json, instantiates the speech-to-text
    and text-to-speech clients, and opens the PyAudio input/output streams.

    NOTE(review): statement order matters here — the credentials env var
    must be set before the GCP clients are created.

    Args:
        self - The current object

    Returns:
        None
    """
    # obtain OS-specific delimiter
    self.delim = Path_OS_Assist()

    # load the absolute path to the repo
    with open(os.getcwd() + "%sUtils%sPATH.json" % (self.delim,
              self.delim), "r") as path_json:
        self.REPO_PATH = json.load(path_json)["PATH"]

    # instantiate wake word model class
    self.ww_model = Model()

    # get path for credentials
    self.credential_path = "%s%sScripts%sUtils%sData%sauth.json" \
        % (self.REPO_PATH, self.delim, self.delim, self.delim, self.delim)
    # must be exported before the GCP clients below are constructed
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.credential_path

    # prediction parameters
    self.CONFIDENCE = 0.6  # prediction confidence
    self.ACTIVATIONS = 4  # number of activations for confident activation

    # audio input parameters
    self.CHUNK = 2048  # frames per buffer read
    self.FORMAT = pyaudio.paInt16  # 16-bit samples
    self.CHANNELS = 1  # mono
    self.RATE = 16000  # input sample rate (Hz)
    self.RECORD_SECONDS = 5

    # mfcc feature parameters
    self.WINDOW = 0.128  # window size (s)
    self.STRIDE = 0.064  # stride between windows (s)
    self.MFCC = 13  # number of MFCC coefficients
    self.FILTER_BANKS = 20
    self.FFT_NUM = 512  # FFT length

    # pyaudio i/o object instantiation (separate objects for in and out)
    self.pa_i = pyaudio.PyAudio()
    self.pa_o = pyaudio.PyAudio()

    # loading speech adaption phrases
    self.Load_Speech_Adaption()

    # instantiate GCP STT & TTS objects
    self.stt_client = speech.SpeechClient()
    self.tts_client = texttospeech.TextToSpeechClient()

    # scratch file used for raw audio next to this module
    self.file_name = os.path.join(os.path.dirname(__file__), 'resources',
                                  'audio.raw')

    # load the desired model
    self.ww_model.load("%s%sScripts%sUtils%sData%smodel.h5" %
                       (self.REPO_PATH, self.delim, self.delim,
                        self.delim, self.delim))

    # print the summary of the model
    print(self.ww_model.model.summary())

    # open an input audio data stream
    self.istream = self.pa_i.open(format=self.FORMAT,
                                  channels=self.CHANNELS,
                                  rate=self.RATE,
                                  input=True,
                                  frames_per_buffer=self.CHUNK)

    # open an output audio data stream
    # NOTE(review): output runs at 24 kHz, not self.RATE — presumably to
    # match the TTS audio format; confirm against the TTS request config.
    self.ostream = self.pa_o.open(format=self.FORMAT,
                                  channels=self.CHANNELS,
                                  rate=24000,
                                  output=True,
                                  frames_per_buffer=self.CHUNK)

    # contains the chunks of streamed data
    self.frames = []

    # counts for confident activations
    self.act_count = 0
def main(username):
    """Interactively record wake-word / not-wake-word audio samples.

    Repeatedly prompts for sample metadata (type, speaker, description,
    location, noise level), records short clips through PyAudio, optionally
    plays them back, and saves kept clips as labeled .wav files under
    Data/WakeWord/Audio/<Wake_Word|Not_Wake_Word>.

    Args:
        username - tag appended to every saved file name

    Returns:
        None
    """
    CHUNK = 2048  # Buffer size
    FORMAT = pyaudio.paInt16  # Sample Size
    CHANNELS = 1  # Sample Depth
    RATE = 16000  # Sample Rate
    RECORD_SECONDS = 2.5  # Recording Time
    delim = Path_OS_Assist()
    quit_inp = 0  # var for quitting out of program

    # resolve the repository path recorded in Utils/PATH.json
    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "r") as path_json:
        REPO_PATH = json.load(path_json)["PATH"]

    while (quit_inp != 'q'):
        feat_type = 0  # wake or not wake word label
        gender = 0  # gender label
        end_desc_sess = 0  # ends the current description session

        # ensures proper input of either ww or notww
        while not (feat_type == "ww" or feat_type == "notww"):
            feat_type = input("WW or NotWW: ").lower()

        if (feat_type == "ww"):
            ww_noise = 0
            target_dir = "Wake_Word"  # used for setting the correct directory
            first_name = input(
                "First Name: ").lower()  # used for labeling file
            last_name = input("Last Name: ").lower()

            # ensures proper input
            while not (gender == "m" or gender == "f"):
                gender = input("Male (m) or Female (f): ").lower()

            # labeling brief description (spaces become dashes so the
            # metadata stays a single underscore-delimited field)
            ww_descr = (input("Enter the description: ").lower()).replace(
                " ", "-")

            # labeling the recording location
            ww_loc = (input("Location: ").lower()).replace(" ", "-")

            # ensures a valid noise-level label
            while not (ww_noise == 'q' or ww_noise == 'm' or
                       ww_noise == 'l'):
                ww_noise = input(
                    "Noise Level - Quiet (Q) Moderate (M) Loud (L): ").lower()
        else:
            # not-wake-word samples carry no speaker name/gender metadata
            target_dir = "Not_Wake_Word"
            nww_noise = 0
            nww_descr = ((input("Enter description: ")).lower()).replace(
                " ", "-")
            nww_loc = (input("Location: ").lower()).replace(" ", "-")

            while not (nww_noise == 'q' or nww_noise == 'm' or
                       nww_noise == 'l'):
                nww_noise = input(
                    "Noise Level - Quiet (Q) Moderate (M) Loud (L): ").lower()

        # record any number of clips under the current description
        while (end_desc_sess != 'e'):
            # PyAudio instantiation
            p = pyaudio.PyAudio()

            # Audio data byte list
            frames = []

            # Count down
            print("Recording in\n3")
            time.sleep(1)
            print("2")
            time.sleep(1)
            print("1")
            time.sleep(1)

            # Starts the audio stream (output=True so the same stream can
            # be used for playback below)
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            output=True,
                            frames_per_buffer=CHUNK)

            print("**RECORDING**")

            # Reads the audio data buffer and adds it to the "frames" list
            for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                data = stream.read(CHUNK)
                frames.append(data)

            print("**RECORDING ENDED**")

            # Audio playback (repeat for as long as the user types 'p')
            while (input("Play Audio (p): ").lower() == 'p'):
                for values in frames:
                    stream.write(values)

            # Stop and close the audio stream
            stream.stop_stream()
            stream.close()

            # Destruct the PyAudio instantiation
            p.terminate()

            # Skip the audio save if "d"
            if (input("To delete, type (d): ").lower() == 'd'):
                pass
            else:
                # Get the current time (used to make the file name unique)
                curr_time = time.strftime("%m%d%Y%H%M%S", time.localtime())

                # Save the file name: underscore-delimited metadata fields
                if (feat_type == "ww"):
                    file_name = "ww_" + gender + "_" + ww_descr + "_" + \
                        ww_loc + "_" + ww_noise + "_" + last_name + "_" + \
                        first_name + "_" + curr_time + "_" + username + ".wav"
                else:
                    file_name = "notww_" + nww_descr + "_" + nww_loc + \
                        "_" + nww_noise + "_" + curr_time + "_" + \
                        username + ".wav"

                print(file_name + " has been saved.")

                # Store the audio in the "Data/Target directory"
                # <<FOR LINUX OR MAC OS, REPLACE \\ with />>
                wf = wave.open(
                    "%s%sData%sWakeWord%sAudio%s%s%s%s" %
                    (REPO_PATH, delim, delim, delim, delim, target_dir,
                     delim, file_name), 'wb')
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(frames))
                wf.close()

            end_desc_sess = input(
                "If finished with description session, "
                "type (e); otherwise, type anything else:").lower()

        quit_inp = input("If finished recording, type (q). Otherwise, "
                         "type anything else: ").lower()