Example #1
import json
import os

from Utils.OS_Find import Path_OS_Assist


def main():
    path = ""  # repository path string
    confirm = ""  # y/n confirmation input
    path_json = {}
    delim = Path_OS_Assist()

    while (path == ""):
        temp = input("Enter the path to the CSAI_Voice_Assistant repository "
                     "in your local machine: ")

        while not (confirm == "n" or confirm == "y"):
            print("Please confirm that this is the path you "
                  "would like to add:\n\n Path: %s" % temp)
            print("\n\n(y) for yes | (n) for no")
            confirm = input().lower()

            if (confirm == "n"):
                confirm = ""
                break
            if (confirm == "y"):
                path = temp

    path_json["PATH"] = path

    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "w") as in_json:
        json.dump(path_json, in_json)
        print("Path %s has been added to Utils/PATH.json. If an error has "
              "occurred, you can run the program again and reinsert the path")
Example #2
import json
import os

from Utils.OS_Find import Path_OS_Assist


def load_speech_adaption():
    delim = Path_OS_Assist()

    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "r") as path_json:
        REPO_PATH = json.load(path_json)["PATH"]

    JSON_PATH = REPO_PATH + "%sData%sWakeWord%sMFCC%s" % \
            (delim, delim, delim, delim)

    with open(os.getcwd() + "%sUtils%sData%sspeech_adaption_entities.txt" % \
            (delim, delim, delim), "r") as spch_ents:
        ents = [x.replace('\n', '') for x in spch_ents.readlines()]

    new_name = ""
    for i in range(len(ents)):

        if i % 2 == 1:
            new_name += ents[i]
            ents.append(new_name)
            new_name = ""

        else:
            new_name += (ents[i] + " ")

    print(ents)
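# Worked illustration (assumed file contents, not from the source): if the
# entities file held the lines "jane", "doe", "john", "smith", the loop above
# would append the joined names, printing
# ['jane', 'doe', 'john', 'smith', 'jane doe', 'john smith'].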
Example #3
import json
import os

from Utils.OS_Find import Path_OS_Assist

# parser() and split() are project-local helpers defined elsewhere in this
# repository and are assumed to be importable here.


def main():
    parseVal = parser()
    args = parseVal[0]
    isWakeWord = parseVal[1]
    isNotWakeWord = parseVal[2]
    delim = Path_OS_Assist()
    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "r") as path_json:
        REPO_PATH = json.load(path_json)["PATH"]
    if isWakeWord:
        path = os.path.join(REPO_PATH, "Data", "WakeWord", "Audio",
                            "Wake_Word")
        split(args, path, args.p1)
    if isNotWakeWord:
        path = os.path.join(REPO_PATH, "Data", "WakeWord", "Audio",
                            "Not_Wake_Word")
        split(args, path, args.p2)
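# Hedged sketch (an assumption, not the repo's actual code): parser() above
# plausibly wraps argparse and returns (args, isWakeWord, isNotWakeWord);
# every flag name below is hypothetical, though args.p1 and args.p2 appear in
# the source.
import argparse


def parser():
    p = argparse.ArgumentParser(description="Split wake word audio data")
    p.add_argument("--ww", action="store_true")      # hypothetical flag
    p.add_argument("--nww", action="store_true")     # hypothetical flag
    p.add_argument("--p1", type=float, default=0.8)  # ww split ratio (assumed)
    p.add_argument("--p2", type=float, default=0.8)  # nww split ratio (assumed)
    args = p.parse_args()
    return args, args.ww, args.nww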
Example #4
import json
import os
import wave

import numpy as np

from speechpy.feature import mfcc
from Utils.OS_Find import Path_OS_Assist

# CONSTANTS
RATE = 16000  # sample rate
WINDOW = 0.128  # size of window
STRIDE = 0.064  # time between each window
MFCC = 13  # number of desired MFCCs
FILTER_BANKS = 20  # number of filter banks to compute
FFT_NUM = 512  # length of fast fourier transform window
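# Hedged illustration (not in the original): these constants line up with the
# keyword arguments of speechpy.feature.mfcc and are presumably consumed as:
#   feats = mfcc(signal, sampling_frequency=RATE, frame_length=WINDOW,
#                frame_stride=STRIDE, num_cepstral=MFCC,
#                num_filters=FILTER_BANKS, fft_length=FFT_NUM)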

delim = Path_OS_Assist()

with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim), "r") \
        as PATH_JSON:
    REPO_PATH = json.load(PATH_JSON)["PATH"]

AUDIO_PATH = "%sData%sWakeWord%sAudio" % (delim, delim, delim)


class Feature_Extraction:
    def __init__(self):
        self.words = {}

    def Convert_To_MFCC(self, wf):
        '''
        Converts audio byte streams to MFCCs
        '''
        # (method body elided in this excerpt)
Example #5
import json
import os

from Utils.OS_Find import Path_OS_Assist


class Model:
    def __init__(self):
        """
        Constructor for the Model class

        Args:
            self - The current object

        Returns:
            None

        """

        self.delim = Path_OS_Assist()

        with open(
                os.getcwd() + "%sUtils%sPATH.json" % (self.delim, self.delim),
                "r") as path_json:
            self.REPO_PATH = json.load(path_json)["PATH"]

        self.JSON_PATH = self.REPO_PATH + "%sData%sWakeWord%sMFCC%s" % \
            (self.delim, self.delim, self.delim, self.delim)

        # name of json data files
        self.WW_TRAIN = "Wake_Word_Train_data.json"
        self.NWW_TRAIN = "Not_Wake_Word_Train_data.json"
        self.WW_TEST = "Wake_Word_Test_data.json"
        self.NWW_TEST = "Not_Wake_Word_Test_data.json"
        self.WW_DATA = "ww_data.json"
        self.NWW_DATA = "nww_data.json"

        self.CONFIDENCE = 0.6  # prediction confidence
        self.GRU_UNITS = 64  # GRU unit size
        self.DROPOUT = 0.3  # dropout size
        self.ACTIVATIONS = 4  # number of activations for confident activation
        self.EPOCHS = 20  # number of forward and backward propagations
        self.BATCH_SIZE = 8  # batch size

        self.ww_test_data = {}
        self.ww_test_data_keys = []
        self.ww_train_data = {}
        self.ww_train_data_keys = []

        # not wake word train & test data with shuffled list of keys
        self.nww_test_data = {}
        self.nww_test_data_keys = []
        self.nww_train_data = {}
        self.nww_train_data_keys = []

        # full wake word data with shuffled list of keys
        self.ww_data = {}
        self.ww_data_keys = []

        # full not wake word data with shuffled list of keys
        self.nww_data = {}
        self.nww_data_keys = []

        # input list of training & test data and labels
        self.train_data = []
        self.train_labels = []
        self.test_data = []
        self.test_labels = []
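
        # Hedged illustration (assumption; the data-loading code is not
        # shown): the JSON files named above would presumably be read from
        # self.JSON_PATH along these lines:
        #   with open(self.JSON_PATH + self.WW_TRAIN, "r") as f:
        #       self.ww_train_data = json.load(f)
        #   self.ww_train_data_keys = list(self.ww_train_data.keys())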
Example #6
import json
import os

import pyaudio
from google.cloud import speech, texttospeech

from Utils.OS_Find import Path_OS_Assist

# Model is the wake word model class from the previous example; "Assistant" is
# a placeholder name, since this snippet begins mid-class in the source.


class Assistant:
    def __init__(self):

        # obtain OS-specific delimiter
        self.delim = Path_OS_Assist()

        # load the absolute path to the repo
        with open(os.getcwd() + "%sUtils%sPATH.json" % (self.delim, \
            self.delim), "r") as path_json:
            self.REPO_PATH = json.load(path_json)["PATH"]

        # instantiate wake word model class
        self.ww_model = Model()

        # get path for credentials
        self.credential_path = "%s%sScripts%sUtils%sData%sauth.json" % \
            (self.REPO_PATH, self.delim, self.delim, self.delim, self.delim)
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.credential_path

        # prediction parameters
        self.CONFIDENCE = 0.6  # prediction confidence
        self.ACTIVATIONS = 4  # number of activations for confident activation

        # audio input parameters
        self.CHUNK = 2048
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.RECORD_SECONDS = 5

        # mfcc feature parameters
        self.WINDOW = 0.128
        self.STRIDE = 0.064
        self.MFCC = 13
        self.FILTER_BANKS = 20
        self.FFT_NUM = 512

        # pyaudio i/o object instantiation
        self.pa_i = pyaudio.PyAudio()
        self.pa_o = pyaudio.PyAudio()

        # loading speech adaption phrases
        self.Load_Speech_Adaption()

        # instantiate GCP STT & TTS objects
        self.stt_client = speech.SpeechClient()
        self.tts_client = texttospeech.TextToSpeechClient()

        self.file_name = os.path.join(os.path.dirname(__file__), 'resources',
                                      'audio.raw')

        # load the desired model
        self.ww_model.load("%s%sScripts%sUtils%sData%smodel.h5" %
                           (self.REPO_PATH, self.delim, self.delim,
                            self.delim, self.delim))

        # print the summary of the model
        print(self.ww_model.model.summary())

        # open an input audio data stream
        self.istream = self.pa_i.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=self.RATE,
                                      input=True,
                                      frames_per_buffer=self.CHUNK)

        # open an output audio data stream
        self.ostream = self.pa_o.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=24000,
                                      output=True,
                                      frames_per_buffer=self.CHUNK)

        # contains the chunks of streamed data
        self.frames = []

        # counter for confident activations
        self.act_count = 0
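
        # Hedged sketch (assumption; the run loop is not shown): the input
        # stream opened above would typically be consumed in CHUNK-sized
        # reads that feed self.frames, e.g.:
        #   data = self.istream.read(self.CHUNK)
        #   self.frames.append(data)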
Example #7
import json
import os
import time
import wave

import pyaudio

from Utils.OS_Find import Path_OS_Assist


def main(username):
    CHUNK = 2048  # buffer size
    FORMAT = pyaudio.paInt16  # sample format (16-bit)
    CHANNELS = 1  # number of channels (mono)
    RATE = 16000  # sample rate (Hz)
    RECORD_SECONDS = 2.5  # recording time (s)

    delim = Path_OS_Assist()
    quit_inp = 0  # sentinel used to quit out of the program

    with open(os.getcwd() + "%sUtils%sPATH.json" % (delim, delim),
              "r") as path_json:
        REPO_PATH = json.load(path_json)["PATH"]

    while (quit_inp != 'q'):
        feat_type = 0  # wake or not wake word label
        gender = 0  # gender label
        end_desc_sess = 0  # ends the current description session

        # ensures proper input of either ww or notww
        while not (feat_type == "ww" or feat_type == "notww"):
            feat_type = input("WW or NotWW: ").lower()

        if (feat_type == "ww"):
            ww_noise = 0

            target_dir = "Wake_Word"  # used for setting the correct directory
            first_name = input(
                "First Name: ").lower()  # used for labeling file
            last_name = input("Last Name: ").lower()

            # ensures proper input
            while not (gender == "m" or gender == "f"):
                gender = input("Male (m) or Female (f): ").lower()

            # labeling brief description
            ww_descr = (input("Enter the description: ").lower()).replace(
                " ", "-")

            # labeling the recording location
            ww_loc = (input("Location: ").lower()).replace(" ", "-")

            while not (ww_noise == 'q' or ww_noise == 'm' or ww_noise == 'l'):
                ww_noise = input(
                    "Noise Level - Quiet (Q) Moderate (M) Loud (L): ").lower()

        else:
            target_dir = "Not_Wake_Word"
            nww_noise = 0
            nww_descr = ((input("Enter description: ")).lower()).replace(
                " ", "-")
            nww_loc = (input("Location: ").lower()).replace(" ", "-")

            while not (nww_noise == 'q' or nww_noise == 'm'
                       or nww_noise == 'l'):
                nww_noise = input(
                    "Noise Level - Quiet (Q) Moderate (M) Loud (L): ").lower()

        while (end_desc_sess != 'e'):

            # PyAudio instantiation
            p = pyaudio.PyAudio()

            # Audio data byte list
            frames = []

            # Count down
            print("Recording in\n3")
            time.sleep(1)
            print("2")
            time.sleep(1)
            print("1")
            time.sleep(1)

            # Starts the audio stream
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            output=True,
                            frames_per_buffer=CHUNK)

            print("**RECORDING**")

            # Reads the audio data buffer and adds it to the "frames" list
            for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                data = stream.read(CHUNK)
                frames.append(data)

            print("**RECORDING ENDED**")

            # Audio playback
            while (input("Play Audio (p): ").lower() == 'p'):
                for values in frames:
                    stream.write(values)

            # Stop and close the audio stream
            stream.stop_stream()
            stream.close()

            # Destruct the PyAudio instantiation
            p.terminate()

            # Skip the audio save if "d"
            if (input("To delete, type (d): ").lower() == 'd'):
                pass
            else:
                # Get the current time
                curr_time = time.strftime("%m%d%Y%H%M%S", time.localtime())

                # Construct the file name
                if (feat_type == "ww"):
                    file_name = "ww_" + gender + "_" + ww_descr + "_" + ww_loc + "_" + \
                        ww_noise + "_" + last_name + "_" + first_name + "_" + curr_time + \
                        "_" + username + ".wav"

                else:
                    file_name = "notww_" + nww_descr + "_" + nww_loc + "_" + nww_noise + \
                        "_" + curr_time + "_" + username + ".wav"

                # Store the audio under Data/WakeWord/Audio/<target_dir>;
                # delim supplies the OS-specific path separator
                wf = wave.open(
                    "%s%sData%sWakeWord%sAudio%s%s%s%s" %
                    (REPO_PATH, delim, delim, delim, delim, target_dir, delim,
                     file_name), 'wb')

                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(frames))
                wf.close()

                print(file_name + " has been saved.")

            end_desc_sess = input(
                "If finished with description session, "
                "type (e); otherwise, type anything else:").lower()

        quit_inp = input("If finished recording, type (q). Otherwise, "
                         "type anything else: ").lower()