Example #1
import os
import sys

import librosa
import numpy
import pandas
import vosk


def main():

    vosk.SetLogLevel(-1)

    audio_path = sys.argv[1]
    out_path = sys.argv[2]

    model_path = 'vosk-model-small-de-0.15'
    sample_rate = 16000

    audio, sr = librosa.load(audio_path, sr=sample_rate)

    # convert to 16bit signed PCM, as expected by VOSK
    int16 = numpy.int16(audio * 32768).tobytes()

    # XXX: Model must be downloaded from https://alphacephei.com/vosk/models
    # https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
    if not os.path.exists(model_path):
        raise ValueError(f"Could not find VOSK model at {model_path}")

    model = vosk.Model(model_path)
    recognizer = vosk.KaldiRecognizer(model, sample_rate)

    res = transcribe_words(recognizer, int16)
    df = pandas.DataFrame.from_records(res)
    df = df.sort_values('start')

    df.to_csv(out_path, index=False)
    print('Word segments saved to', out_path)
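
Examples #1 and #10 call a transcribe_words helper that the snippets do not define. Below is a minimal sketch of what it plausibly looks like, assuming the word-level result layout that Example #4 consumes (each token in result['result'] carries 'word', 'start', 'end', and 'conf'); the chunk_size parameter and the SetWords call are illustrative additions, not the original author's code:

import json

def transcribe_words(recognizer, bytes_data, chunk_size=4000):
    """Feed 16-bit PCM bytes to a vosk.KaldiRecognizer and collect word timings."""
    recognizer.SetWords(True)  # newer vosk versions need this for per-word timestamps

    results = []
    for offset in range(0, len(bytes_data), chunk_size):
        if recognizer.AcceptWaveform(bytes_data[offset:offset + chunk_size]):
            results.append(json.loads(recognizer.Result()))
    results.append(json.loads(recognizer.FinalResult()))

    words = []
    for res in results:
        for token in res.get('result', []):
            words.append({
                'word': token['word'],
                'start': token['start'],
                'end': token['end'],
                'conf': token['conf'],
            })
    return words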
Example #2
def run(command):
    q = queue.Queue()

    def callback(indata, frames, time, status):
        """This is called (from a separate thread) for each audio block."""
        if status:
            print(status, file=sys.stderr)
        q.put(bytes(indata))

    DEVICE_NUM = None
    MODEL = "model"
    device_info = sd.query_devices(DEVICE_NUM, 'input')
    # SAMPLE_RATE = int(device_info['default_samplerate'])
    SAMPLE_RATE = 16000
    model = vosk.Model(MODEL)
    rec = vosk.KaldiRecognizer(model, SAMPLE_RATE)
    try:
        with sd.RawInputStream(samplerate=SAMPLE_RATE,
                               blocksize=8000,
                               device=DEVICE_NUM,
                               dtype='int16',
                               channels=1,
                               callback=callback):
            while True:
                data = q.get()
                if rec.AcceptWaveform(data):
                    command.value = rec.Result()
    except KeyboardInterrupt:
        print('\nDone')
        exit(0)
    except Exception as e:
        exit(type(e).__name__ + ': ' + str(e))
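
The snippet above only assigns to command.value, so the type of command is not shown. One way to drive run() from another process, assuming a multiprocessing Manager namespace as the shared object (an assumption, not the original project's setup):

import multiprocessing

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    command = manager.Namespace()  # hypothetical shared object exposing a .value attribute
    command.value = ''
    worker = multiprocessing.Process(target=run, args=(command,))
    worker.start()  # run() keeps command.value updated with the latest recognizer result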
Example #3
def recordAudio():
    try:
        device_info = sd.query_devices(None, 'input')
        # soundfile expects an int, sounddevice provides a float:
        samplerate = int(device_info['default_samplerate'])

        model = vosk.Model("model")
        dump_fn = None
        with sd.RawInputStream(samplerate=samplerate,
                               blocksize=8000,
                               device=None,
                               dtype='int16',
                               channels=1,
                               callback=callback):
            # print('#' * 80)
            # print('Press Ctrl+C to stop the recording')
            # print('#' * 80)

            rec = vosk.KaldiRecognizer(model, samplerate)
            band = True
            while band:
                data = q.get()
                if rec.AcceptWaveform(data):
                    result = rec.Result()
                    band = False
                    value = json.loads(result)
                    print("Recorded text: {0}".format(value["text"]))
                    time.sleep(2)
                    return value["text"]
                else:
                    print(rec.PartialResult())
                    # if dump_fn is not None:
                    #     dump_fn.write(data)
    except Exception as e:
        return "error"
Example #4
def recognize_speech(wav_path, lang="en", buffer_size=4000):

    download_model(lang)

    vosk.SetLogLevel(-1)

    wav_file = wave.open(wav_path, "rb")

    recognizer = vosk.KaldiRecognizer(
        vosk.Model("{}/{}".format(get_model_path(), lang)),
        wav_file.getframerate())

    words = []

    for index in tqdm(range(0, wav_file.getnframes(), buffer_size)):

        frames = wav_file.readframes(buffer_size)

        if recognizer.AcceptWaveform(frames):

            result = json.loads(recognizer.Result())

            if len(result["text"]) > 0:

                for token in result["result"]:
                    words.append({
                        "start": token["start"],
                        "end": token["end"],
                        "text": token["word"],
                    })

    # Flush buffered audio so trailing words are not dropped.
    final = json.loads(recognizer.FinalResult())

    for token in final.get("result", []):
        words.append({
            "start": token["start"],
            "end": token["end"],
            "text": token["word"],
        })

    return words
Example #5
 def __init__(self, vosk_path='vosk-model-small-en-us-0.15'):
     print('Loading vosk...')
     vosk.SetLogLevel(-1)
     self.VOSK_PATH = vosk_path
     self.vosk_model = vosk.Model(self.VOSK_PATH)
     self.recognizer = vosk.KaldiRecognizer(self.vosk_model, 16000)
     print('Loaded vosk!')
Example #6
def run():
    try:
        if args.model is None:
            args.model = "model"
        if not os.path.exists(args.model):
            print ("Please download a model for your language from https://alphacephei.com/vosk/models")
            print ("and unpack as 'model' in the current folder.")
            parser.exit(0)
        if args.samplerate is None:
            device_info = sd.query_devices(args.device, 'input')
            # soundfile expects an int, sounddevice provides a float:
            args.samplerate = int(device_info['default_samplerate'])

        model = vosk.Model(args.model)

        if args.filename:
            dump_fn = open(args.filename, "wb")
        else:
            dump_fn = None
        

        with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device,
                               dtype='int16', channels=1, callback=callback):
            print('#' * 80)
            print('Press Ctrl+C to stop the recording')
            print('#' * 80)

            rec = vosk.KaldiRecognizer(model, args.samplerate)
            while True:
                data = q.get()
                if rec.AcceptWaveform(data):
                    print(rec.Result())
                else:
                    sentence = ast.literal_eval(rec.PartialResult())['partial'].split(' ')
                    print(sentence)

                    # Check the whole partial if it is short, otherwise only the last five words.
                    window = sentence if len(sentence) < 6 else sentence[-5:]
                    if any(s in flagged_words for s in window):
                        root.configure(background='red')
                    else:
                        root.configure(background='black')

                if dump_fn is not None:
                    dump_fn.write(data)

    except KeyboardInterrupt:
        print('\nDone')
        parser.exit(0)
    except Exception as e:
        parser.exit(type(e).__name__ + ': ' + str(e))
Example #7
    def callback_recognize(self, req):
        # clear queue
        q.queue.clear()
        print("options:", len(req.options), req.options)
        print("language:", req.language)
        print("timeout:", str(req.timeout))
        timeout = (req.timeout if (req.timeout != 0) else 20)
        language = (req.language if (req.language != '') else self.language)

        # check if we need to change the language model
        print('current language: ' + self.language)
        if language != self.language:
            print('switching language to ' + language)
            # The VOSK Python API does not raise an exception for a missing model,
            # so we check the path ourselves.

            if os.path.exists(MODELS_PATH + language):
                self.model = vosk.Model(MODELS_PATH + language)
                self.language = language
            else:
                rospy.loginfo('could not load language model for ' + language)
                return speech_recognizeResponse('')

        with sd.RawInputStream(samplerate=self.device_samplerate,
                               blocksize=8000,
                               device=self.device_index,
                               dtype='int16',
                               channels=1,
                               callback=callback):

            rec = vosk.KaldiRecognizer(self.model, self.device_samplerate)

            t_start = time.time()
            should_stop = False
            transcript = ''
            while not should_stop:
                data = q.get()
                if rec.AcceptWaveform(data):
                    result = rec.Result()
                    # print(result)
                    jres = json.loads(result)
                    transcript = jres['text']
                    for option in req.options:
                        if option.strip() and option in transcript:
                            transcript = option
                    should_stop = True
                else:
                    result = rec.PartialResult()
                    # print(result)
                    jres = json.loads(result)
                    for option in req.options:
                        if option.strip() and option in jres['partial']:
                            transcript = option
                    should_stop = bool(transcript)
                should_stop = should_stop or (
                    (time.time() - t_start) > timeout)

        return speech_recognizeResponse(transcript)
Example #8
def processVoskForever(pipe, fs, grammar):
    voice_model  = vosk.Model("/home/pi/vosk-model-small-en-us-0.3")
    max_num_iters_without_sound = 3
    buffer_size = max_num_iters_without_sound
    # TODO: Set process prio low.
    while True:
        recognizer = vosk.KaldiRecognizer(voice_model, fs, grammar)
        LOGGER.info("KaldiRecognizer created.")

        chunk_buffer = deque(maxlen=buffer_size)
        got_sound = False
        consecutive_without_sound = 0
        pipe_has_data = True
        bail_early = False
        while pipe_has_data and not bail_early:
            data = pipe.recv()
            if len(data) == 0:
                pipe_has_data = False
                LOGGER.info("Got end of line from audio pipe.")
            else:
                # Insert new chunk into buffer.
                chunk_buffer.append(data)
                float_data = np.frombuffer(data, dtype=np.int16).astype(np.float32)
                rms = np.sqrt(float_data.dot(float_data) / float_data.size)
                LOGGER.info(f"rms: {rms}")
                if rms > 200.0:
                    got_sound = True
                    consecutive_without_sound = 0
                else:
                    consecutive_without_sound += 1

                if got_sound:
                    # If we have got some sound, start popping from the chunk buffer.
                    # This will mean that processing is delayed a few iterations.
                    # It also means that we won't cut any speech from the stream when
                    # we start talking in the middle of a chunk.
                    recognizer.AcceptWaveform(chunk_buffer.popleft())

                # If we started processing because some chunk contained sound, and we then
                # see several consecutive chunks without sound, consider the utterance done.
                if got_sound and consecutive_without_sound > max_num_iters_without_sound:
                    bail_early = True

        # Empty chunk buffer into recognizer.
        LOGGER.info("Emptying buffer.")
        while len(chunk_buffer) > 0:
            recognizer.AcceptWaveform(chunk_buffer.popleft())

        # Empty pipe.
        LOGGER.info("Emptying leftovers in pipe.")
        while pipe_has_data:
            pipe_has_data = len(pipe.recv()) != 0

        result = recognizer.FinalResult()
        LOGGER.info(f"Got final result from recognizer:\n{result}")
        pipe.send(result)
        LOGGER.info("Result was sent on pipe.")
Example #9
def get_recognizer(model):
    if not Path(model).exists():
        raise Exception(
            "Model {} doesn't exist. Download it from https://alphacephei.com/vosk/models and unzip it here."
            .format(model))
    if not ENABLE_VOSK_DEBUG:
        vosk.SetLogLevel(-1)
    vosk_model = vosk.Model(model)
    rec = vosk.KaldiRecognizer(vosk_model, AUDIO_BITRATE)
    return rec
Example #10
File: AutoEdit.py, Project: ming0520/API
 def vosk_process(self):
     print('Loading vosk...')
     vosk.SetLogLevel(-1)
     int16 = np.int16(self.audioData * 32768).tobytes()
     vosk_path = self.VOSK_PATH
     vosk_model = vosk.Model(vosk_path)
     recognizer = vosk.KaldiRecognizer(vosk_model, 16000)
     print('Transcribing...')
     res = self.transcribe_words(recognizer, int16)
     df = pd.DataFrame.from_records(res)
     df = df.sort_values('start')
     print('Completed transcribe')
     self.df = df
Example #11
 def __init__(self,
              vosk_model_path,
              wakeword_detector,
              nlu_dataset,
              client,
              samplerate=16000):
     self.stt = vosk.KaldiRecognizer(vosk.Model(vosk_model_path),
                                     samplerate)
     self.client = client
     self.samplerate = samplerate
     self.listener = Listener(samplerate, self.on_noise)
     self.wakeword_detector = wakeword_detector
     self.nlu_engine = SnipsNLUEngine(config=CONFIG_FR)
     self.nlu_engine.fit(json.load(open(nlu_dataset)))
Example #12
 def __init__(self):
     self.q = queue.Queue()
     self.device = None
     try:
         model = "tools/model" #setting model location
         if not os.path.exists(model):
             print ("Please download a model for your language from https://alphacephei.com/vosk/models")
             print ("and unpack as 'model' in the tools folder.")
             exit(0)
         device_info = sd.query_devices(self.device, 'input')
         # soundfile expects an int, sound device provides a float:
         self.samplerate = int(device_info['default_samplerate'])
         model = vosk.Model(model)
         self.rec = vosk.KaldiRecognizer(model, self.samplerate) 
     except Exception as e:
         print("EXCEPTION : {}".format(e))
         exit(0)
Example #13
File: voice-input.py, Project: markcda/asw
def main(t: transport.Transport) -> None:
  """Starts speech recognition."""
  import sounddevice, vosk, locale, queue, os.path, json, sys
  
  # We select the appropriate model from the list of downloaded ones according to the system
  # language. If there is no suitable model, we take the first one available.
  #
  # You can pin a specific model by entering its name below instead of 'searched_folders[0]'.
  lang, _ = locale.getdefaultlocale()
  guess = '-' + lang[:2] + '-'
  searched_folders = [f for f in list_subdirs('models') if guess in f]
  if not searched_folders:
    searched_folders = list_subdirs('models')
  selected_model = searched_folders[0]
  print(f'Selected "{selected_model}".')
  vosk_model = vosk.Model(os.path.join('models', selected_model))
  
  audio_block_queue = queue.Queue()
  def checkout(indata, frames, time, status):
    """Writes recorded audio to queue that handled below."""
    if status:
      print(status, file=sys.stderr)
    audio_block_queue.put(bytes(indata))
  
  # Personal computers and laptops are usually equipped with at most one microphone, so
  # if there are any microphones at all, we pick the first one available.
  #
  # If you have more than one microphone, you can specify which microphone to use by assigning
  # its name to 'device' kwarg.
  with sounddevice.RawInputStream(blocksize=8000, dtype='int16', channels=1, callback=checkout):
    sample_rate = int(sounddevice.query_devices(sounddevice.default.device, "input")["default_samplerate"])
    vosk_recognizer = vosk.KaldiRecognizer(vosk_model, sample_rate)
    print('Let\'s start recognizing...')
    try:
      while True:
        if SHUTDOWN:
          break
        data = audio_block_queue.get()
        if vosk_recognizer.AcceptWaveform(data):
          text = json.loads(vosk_recognizer.Result())["text"]
          for word in text.split():
            if word in ATTENTION_WORDS:
              print('- ' + text)
              break
    except KeyboardInterrupt:
      print('\nSpeech recognition is off.')
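
list_subdirs is not defined in this snippet; here is a minimal sketch of what it presumably does, inferred from how it is used above:

import os

def list_subdirs(path):
    """Return the names of the immediate subdirectories of `path`."""
    return [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]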
Example #14
    def __init__(self, prefix, language):
        self.prefix = prefix
        self.language = language
        # find respeaker mic
        self.device_index = self.get_respeaker_device_index()
        if self.device_index is None:
            rospy.logfatal("could not find ReSpeaker microphone device")
            raise Exception('device')

        # open mic audio device
        device_info = sd.query_devices(self.device_index, 'input')
        # soundfile expects an int, sounddevice provides a float:
        self.device_samplerate = int(device_info['default_samplerate'])

        self.model = vosk.Model(MODELS_PATH + self.language)

        # start recognize service
        self.speech_recognize = rospy.Service(prefix + '/recognize',
                                              speech_recognize,
                                              self.callback_recognize)
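
get_respeaker_device_index is not shown. A hedged sketch of what it plausibly does, written here as a standalone function that scans sounddevice's device list for a ReSpeaker input (an illustration, not the original implementation):

import sounddevice as sd

def get_respeaker_device_index():
    """Return the index of the first input device whose name mentions ReSpeaker, else None."""
    for index, device in enumerate(sd.query_devices()):
        if 'respeaker' in device['name'].lower() and device['max_input_channels'] > 0:
            return index
    return None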
Example #15
 def __init__(self,
              flamingo_tools,
              model_path=f"Assistant_Brain/models/sr_model"):
     self.flamingo_tools = flamingo_tools
     self.q = queue.Queue()
     self.model_path = model_path
     self.interpreter = InterpretSpeech()
     self.device = None
     self.rec = None
     self.device_info = sd.query_devices(self.device, 'input')
     # soundfile expects an int, sounddevice provides a float:
     self.sample_rate = int(self.device_info['default_samplerate'])
     # Deactivate sound
     current_path = str(Path.cwd().parent)
     self.deactivate_file = "deactivate.m4a"
     self.deactivate_sound = f"{current_path}/Flamingo/files/audio/assistant_sfx/{self.deactivate_file}"
     # Location of downloaded model from setup_speech_recognition
     self.model = vosk.Model(self.model_path)
     # Time to listen for command (seconds)
     self.listen_time = 10
Example #16
    def __init__(self):
        self._ignore_stderr()

        def audio_callback(in_data, frame_count, time_info, status):
            self.ring_buffer.extend(in_data)
            # input-only stream: return a silent buffer of the same length
            play_data = b'\x00' * len(in_data)
            return play_data, pyaudio.paContinue

        vosk.SetLogLevel(-1)

        sample_rate = 16000

        self.recognizer = vosk.KaldiRecognizer(vosk.Model(VOSK_MODEL),
                                               sample_rate)
        self.ring_buffer = RingBuffer()

        self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(input=True,
                                         output=False,
                                         format=pyaudio.paInt16,
                                         channels=1,
                                         rate=sample_rate,
                                         frames_per_buffer=2048,
                                         stream_callback=audio_callback)
Example #17
def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block."""
    if status:
        print(status, file=sys.stderr)
    q.put(bytes(indata))


try:
    if not os.path.exists("model"):
        print ("Please download a model for your language from https://alphacephei.com/vosk/models")
        print ("and unpack as 'model' in the current folder.")
        parser.exit(0)
    
    device_info = sd.query_devices(None, 'input')
    # soundfile expects an int, sounddevice provides a float:
    samplerate = int(device_info['default_samplerate'])

    model = vosk.Model("model")

   
    dump_fn = None

    with sd.RawInputStream(samplerate=samplerate, blocksize=8000, device=None, dtype='int16',
                           channels=1, callback=callback):
        print('#' * 80)
        print('Press Ctrl+C to stop the recording')
        print('#' * 80)

        rec = vosk.KaldiRecognizer(model, samplerate)
        while True:
            data = q.get()
            if rec.AcceptWaveform(data):
                value = json.loads(rec.Result())
Example #18
def run(res):
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-l',
                        '--list-devices',
                        action='store_true',
                        help='show list of audio devices and exit')
    args, remaining = parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        parser.exit(0)
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[parser])
    parser.add_argument('-f',
                        '--filename',
                        type=str,
                        metavar='FILENAME',
                        help='audio file to store recording to')
    parser.add_argument('-m',
                        '--model',
                        type=str,
                        metavar='MODEL_PATH',
                        help='Path to the model')
    parser.add_argument('-d',
                        '--device',
                        type=int_or_str,
                        help='input device (numeric ID or substring)')
    parser.add_argument('-r', '--samplerate', type=int, help='sampling rate')
    args = parser.parse_args(remaining)

    if res == 'fr':
        models = 'model_fr'
    elif res == 'en':
        models = 'model_en'
    else:
        print('la langue entrée n\'est pas prise en compte par le système')
        models = None

    if models is not None:
        try:
            if args.model is None:
                args.model = models
            if not os.path.exists(args.model):
                print(
                    "Please download a model for your language from https://alphacephei.com/vosk/models"
                )
                print("and unpack as {} in the current folder.".format(models))
                parser.exit(0)
            if args.samplerate is None:
                device_info = sd.query_devices(args.device, 'input')
                # soundfile expects an int, sounddevice provides a float:
                args.samplerate = int(device_info['default_samplerate'])

            model = vosk.Model(args.model)

            if args.filename:
                dump_fn = open(args.filename, "wb")
            else:
                dump_fn = None
            with sd.RawInputStream(samplerate=args.samplerate,
                                   blocksize=8000,
                                   device=args.device,
                                   dtype='int16',
                                   channels=1,
                                   callback=callback):
                print("Pour arreter l'enregistrement, Appuyer sur 'Ctrl+c' ")

                rec = vosk.KaldiRecognizer(model, args.samplerate)
                capText = True
                if res == 'fr':
                    print("Je suis à l'écoute ...")
                elif res == 'en':
                    print("I'm listening ...")
                while capText:
                    data = q.get()
                    if rec.AcceptWaveform(data):
                        result = json.loads(rec.Result())
                        if result['text'] != '':
                            capText = False
                            searchText = result['text']
                            print(searchText)

                            def index_phrase_fr():
                                with open('fr.txt', 'r',
                                          encoding='utf-8') as f:
                                    tab = f.readlines()
                                    for text in tab:
                                        if searchText in text:
                                            read_text = text
                                index = tab.index(read_text)
                                engine.setProperty("voice", voices[1].id)
                                with open('en.txt', 'r',
                                          encoding='utf-8') as f:
                                    toto = f.readlines()[index:]
                                    for line in toto:
                                        titi = line
                                        engine.say(titi)
                                        engine.runAndWait()

                            def index_phrase_en():
                                with open('en.txt', 'r',
                                          encoding='utf-8') as f:
                                    tab = f.readlines()
                                    for text in tab:
                                        if searchText in text:
                                            read_text = text
                                index = tab.index(read_text)
                                with open('fr.txt', 'r',
                                          encoding='utf-8') as f:
                                    toto = f.readlines()[index:]
                                    for line in toto:
                                        titi = line
                                        engine.say(titi)
                                        engine.runAndWait()

                            if res == 'fr':
                                index_phrase_fr()
                            elif res == 'en':
                                index_phrase_en()
                        else:
                            capText = True
                    if dump_fn is not None:
                        dump_fn.write(data)

        except KeyboardInterrupt:
            print('\nDone')
            parser.exit(0)
        except Exception as e:
            parser.exit(type(e).__name__ + ': ' + str(e))
    else:
        print(
            'Les langues prises en compte par le système sont : français et anglais'
        )
Example #19
parser.add_argument('-r', '--samplerate', type=int, help='sampling rate')
args = parser.parse_args(remaining)

try:
    if args.model is None:
        args.model = "model"
    if not os.path.exists(args.model):
        print ("Please download a model for your language from https://alphacephei.com/vosk/models")
        print ("and unpack as 'model' in the current folder.")
        parser.exit(0)
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, 'input')
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info['default_samplerate'])

    model = vosk.Model(args.model)

    if args.filename:
        dump_fn = open(args.filename, "wb")
    else:
        dump_fn = None

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=16000, device=args.device,
                           dtype='int16', channels=1, callback=callback):
        print('#' * 80)
        print('Press Ctrl+C to stop the recording')
        print('#' * 80)

        rec = vosk.KaldiRecognizer(model, args.samplerate)
        while True:
            data = q.get()
Example #20
from pydub import AudioSegment
from pydub.playback import play
import datetime
import json
import os
import traceback
from collections import deque
import editdistance
from time import sleep

import torch
import vosk
from transformers import GPT2TokenizerFast, GPT2LMHeadModel

# Build models
current_dir = os.getcwd()

vosk_model = vosk.Model(os.path.join(current_dir, "checkpoints", "vosk-model"))
gpt2_model = GPT2LMHeadModel.from_pretrained("checkpoints/v4/")
tokenizer = GPT2TokenizerFast.from_pretrained("antoiloui/belgpt2",
                                              model_max_length=768,
                                              pad_token='<|pad|>')

# put gpt2 on the gpu
device = torch.device('cuda')
gpt2_model.to(device)
# put gpt2 in eval mode
gpt2_model.eval()

# import tacotron stuff
tacotron_dir = "Multilingual_Text_to_Speech"
tacotron_chpt = "generated_switching.pyt"
Example #21
def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block."""
    if status:
        print(status, file=sys.stderr)
    q.put(bytes(indata))


if __name__ == "__main__":

    env = get_env()

    try:
        if not os.path.exists(env.model_path):
            logging.error(f'{env.model_path=} not found')
            sys.exit("Acoustic-Language Model was not found")
        if env.sample_rate is None:
            device_info = sd.query_devices(kind='input')
            env.sample_rate = int(device_info['default_samplerate'])

        model = vosk.Model(env.model_path)

        with sd.RawInputStream(samplerate=env.sample_rate,
                               blocksize=4000,
                               dtype='int16',
                               channels=1,
                               callback=callback):
            logging.info(
                '------------------ Press Ctrl+C to stop the recording ------------------'
            )
            recognizer = vosk.KaldiRecognizer(model, env.sample_rate)

            while True:
                data = q.get()
                if recognizer.AcceptWaveform(data):
                    logging.info(recognizer.Result())
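
get_env is not shown in this snippet. A minimal sketch of the configuration object it appears to return, with field names inferred purely from the usage above (treat them as assumptions):

from dataclasses import dataclass
from typing import Optional

@dataclass
class Env:
    model_path: str = "model"
    sample_rate: Optional[int] = None  # filled from the default input device when None

def get_env() -> Env:
    return Env()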
Example #22
tokenData = prepareData(config['tokens'])
patternData = prepareData(config['patterns'])
if config['prefixOther'] is None:
    print('Missing prefixOther property in %s' % configFile)
    exit(1)

if config['prefixMatch'] is None:
    print('Missing prefixMatch property in %s' % configFile)
    exit(1)

if samplerate is None:
    device_info = sd.query_devices(device, 'input')
    # soundfile expects an int, sounddevice provides a float:
    samplerate = int(device_info['default_samplerate'])

model = vosk.Model(config['vosk_model_path'])

with sd.RawInputStream(samplerate=samplerate, blocksize=8000, device=device, dtype='int16',
                       channels=1, callback=callback):
    rec = vosk.KaldiRecognizer(model, samplerate)
    while True:
        data = q.get()
        if rec.AcceptWaveform(data):
            res = rec.Result()
            obj = json.loads(res)
            text = obj['text']
            if len(text) > 0:
                patterns = tryToParsePatterns(text, tokenData, patternData)
                if patterns is None:
                    print('%s%s' % (config['prefixOther'], text))
                else:
Example #23
        channels=channels,
        rate=fs,
        frames_per_buffer=chunk,
        stream_callback=audio_queue.addFramesToVector,
        # input_device_index=6, # 2 = antlion zero, 5 = laptop, 6 = antlion laptop, empty = system default.
        input=True,
        start=False)

    valid_commands = [
        "turn on turtle", "turn off turtle", "turn on green", "turn off green",
        "turn on blue", "turn off blue", "turn on corner", "turn off corner",
        "engage party mode", "let there be light", "you all suck", "good night"
    ]
    grammar = getGrammar(valid_commands)
    print(grammar)
    voice_model = vosk.Model("vosk-model-small-en-us-0.3")
    recognizer = vosk.KaldiRecognizer(voice_model, fs, grammar)

    current_frame = 0
    while True:
        input("Press ENTER to start recording. Ctrl-C to stop recording.")

        audio_queue.clear()
        stream.start_stream()
        print("Recording...")
        try:
            while True:
                time.sleep(0.5)
                #audio_data = stream.read(chunk)
                #audio_data = audio_queue.getNextChunk()
                audio_data = None
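
getGrammar is called above but not defined. Vosk's KaldiRecognizer accepts a grammar as a JSON-encoded list of allowed phrases, so a plausible sketch is simply the following (the '[unk]' entry lets the recognizer map out-of-grammar speech to an unknown token):

import json

def getGrammar(commands):
    """Build a JSON grammar string restricting recognition to the given phrases."""
    return json.dumps(commands + ["[unk]"])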
Example #24
 def __init__(self, model_path, samplerate=16000, identifier=None):
     super(Vosk, self).__init__(model_path, identifier)
     vosk.SetLogLevel(0)
     self.model = vosk.Model(self.model_path)
     self.rec = vosk.KaldiRecognizer(self.model, samplerate)
     self.samplerate_hz = samplerate