Пример #1
0
def translate_file(filename="last5.wav"):
    """Transcribe a mono 16-bit PCM WAV file with Vosk and return the text.

    Args:
        filename (str, optional): Name of the WAV file in the current
            directory. Defaults to "last5.wav".

    Returns:
        str: The full recognised transcript (may be empty).
    """
    SetLogLevel(-1)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)
    filepath = "./" + filename
    wf = wave.open(filepath, "rb")
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
    ) != "NONE":
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    model = Model("./model")
    rec = KaldiRecognizer(model, 16000)
    texts = []
    try:
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            # BUG FIX: the original called rec.FinalResult() here, which
            # flushes the recognizer mid-stream, and it kept only the LAST
            # accepted segment (with a try/except UnboundLocalError
            # workaround).  Collect every completed segment instead.
            if rec.AcceptWaveform(data):
                segment = json.loads(rec.Result())
                if segment.get("text"):
                    texts.append(segment["text"])
        # FinalResult() holds whatever words remain after the last chunk.
        final = json.loads(rec.FinalResult())
        if final.get("text"):
            texts.append(final["text"])
    finally:
        wf.close()  # BUG FIX: the original leaked the open wave file
    # Use the segments' ["result"] entries for per-word confidence.
    return " ".join(texts)
Пример #2
0
    def __init__(self, command_config="commands.json", alert_sound_enabled=True):
        """Constructor for AnkiSpeechToCommand. Initialises vosk speech-to-text module,
        AnkiConnect API handler object, and derives word commands from a JSON file.

        Args:
            command_config (str, optional): Filename for the JSON command file. Defaults to "commands.json".
            alert_sound_enabled (bool, optional): Controls confirmation sound for attach, pause, and unpause commands. Defaults to True.

        Raises:
            json.decoder.JSONDecodeError: Handles decode errors from the JSON command file, such as malformed syntax.
            AnkiVoiceError: Handles anki-voice errors, in particular here for missing command definitions.
        """
        # Verify speech-to-text engine (vosk) model exists.
        # NOTE(review): the check looks for "Model" next to this file, but
        # the model is loaded below as Model("model") relative to the CWD --
        # this only lines up on case-insensitive filesystems when the CWD is
        # this file's directory; confirm the intended directory name/path.
        if not Path(Path(__file__).resolve().parent, "Model").is_dir():
            print("Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' (directory) in the current folder.")
            sys.exit(1)
        # Configure speech-to-text engine (level -10 silences vosk output)
        SetLogLevel(-10)
        self._model = Model("model")
        self._recogniser = KaldiRecognizer(self._model, 16000)
        # 16 kHz mono microphone stream, matching the recogniser's rate
        self._stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1,
                                              rate=16000, input=True, frames_per_buffer=2048)
        self._stream.start_stream()
        # Create AnkiConnect API handler object
        self._anki_action = AnkiActionHandler(
            alert_sound_enabled=alert_sound_enabled)
        # Behaviour configuration flags
        self._speech_to_text_paused = False
        self._alert_sound_enabled = alert_sound_enabled
        # Parse command JSON configuration (may raise JSONDecodeError or
        # AnkiVoiceError -- see docstring)
        self.command_config_load(command_config)
        # Text-to-speech engine -- marked "tba" by the author
        self.engine = pyttsx3.init()
Пример #3
0
def translate_file(filename="last5.wav"):
    """Run Vosk recognition over a WAV file and return the transcript text.

    Expects a Vosk model directory named 'model' in the current folder and
    a mono 16-bit PCM WAV file named *filename* in the current directory.
    Intermediate and partial results are echoed to stdout while decoding.
    """
    SetLogLevel(0)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)

    wav = wave.open("./" + filename, "rb")
    # Vosk only handles single-channel 16-bit uncompressed PCM input.
    if (wav.getnchannels() != 1 or wav.getsampwidth() != 2
            or wav.getcomptype() != "NONE"):
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    recognizer = KaldiRecognizer(Model("./model"), wav.getframerate())

    # Feed the audio through in 4000-frame chunks until EOF (b"").
    for chunk in iter(lambda: wav.readframes(4000), b""):
        if recognizer.AcceptWaveform(chunk):
            print(recognizer.Result())
        else:
            print(recognizer.PartialResult())

    final = recognizer.FinalResult()
    return json.loads(final)["text"]  #["results"] for confidence of each word
Пример #4
0
def speech_to_text(file):
    """Transcribe *file* with Vosk.

    Returns a pair of parallel lists: the start time of each recognised
    segment and the segment's text.
    """
    SetLogLevel(0)

    starts = []
    sentences = []

    # Open the audio file; the context manager closes it when done.
    with wave.open(file, "rb") as wav:

        # Build the recogniser from the local 'model' directory.
        recognizer = KaldiRecognizer(Model("model"), wav.getframerate())

        # Read the audio in 4000-frame chunks until exhausted (b"" at EOF).
        for chunk in iter(lambda: wav.readframes(4000), b""):
            if recognizer.AcceptWaveform(chunk):

                # A segment was finalised: decode its JSON payload.
                segment = json.loads(recognizer.Result())

                # Record the timestamp and text only when words were found.
                if 'result' in segment:
                    starts.append(segment['result'][0]['start'])
                    sentences.append(segment['text'])

    return starts, sentences
Пример #5
0
 def __init__(self, fileName):
     """
     Initialise the recogniser for the given input file.

     :param fileName: path of the input media file; handed to
         ``judgeCondition``, which returns the recogniser and a process
         handle -- presumably a decoding subprocess; TODO confirm against
         judgeCondition's implementation.
     """
     SetLogLevel(0)
     self.rec, self.process = self.judgeCondition(fileName)
Пример #6
0
def main():
    """Transcribe an audio/video file with Vosk and print the text.

    Command line options:
        -f / --file_name    path of the media file to transcribe
        -m / --model_path   Vosk model directory (default "./model")

    The input is converted by ffmpeg to 16 kHz mono s16le PCM, decoded
    through Vosk, and the recognised text is printed to stdout.
    """
    argv = sys.argv[1:]
    model_path = "./model"
    filename = ""

    try:

        opts, _ = getopt.getopt(argv, "f:m:", ["file_name =", "model_path ="])

    # BUG FIX: the original used a bare `except:`, which also swallows
    # SystemExit/KeyboardInterrupt; only parsing failures belong here.
    except getopt.GetoptError:
        print("Error with arguments")
        return

    for opt, arg in opts:
        if opt in ['-f', '--file_name']:
            filename = arg
        elif opt in ['-m', '--model_path']:
            model_path = arg

    print("FILE: ", filename, " MODEL: ", model_path)

    if not os.path.exists(model_path):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        return

    SetLogLevel(-1)
    sample_rate = 16000
    model = Model(model_path)
    rec = KaldiRecognizer(model, sample_rate)

    # ffmpeg writes raw 16 kHz mono PCM to its stdout pipe.
    process = subprocess.Popen([
        'ffmpeg', '-loglevel', 'quiet', '-i', filename, '-ar',
        str(sample_rate), '-ac', '1', '-f', 's16le', '-'
    ],
                               stdout=subprocess.PIPE)

    # BUG FIX: the original concatenated segment texts with no separator,
    # running the last word of one utterance into the first of the next.
    parts = []
    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            parts.append(json.loads(rec.Result())['text'])

    parts.append(json.loads(rec.FinalResult())['text'])
    result = " ".join(p for p in parts if p)
    print("\n")
    print(result)
Пример #7
0
    def __init__(self, wav_audio, model="model-indian"):
        """Store the input audio and load the Vosk model used to decode it.

        :param wav_audio: WAV audio input to transcribe
        :param model: directory of the Vosk model, defaults to "model-indian"
        """
        self.model = Model(model)
        self.wav_audio = wav_audio

        # State populated later during transcription.
        self._wf = None
        self._output_wav = None
        self.transcript = ""
        self.timestamped_text = []

        # Silence Vosk's logging output.
        SetLogLevel(-1)
Пример #8
0
 def set_up(self):
     """Prepare Vosk: verify the model directory exists, then set the log level.

     Exits the process with status 1 when ``self.model_path`` is missing.
     """
     model_missing = not os.path.exists(self.model_path)
     if model_missing:
         print("Please download the model from "
               "https://github.com/alphacep/vosk-api/blob/master/doc/models.md "
               "and unpack as 'model' in the current folder.")
         exit(1)
     SetLogLevel(level=0)
Пример #9
0
    def init_app(self, config):
        """Initialise the app components from *config*.

        Loads the Vosk model from ``config.MODEL_PATH``, stores the sample
        rate from ``config.RATE``, and wires up the SFTP client.  Exits the
        process with status 1 when the model directory does not exist.
        """
        SetLogLevel(0)
        model_path = config.MODEL_PATH
        self.rate = int(config.RATE)
        if not os.path.exists(model_path):
            print("Error in model path. Such directory does not exist!")
            exit(1)
        self.model = Model(model_path)
        # NOTE(review): recogniser creation is commented out -- presumably
        # built elsewhere per request; confirm before re-enabling.
        # self.stt_recognizer = KaldiRecognizer(self.model, rate)

        # self.psql_client = PostgresClient()
        # self.psql_client.init_app(config=config)

        self.sftp_client = SftpClient()
        self.sftp_client.init_app(config=config)

        self.config = config
Пример #10
0
def speech_recog(fileIn):
    """Transcribe *fileIn* with Vosk and dump the raw results next to it.

    The audio is converted by ffmpeg to 16 kHz mono s16le PCM, every
    finalised recognition result (plus the final flush) is collected, each
    recognised word is tagged with the source file name, and the full
    result list is written to ``<fileIn stem>.json``.

    Args:
        fileIn: path of the audio file to transcribe.

    Returns:
        Whatever ``words_from_list`` produces from the collected results.
    """
    datalist = []
    SetLogLevel(0)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)

    sample_rate = 16000
    model = Model("model")
    rec = KaldiRecognizer(model, sample_rate)
    # BUG FIX: the original wrapped this Popen call in
    # `try: ... except IndexError: raise`, a no-op -- Popen does not raise
    # IndexError, and re-raising immediately changes nothing.  Removed.
    process = subprocess.Popen([
        'ffmpeg', '-loglevel', 'quiet', '-i', fileIn, '-ar',
        str(sample_rate), '-ac', '1', '-f', 's16le', '-'
    ],
                               stdout=subprocess.PIPE)

    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            result = rec.Result()
            datalist.append(json.loads(result))

    finalResult = rec.FinalResult()
    datalist.append(json.loads(finalResult))
    print(fileIn)

    # Tag every recognised word with its source file.
    for entry in datalist:
        if "result" in entry:
            for word in entry["result"]:
                word.update({"file": fileIn})

    words = words_from_list(datalist)

    with open(os.path.splitext(fileIn)[0] + ".json", "w") as output_json:
        output_json.write(json.dumps(datalist))

    return words
Пример #11
0
    def __init__(self, callback=None, **kwargs):
        """Begin microphone capture and Vosk-based audio analysis.

        :param callback: function invoked with the recognised text
        :param kwargs: optional settings -- 'audio_file_path', 'language'
            (default "model-fr"), 'log_level' (default -1), 'grammar_file'
        """
        SpeechRecognition.__init__(self, kwargs.get('audio_file_path', None))

        # Pull optional settings out of kwargs, applying their defaults.
        self.grammar_file = kwargs.get('grammar_file', None)
        self.log_level = kwargs.get('log_level', -1)
        self.language = kwargs.get('language', "model-fr")
        self.main_controller_callback = callback

        SetLogLevel(self.log_level)

        # Hook up the recognition callback and start the audio loop.
        self.set_callback(self.vosk_callback)
        self.start_processing()
Пример #12
0
def init():
    """Set up logging for the application from a YAML config file.

    The file name comes from the CONFIG_FILE_NAME environment variable
    (default 'logging_config.yaml') and is resolved relative to this
    module.  Failures are reported to stderr but are never fatal.  Vosk
    and TensorFlow log output are silenced as well.
    """
    config_file_name = os.environ.get('CONFIG_FILE_NAME',
                                      'logging_config.yaml')
    print(
        f'Configuring the logging system from config file: {config_file_name}',
        flush=True)
    try:
        # NOTE: the path join stays inside the try -- it can raise
        # TypeError when __file__ is unavailable (e.g. frozen builds).
        with open(os.path.join(os.path.dirname(__file__), config_file_name),
                  'r') as fin:
            logging.config.dictConfig(yaml.load(fin, Loader=yaml.FullLoader))
    except (TypeError, FileNotFoundError, ValueError):
        print('Failed to initialise the logging framework', file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
    # Silence Vosk's own log output.
    SetLogLevel(-1)
    # Keep TensorFlow's C++ layer quiet unless it hits an error.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
Пример #13
0
def vosk_model(address):
    """Print Vosk recognition output for the WAV file at *address*.

    Partial and finalised results are printed while decoding, followed by
    the final flush.  Exits with status 1 unless the file is mono 16-bit
    uncompressed PCM.
    """
    SetLogLevel(2)

    wav = wave.open(address, "rb")
    if (wav.getnchannels() != 1 or wav.getsampwidth() != 2
            or wav.getcomptype() != "NONE"):
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    recognizer = KaldiRecognizer(
        Model("../audio_utils/tests/vosk_test/model"), wav.getframerate())

    # Stream the audio through the recogniser in 4000-frame chunks.
    while True:
        frames = wav.readframes(4000)
        if not frames:
            break
        if recognizer.AcceptWaveform(frames):
            print(recognizer.Result())
        else:
            print(recognizer.PartialResult())

    print(recognizer.FinalResult())
Пример #14
0
    def decode_file(self, aud_file):
        """Transcribe *aud_file* with this object's recogniser.

        Returns the final-flush text when present; otherwise, when the
        average per-word confidence of the accepted segments exceeds 0.8,
        the accumulated sentence (lower-cased, stripped).  Returns "" when
        confidence is too low or nothing was recognised.

        Exits the process unless the file is mono 16-bit PCM WAV.
        """
        SetLogLevel(0)
        sentence = ""
        results = ""
        confidence = 0
        tot = 0

        wf = wave.open(aud_file, "rb")
        # BUG FIX: the original only closed wf AFTER an early `return`,
        # leaking the handle on the final-result and error paths; close it
        # on every path instead.
        try:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
            ) != "NONE":  #checking certain file characteristics
                print("Audio aud_file must be WAV format mono PCM.")
                exit(1)

            while True:  #loop for doing voice recognition
                data = wf.readframes(4000)
                if len(data) == 0:  #done reading audio file
                    break
                if self.rec.AcceptWaveform(
                        data):  #finished recognition on segment of audio file
                    items = self.rec.Result()
                    results = json.loads(items)
                    if len(results.items(
                    )) > 1:  #false recognition, sometimes nothing is detected
                        for i in results["result"]:
                            confidence += i["conf"]
                            tot += 1
                        sentence = sentence + " " + results["text"]
                    else:
                        print(self.rec.PartialResult())
            f_res = json.loads(self.rec.FinalResult())
            if len(f_res.items()) > 1:
                return f_res["text"]
        finally:
            wf.close()
        if tot > 0 and confidence / tot > .8:  #checking confidence of recognition
            return sentence.lower().strip()
        elif tot > 0:
            print("confidence too low: " + str(confidence / tot))
        return ""
Пример #15
0
    def decode_file(self, aud_file):
        """Decode *aud_file* with the instance recogniser.

        Prints the text of every finalised segment, then the raw list, and
        returns the list of raw JSON result strings.  Exits with status 1
        unless the file is mono 16-bit PCM WAV.
        """
        SetLogLevel(0)

        wf = wave.open(aud_file, "rb")
        if (wf.getnchannels() != 1 or wf.getsampwidth() != 2
                or wf.getcomptype() != "NONE"):
            print("Audio aud_file must be WAV format mono PCM.")
            exit(1)

        results = []

        # Collect the raw JSON string of each completed segment.
        for chunk in iter(lambda: wf.readframes(4000), b""):
            if self.rec.AcceptWaveform(chunk):
                results.append(self.rec.Result())

        for raw in results:
            decoded = json.loads(raw)
            print("---VOSK TEXT---", decoded["text"])
        print("results:", results)
        return results
    def _recognize_vosk(self):
        """Transcribe ``self.file_name`` with the Spanish Vosk model.

        Returns:
            str: the recognised text, or "" when decoding the final result
            fails (best-effort fallback, as in the original).

        Raises:
            Exception: when the model directory is missing or the input is
                not mono 16-bit PCM WAV.
        """
        SetLogLevel(0)
        if not os.path.exists("vosk-model-small-es-0.3"):
            raise Exception("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
        text = []
        wf = wave.open(self.file_name, "rb")
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            raise Exception("Audio file must be WAV format mono PCM.")
        model = Model("vosk-model-small-es-0.3")
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)

        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            # BUG FIX: the original ignored AcceptWaveform's return value
            # and read only FinalResult(), dropping the text of every
            # utterance finalised before the end of the file.
            if rec.AcceptWaveform(data):
                segment = json.loads(rec.Result())
                if segment.get('text'):
                    text.append(segment['text'])
        wf.close()  # BUG FIX: the original never closed the wave file

        try:
            res = json.loads(rec.FinalResult())
            if res.get('text'):
                text.append(res['text'])
            return " ".join(text)
        except Exception as e:
            # Deliberate best-effort: report the error, return empty text.
            print(e)
            return ""
Пример #17
0
def speech_to_text(model, audio_file, output_text_file, verbose):
    """Transcribe *audio_file* with *model*, writing and/or printing results.

    The audio is converted by ffmpeg to mono s16le PCM (at ``sample_rate``,
    which is not defined in this function -- presumably a module-level
    constant; confirm) and fed to ``reconize``.  Each non-empty result is
    appended to *output_text_file* (one per line) when given; results are
    printed when *verbose* is true, and always when no output file is given.
    """
    SetLogLevel(-1)

    process = subprocess.Popen(
        [
            "ffmpeg",
            "-loglevel",
            "quiet",
            "-i",
            audio_file,
            "-ar",
            str(sample_rate),
            "-ac",
            "1",
            "-f",
            "s16le",
            "-",
        ],
        stdout=subprocess.PIPE,
    )

    output_f = None
    if output_text_file:
        output_f = open(output_text_file, mode="w")
    else:
        # No output file: make sure the results are at least printed.
        verbose = True

    # BUG FIX: close the output file even when recognition or a write
    # raises; the original leaked the handle on any error in this loop.
    try:
        for result in reconize(model, process):
            if result:
                if output_f:
                    output_f.write(f"{result}\n")
                if verbose:
                    print(result)
    finally:
        if output_f:
            output_f.close()
Пример #18
0
    def __init__(self,
                 notifier,
                 rate=16000,
                 wav_dir=None,
                 model=os.path.join(_MODEL_DIR, 'model')):
        """
        @see AudioInput.__init__()

        :type  rate:
        :param rate:
            Sample-rate override, if different from the model's own.
        :type  wav_dir:
        :param wav_dir:
            Directory in which to save the wave files, if anywhere.
        :type  model:
        :param model:
            The path to the Vosk model file.
        """
        # The model must exist before we try to load it.
        if not os.path.exists(model):
            raise IOError("Not found: %s" % (model, ))

        # Loading can be slow, so say what we're doing first.  Vosk's own
        # logging is tuned down when our effective level is INFO or above.
        LOG.info("Loading model from %s, this could take a while", model)
        at_least_info = LOG.getLogger().getEffectiveLevel() >= 20
        SetLogLevel(1 if at_least_info else 2)
        self._model = Model(model)
        self._recognizer = KaldiRecognizer(self._model, rate)
        LOG.info("Model loaded")

        # With the model in place, the superclass can be initialised.
        super(VoskInput, self).__init__(notifier,
                                        format=pyaudio.paInt16,
                                        channels=1,
                                        rate=rate,
                                        wav_dir=wav_dir)

        # Recognition results accumulate here.
        self._results = []
Пример #19
0
def transcribe(file_name):
    """Transcribe a WAV file with Vosk and return the recognised text.

    The input is first down-mixed to mono (via pydub) and written to
    'generate/audio.wav', which is then decoded.

    Args:
        file_name: path of the source WAV file.

    Returns:
        str: the transcript from the recogniser's final flush.
    """
    sound = AudioSegment.from_wav(file_name)
    sound = sound.set_channels(1)
    sound.export("generate/audio.wav", format="wav")

    SetLogLevel(-1)

    if not os.path.exists("model"):
        print(
            "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder."
        )
        exit(1)

    wf = wave.open('generate/audio.wav', "rb")
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype(
    ) != "NONE":
        print("Audio file must be WAV format mono PCM.")
        exit(1)

    model = Model("model")
    rec = KaldiRecognizer(model, wf.getframerate())

    # Feed the whole file through the recogniser; only the final flush is
    # used.  (The original had dead if/else branches that both did nothing.)
    try:
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            rec.AcceptWaveform(data)
    finally:
        wf.close()  # BUG FIX: the original never closed the wave file

    r = rec.FinalResult()
    r = json.loads(r)
    return r['text']
Пример #20
0
#!/usr/bin/env python3

from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import subprocess
import srt
import json
import datetime

SetLogLevel(-1)

if not os.path.exists("./VTT/model"):
    print("Trying to download voice model, this is a one time thing and may take a while...")
    try:
        if not os.path.exists("./VTT/model.zip"):
            import urllib.request
            print("Downloading...")
            urllib.request.urlretrieve("https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip", "./VTT/model.zip")
        import zipfile
        import platform
        print("Extracting...")
        with zipfile.ZipFile("./VTT/model.zip", 'r') as zip_ref:
            zip_ref.extractall("./VTT/")
        ##Rename the folder:
        files = os.listdir('./VTT')
        for file in files:
            if 'model' in file and '.zip' not in file:
                if platform.system() == "Windows":
                    status = subprocess.call('copy %s model /e'%('./VTT/'+file), shell=True)
                else:
Пример #21
0
 def __init__(self, model_path, text_processor=None):
     """Load the Vosk model and remember the optional text post-processor.

     :param model_path: directory of the Vosk model to load
     :param text_processor: optional processor applied to recognised text
         downstream -- semantics depend on the caller; TODO confirm
     """
     SetLogLevel(-1)
     self.vosk_model = Model(model_path)
     self.text_processor = text_processor
     # Default sample rate in Hz; presumably matches the audio fed to the
     # recogniser -- confirm against the capture/decoding code.
     self.sample_rate = 16000
Пример #22
0
#!/usr/bin/env python3
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import glob
import wave
from pathlib import Path

SetLogLevel(0)

# Locate the speech model under the user's home directory.
home = str(Path.home())
modelDir = os.path.join(home, "git", "callrail_voice_to_text", "Data",
                        "SpeechModel", "model")

if not os.path.exists(modelDir):
    print(
        "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the folder:"
        + modelDir)
    exit(1)

model = Model(modelDir)
# Source directory of the WAV files to transcribe.
wavFileDir = os.path.join(home, "git", "callrail_voice_to_text", "Data",
                          "SoundEncodeToWav")
# Establish destination directory; attempt to create it.  When it already
# exists, os.mkdir raises FileExistsError (an OSError), which is only
# reported -- the script carries on either way.
try:
    os.mkdir(wavFileDir)
except OSError as error:
    print(error)

# Where the text results will be written (used later in the script).
destinationDir = os.path.join(home, "git", "callrail_voice_to_text", "Data",
                              "WavToTextResults")
Пример #23
0
 def __init__(self, modelpath, log_prefix='[vosk_stt]'):
     """Remember the model path and log prefix; set Vosk's log verbosity.

     :param modelpath: path of the Vosk model directory (loaded elsewhere)
     :param log_prefix: prefix used when this object writes log lines
     """
     SetLogLevel(0)
     self.log_prefix = log_prefix
     # Note: only the PATH is stored here; the Model object is not built.
     self.model = modelpath
 def get_recognizer(self, framerate):
     """Build a KaldiRecognizer for audio at the given sample rate.

     Loads the model bundled under the plugin's 'vosk_alternatives/model'
     directory.  Note: the model is re-loaded on every call.

     :param framerate: sample rate (Hz) of the audio to be recognised
     :return: a ready-to-use KaldiRecognizer
     """
     SetLogLevel(-1)
     model = Model(os.path.join(c.PLUGIN_PATH, "vosk_alternatives",
                                "model"))
     rec = KaldiRecognizer(model, framerate)
     return rec
Пример #25
0
 def __init__(self):
     """Set the Vosk log level and load the Vosk model, both from `config`."""
     SetLogLevel(config.vosk_log_level)
     self.model = Model(config.vosk_model_dir)
Пример #26
0
def gen_subparts(input_file,
                 model_dir,
                 verbose=False,
                 partlen=4,
                 progress=False):
    """Yield SubPart subtitle chunks for *input_file*, about *partlen* s each.

    Runs Vosk over the audio (converted by ffmpeg to 16 kHz mono s16le PCM)
    and groups the timestamped words of each recognised segment into
    SubParts whose span does not exceed roughly *partlen* seconds.

    Args:
        input_file: media file readable by ffmpeg/ffprobe.
        model_dir: Vosk model directory.
        verbose (bool): keep Vosk logging enabled when True.
        partlen (int): target maximum chunk length, in seconds.
        progress (bool): display a tqdm progress bar when True.

    Yields:
        SubPart: consecutive (start, end, text) chunks; a final chunk built
        from the trailing partial result covers up to the file's duration.
    """
    SetLogLevel(0 if verbose else -1)

    model = Model(model_dir)
    rec = KaldiRecognizer(model, 16000)

    # ffmpeg decodes to raw 16 kHz mono PCM on its stdout pipe.
    process = subprocess.Popen([
        'ffmpeg', '-loglevel', 'quiet', '-i', input_file, '-ar',
        str(16000), '-ac', '1', '-f', 's16le', '-'
    ],
                               stdout=subprocess.PIPE)

    # Probe the total duration (seconds) -- used for the progress bar and
    # as the end time of the final yielded chunk.
    r = subprocess.run(
        "ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1"
        .split() + [input_file],
        stdout=subprocess.PIPE)
    duration = float(r.stdout.decode('utf-8').strip())

    if progress:
        pbar = tqdm(total=duration, unit="s")

    prev_end = 0
    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            r = json.loads(rec.Result())
            if 'result' in r:
                resultpart = []  # TODO: use this across AccesptForm
                for result in r['result']:
                    # Accumulated words already span >= partlen seconds:
                    # flush them as one chunk before handling this word.
                    if len(resultpart) > 0 and float(result['end']) - float(
                            resultpart[0]['start']) >= partlen:
                        yield SubPart(start=resultpart[0]['start'],
                                      end=float(resultpart[-1]['end']),
                                      text=" ".join(r['word']
                                                    for r in resultpart))
                        prev_end = float(resultpart[-1]['end'])
                        resultpart = []
                    # A single word at least partlen seconds long becomes
                    # its own chunk; otherwise it joins the accumulator.
                    if float(result['end'] - result['start']) >= partlen:
                        yield SubPart(start=float(result['start']),
                                      end=float(result['end']),
                                      text=result['word'])
                        prev_end = float(result['end'])
                        resultpart = []
                    else:
                        resultpart.append(result)
                    if progress:
                        pbar.update(float(result['end'] - pbar.n))

                # Flush whatever remains of this segment's words.
                if len(resultpart) > 0:
                    yield SubPart(start=float(resultpart[0]['start']),
                                  end=float(resultpart[-1]['end']),
                                  text=" ".join(r['word'] for r in resultpart))
                    prev_end = float(resultpart[-1]['end'])
                    resultpart = []

        else:
            pass
            #print(rec.PartialResult())
    #pprint(rec.PartialResult())
    if progress:
        pbar.close()
    # The trailing partial result (never finalised) covers the file's tail.
    r = json.loads(rec.PartialResult())
    text = r['partial']
    yield SubPart(start=prev_end, end=duration, text=text)
Пример #27
0
 def _enable_logs(cls, vosk_logs):
     """Turn on debug-level Python logging; optionally enable Vosk logs too.

     :param vosk_logs: when truthy, also set Vosk's log level to 0
     """
     # Vosk logging is opt-in; Python logging is always set to DEBUG.
     if vosk_logs:
         SetLogLevel(0)
     logging.basicConfig(level=logging.DEBUG)