Exemplo n.º 1
0
    def special_mode(self, name, phrases):
        """Temporarily replace the active STT engine with a special-mode one.

        A new engine is built from the plugin registered under
        ``self.special_stt_slug`` and installed as ``self.active_stt_engine``
        for the duration of the single ``yield``.  The previous engine is
        always put back afterwards, even if the caller's code raises.

        NOTE(review): this is a generator; presumably it is wrapped by a
        context-manager decorator outside this block -- confirm.

        Args:
            name: passed through to the plugin class constructor.
            phrases: passed through to the plugin class constructor.
        """
        stt_info = self.plugins.get_plugin(self.special_stt_slug,
                                           category='stt')
        stt_config = profile.get_profile()

        previous_engine = self.active_stt_engine

        try:
            engine = stt_info.plugin_class(name, phrases, stt_info,
                                           stt_config)
            if not profile.check_profile_var_exists(['special_stt']):
                # No dedicated special_stt section in the profile: inherit
                # the audio settings of the engine being replaced.
                engine._samplerate = previous_engine._samplerate
                engine._volume_normalization = (
                    previous_engine._volume_normalization)
            else:
                # A special_stt section exists: apply only the settings it
                # actually defines.
                has_samplerate = profile.check_profile_var_exists(
                    ['special_stt', 'samplerate'])
                if has_samplerate:
                    samplerate = profile.get_profile_var(
                        ['special_stt', 'samplerate'])
                    engine._samplerate = int(samplerate)
                has_normalization = profile.check_profile_var_exists(
                    ['special_stt', 'volume_normalization'])
                if has_normalization:
                    normalization = profile.get_profile_var(
                        ['special_stt', 'volume_normalization'])
                    engine._volume_normalization = float(normalization)
            self.active_stt_engine = engine
            yield
        finally:
            self.active_stt_engine = previous_engine
Exemplo n.º 2
0
def fetch_intents(c):
    """Return a sorted list of every known intent name.

    The result combines the distinct, non-empty, non-'unclear' values of
    the ``intent`` and ``verified_intent`` columns of the ``audiolog``
    table with the intents declared by every detected "speechhandler"
    plugin.

    Args:
        c: database cursor used to query the ``audiolog`` table.

    Returns:
        list: the unique intent names in sorted order.
    """
    # Intents already recorded in the audio log (raw or verified).
    c.execute(" ".join([
        "select intent from (", "   select ", "       intent",
        "   from audiolog", "   union select",
        "       verified_intent as intent", "   from audiolog",
        ")a where intent not in ('', 'unclear') order by intent"
    ]))
    _intents = {row[0] for row in c.fetchall()}

    # Intents declared by the speechhandler plugins.
    ps = pluginstore.PluginStore()
    ps.detect_plugins("speechhandler")
    for info in ps.get_plugins_by_category("speechhandler"):
        try:
            plugin = info.plugin_class(info, profile.get_profile())
            if hasattr(plugin, "intents"):
                _intents.update(plugin.intents())
        except Exception as e:
            # Best effort: a plugin that fails to load is skipped, not fatal.
            # Python 3 exceptions have no ``message`` attribute, so fall
            # back to str(e) instead of always reporting 'Unknown'.
            reason = getattr(e, 'message', None) or str(e) or 'Unknown'
            _logger.warning(
                "Plugin '{}' skipped! (Reason: {})".format(info.name, reason),
                exc_info=True)
    return sorted(_intents)
Exemplo n.º 3
0
    def __init__(self, *args, **kwargs):
        """Set up logging, translations, the HTTP session and server URL.

        All positional and keyword arguments are forwarded unchanged to
        ``plugin.STTPlugin.__init__``.
        """
        self._logger = logging.getLogger(__name__)

        # Build a gettext function from the translations under the
        # 'locale' data path and the current profile.
        locale_translations = i18n.parse_translations(paths.data('locale'))
        gettext_source = i18n.GettextMixin(locale_translations,
                                           profile.get_profile())
        _ = gettext_source.gettext

        plugin.STTPlugin.__init__(self, *args, **kwargs)

        # One shared session for requests to the server.
        self._http = requests.Session()

        # Server URL from the profile, falling back to defaultKaldiServer.
        url_path = ['kaldigstserver-stt', 'url']
        self._url = profile.get(url_path, defaultKaldiServer)
Exemplo n.º 4
0
    def getEventsTomorrow(self, mic):
        """Announce tomorrow's events from the primary calendar.

        Requests, page by page, the events whose time falls between
        00:00:00 and 23:59:59 of tomorrow (in the timezone returned by
        ``app_utils.get_timezone``) and speaks each one through ``mic`` as
        "<title> at <hour>:<minute> am|pm".  If no event is found on any
        page, a single "no events" message is spoken instead.

        Args:
            mic: object whose ``say(text)`` method voices the output.

        Returns:
            None.
        """
        tz = app_utils.get_timezone(profile.get_profile())

        # Tomorrow's start and end times in RFC3339 format.  The UTC offset
        # is taken from the "+HH:MM" / "-HH:MM" suffix of isoformat().
        d = datetime.datetime.now(tz=tz) + datetime.timedelta(days=1)
        m = re.search(r'((\+|\-)[0-9]{2}\:[0-9]{2})', str(d.isoformat()))
        utcString = m.group(0)
        day = str(d.strftime("%Y-%m-%d"))
        tomorrowStartTime = "".join([day, "T00:00:00", utcString])
        tomorrowEndTime = "".join([day, "T23:59:59", utcString])

        page_token = None
        events_found = False

        while True:
            # Gets events from primary calendar from each page
            # in tomorrow day boundaries.
            # NOTE(review): unlike getEventsToday, this request does not set
            # singleEvents/orderBy -- confirm whether that is intended.
            events = self.service.events().list(
                calendarId='primary',
                pageToken=page_token,
                timeMin=tomorrowStartTime,
                timeMax=tomorrowEndTime).execute()

            for event in events.get('items', []):
                events_found = True
                try:
                    eventTitle = str(event['summary'])
                    # 'dateTime' looks like YYYY-MM-DDTHH:MM:SS+HH:MM; keep
                    # only the hour and minute of the time part.
                    startTime = event['start']['dateTime'].split("T")[1]
                    startHour, startMinute, _rest = startTime.split(":", 2)
                    startHour = int(startHour)

                    # Convert 24-hour to 12-hour clock: 12:xx is "pm" and
                    # 00:xx is spoken as 12:xx "am".
                    if startHour >= 12:
                        appendingTime = self.gettext("pm")
                    else:
                        appendingTime = self.gettext("am")
                    startHour = startHour % 12 or 12

                    mic.say(" ".join([
                        eventTitle,
                        self.gettext("at"),
                        str(startHour) + ":" + str(startMinute),
                        appendingTime
                    ]))

                except KeyError:
                    # Raised when the event lacks 'summary', 'start' or
                    # 'dateTime'.
                    mic.say(
                        self.gettext(
                            "Check Calendar that you added it correctly"))

            page_token = events.get('nextPageToken')

            if not page_token:
                break

        # Only report "no events" when every page was empty; an empty page
        # after some events were spoken must not contradict them.
        if not events_found:
            mic.say(self.gettext("You have no events scheduled Tomorrow"))
Exemplo n.º 5
0
    def getEventsToday(self, mic):
        """Announce today's events from the primary calendar.

        Requests, page by page, the events whose time falls between
        00:00:00 and 23:59:59 of today (in the timezone returned by
        ``app_utils.get_timezone``), ordered by start time, and speaks each
        one through ``mic`` as "<title> at <hour>:<minute> am|pm".  If no
        event is found on any page, a single "no events" message is spoken
        instead.

        Args:
            mic: object whose ``say(text)`` method voices the output.

        Returns:
            None.
        """
        tz = app_utils.get_timezone(profile.get_profile())

        # Today's start and end times in RFC3339 format.  The UTC offset
        # is taken from the "+HH:MM" / "-HH:MM" suffix of isoformat().
        d = datetime.datetime.now(tz=tz)
        m = re.search(r'((\+|\-)[0-9]{2}\:[0-9]{2})', str(d.isoformat()))
        utcString = str(m.group(0))
        day = str(d.strftime("%Y-%m-%d"))
        todayStartTime = day + "T00:00:00" + utcString
        todayEndTime = day + "T23:59:59" + utcString

        page_token = None
        events_found = False

        while True:
            # Gets events from primary calendar from each page
            # in present day boundaries.  pageToken must be forwarded,
            # otherwise every request returns the first page again and the
            # loop never ends once a nextPageToken is present.
            events = self.service.events().list(calendarId='primary',
                                                pageToken=page_token,
                                                timeMin=todayStartTime,
                                                timeMax=todayEndTime,
                                                singleEvents=True,
                                                orderBy='startTime').execute()

            for event in events.get('items', []):
                events_found = True
                try:
                    eventTitle = str(event['summary'])
                    # 'dateTime' looks like YYYY-MM-DDTHH:MM:SS+HH:MM; keep
                    # only the hour and minute of the time part.
                    startTime = event['start']['dateTime'].split("T")[1]
                    startHour, startMinute, _rest = startTime.split(":", 2)
                    startHour = int(startHour)

                    # Convert 24-hour to 12-hour clock: 12:xx is "pm" and
                    # 00:xx is spoken as 12:xx "am".
                    if startHour >= 12:
                        appendingTime = self.gettext("pm")
                    else:
                        appendingTime = self.gettext("am")
                    startHour = startHour % 12 or 12

                    mic.say(" ".join([
                        eventTitle,
                        self.gettext("at"),
                        str(startHour) + ":" + str(startMinute),
                        appendingTime
                    ]))

                except KeyError:
                    # Raised when the event lacks 'summary', 'start' or
                    # 'dateTime'.
                    mic.say(
                        self.gettext(
                            "Check Calendar that you added it correctly"))

            page_token = events.get('nextPageToken')

            if not page_token:
                break

        # Only report "no events" when every page was empty; an empty page
        # after some events were spoken must not contradict them.
        if not events_found:
            mic.say(self.gettext("You have no events scheduled for today"))
Exemplo n.º 6
0
    def HandleCommand(self, command, description):
        """Run one step of the Pocketsphinx model adaptation workflow.

        The steps form a chain; each call runs the step named by
        ``command`` and reports which step should be requested next:

            "" -> checkenviron -> prepareworkingdir -> featureextraction
               -> buildweights -> mllr -> map -> sendump -> updateprofile

        Args:
            command: name of the step to run ("" starts the workflow).
            description: list of strings, appended to in place; at the
                final step it is joined and stored in the ``trainings``
                table.

        Returns:
            tuple: ``(response, nextcommand, description)`` where
            ``response`` is a list of HTML fragments describing what
            happened and ``nextcommand`` is the next step name, or "" when
            the workflow is finished or the current step failed.
        """
        # Bind the result variables before the try block so the exception
        # handler and the final return can always use them, even when
        # opening the database itself fails.
        response = []
        continue_next = True
        nextcommand = ""
        conn = None
        try:
            conn = sqlite3.connect(self.audiolog_db)
            c = conn.cursor()
            if (command == ""):
                response.append(
                    "<h2>Preparing to adapt Pocketsphinx model</h2>")
                description.append(
                    "Adapting standard {} pocketsphinx model".format(
                        self.language))
                nextcommand = "checkenviron"
            if (command == "checkenviron"):
                # Now run through the steps to adapt the standard model
                # Start by checking to see if we have a copy of the standard
                # model for this user's chosen language and download it if not.
                # Check for the files we need
                if (not check_pocketsphinx_model(self.standard_dir)):
                    # Check and see if we already have a copy of the standard
                    # language model
                    cmd = [
                        'git', 'clone', '-b', self.language,
                        'https://github.com/NaomiProject/CMUSphinx_standard_language_models.git',
                        self.standard_dir
                    ]
                    completedprocess = run_command(cmd)
                    response.append(
                        process_completedprocess(completedprocess,
                                                 output='html'))
                    if (completedprocess.returncode != 0):
                        continue_next = False
                response.append("Environment configured")
                nextcommand = "prepareworkingdir"
            if (command == "prepareworkingdir"):
                # At this point, we should have the standard model we need
                if (check_pocketsphinx_model(self.standard_dir)):
                    # FIXME It might be safest to remove the working dir at this
                    # point if it already exists
                    if not os.path.isdir(self.model_dir):
                        # Copy the sphinx model into model_dir
                        shutil.copytree(self.standard_dir, self.model_dir)
                    if (check_pocketsphinx_model(self.model_dir)):
                        query = " ".join([
                            "select", " rowid,", " case",
                            "  when length(trim(verified_transcription))>0",
                            "   then (length(trim(verified_transcription))-length(replace(trim(verified_transcription),' ','')))+1",
                            "  else 0", " end as WordCount,", " filename,",
                            " upper(trim(replace(replace(verified_transcription,'?',''),',',''))) as transcription",
                            "from audiolog",
                            "where type in('active','passive') and reviewed!=''"
                        ])
                        df = pd.read_sql_query(query, conn)
                        # Take the above and create naomi.fileids and naomi.transcription
                        # fileids:
                        description.append("on {} wav files".format(
                            str(df.shape[0])))
                        response.append("Adapting on {} wav files".format(
                            df.shape[0]))
                        with open(
                                os.path.join(self.working_dir,
                                             "naomi.fileids"), "w+") as f:
                            for filename in df['filename']:
                                # No need to copy file, just leave it in audiolog
                                f.write("{}\n".format(
                                    filename.rsplit(".", 1)[0]))
                        with open(
                                os.path.join(self.working_dir,
                                             "naomi.transcription"),
                                "w+") as f:
                            for t in df['transcription']:
                                f.write("<s> {} </s>\n".format(t.lower()))
                        nextcommand = "featureextraction"
                    else:
                        response.append(
                            "Error: failed to populate working model")
            if (command == "featureextraction"):
                cmd = [
                    'sphinx_fe', '-argfile',
                    os.path.join(self.model_dir,
                                 'feat.params'), '-samprate', '16000', '-c',
                    os.path.join(self.working_dir, 'naomi.fileids'), '-di',
                    self.audiolog_dir, '-do', self.working_dir, '-ei', 'wav',
                    '-eo', 'mfc', '-mswav', 'yes'
                ]
                completedprocess = run_command(cmd)
                response.append(
                    process_completedprocess(completedprocess, output='html'))
                if (completedprocess.returncode != 0):
                    continue_next = False
                nextcommand = "buildweights"
            if (command == "buildweights"):
                # Prefer the /usr/local/libexec binary when it is present
                bw = '/usr/lib/sphinxtrain/bw'
                if os.path.isfile('/usr/local/libexec/sphinxtrain/bw'):
                    bw = '/usr/local/libexec/sphinxtrain/bw'
                cmd = [
                    bw, '-hmmdir', self.model_dir, '-moddeffn',
                    os.path.join(self.model_dir,
                                 'mdef.txt'), '-ts2cbfn', '.ptm.', '-feat',
                    '1s_c_d_dd', '-svspec', '0-12/13-25/26-38', '-cmn',
                    'current', '-agc', 'none', '-dictfn',
                    os.path.join(self.model_dir, 'cmudict.dict'), '-ctlfn',
                    os.path.join(self.working_dir, 'naomi.fileids'), '-lsnfn',
                    os.path.join(self.working_dir, 'naomi.transcription'),
                    '-cepdir', self.working_dir, '-accumdir', self.working_dir
                ]
                completedprocess = run_command(cmd)
                response.append(
                    process_completedprocess(completedprocess, output='html'))
                if (completedprocess.returncode != 0):
                    continue_next = False
                nextcommand = "mllr"
            if (command == "mllr"):
                # MLLR is a cheap adaptation method that is suitable when the amount of data is limited. It's good for online adaptation.
                # MLLR works best for a continuous model. Its effect for semi-continuous models is limited.
                mllr = '/usr/lib/sphinxtrain/mllr_solve'
                if os.path.isfile('/usr/local/libexec/sphinxtrain/mllr_solve'):
                    mllr = '/usr/local/libexec/sphinxtrain/mllr_solve'
                cmd = [
                    mllr, '-meanfn',
                    os.path.join(self.model_dir, 'means'), '-varfn',
                    os.path.join(self.model_dir, 'variances'), '-outmllrfn',
                    os.path.join(self.model_dir, 'mllr_matrix'), '-accumdir',
                    self.working_dir
                ]
                completedprocess = run_command(cmd)
                response.append(
                    process_completedprocess(completedprocess, output='html'))
                if (completedprocess.returncode != 0):
                    continue_next = False
                nextcommand = "map"
            if (command == "map"):
                # Update the acoustic model files with MAP
                # In this case, unlike MLLR, we don't create a generic transform, but update each parameter in the model
                # We copy the acoustic model directory and overwrite the new directory with the adapted model files
                if (os.path.isdir(self.adapt_dir)):
                    # Remove the adapt dir
                    shutil.rmtree(self.adapt_dir)
                    response.append("Cleared adapt directory {}".format(
                        self.adapt_dir))
                shutil.copytree(self.model_dir, self.adapt_dir)
                map_adapt = '/usr/lib/sphinxtrain/map_adapt'
                if os.path.isfile('/usr/local/libexec/sphinxtrain/map_adapt'):
                    map_adapt = '/usr/local/libexec/sphinxtrain/map_adapt'
                cmd = [
                    map_adapt, '-moddeffn',
                    os.path.join(self.model_dir,
                                 'mdef.txt'), '-ts2cbfn', '.ptm.', '-meanfn',
                    os.path.join(self.model_dir, 'means'), '-varfn',
                    os.path.join(self.model_dir, 'variances'), '-mixwfn',
                    os.path.join(self.model_dir, 'mixture_weights'), '-tmatfn',
                    os.path.join(self.model_dir, 'transition_matrices'),
                    '-accumdir', self.working_dir, '-mapmeanfn',
                    os.path.join(self.adapt_dir, 'means'), '-mapvarfn',
                    os.path.join(self.adapt_dir, 'variances'), '-mapmixwfn',
                    os.path.join(self.adapt_dir,
                                 'mixture_weights'), '-maptmatfn',
                    os.path.join(self.adapt_dir, 'transition_matrices')
                ]
                completedprocess = run_command(cmd)
                response.append(
                    process_completedprocess(completedprocess, output='html'))
                if (completedprocess.returncode != 0):
                    continue_next = False
                nextcommand = "sendump"
            if (command == "sendump"):
                # Recreating the adapted sendump file
                # a sendump file saves space and is supported by pocketsphinx
                mk_s2sendump = '/usr/lib/sphinxtrain/mk_s2sendump'
                if os.path.isfile(
                        '/usr/local/libexec/sphinxtrain/mk_s2sendump'):
                    mk_s2sendump = '/usr/local/libexec/sphinxtrain/mk_s2sendump'
                cmd = [
                    mk_s2sendump, '-pocketsphinx', 'yes', '-moddeffn',
                    os.path.join(self.adapt_dir, 'mdef.txt'), '-mixwfn',
                    os.path.join(self.adapt_dir, 'mixture_weights'),
                    '-sendumpfn',
                    os.path.join(self.adapt_dir, 'sendump')
                ]
                completedprocess = run_command(cmd)
                response.append(
                    process_completedprocess(completedprocess, output='html'))
                if (completedprocess.returncode != 0):
                    continue_next = False
                nextcommand = "updateprofile"
            if (command == "updateprofile"):
                # Format the dictionary
                # Remove whitespace at the beginning of each line
                # Remove (#) after first word
                # collapse multiple whitespaces into a single space
                # Remove any whitespaces from the end
                with open(os.path.join(self.adapt_dir, "cmudict.dict"),
                          "r") as in_file:
                    with open(self.formatteddict_path, "w+") as out_file:
                        for line in in_file:
                            # Remove whitespace at beginning and end
                            line = line.strip()
                            # remove the number in parentheses (if there is one)
                            line = re.sub('([^\\(]+)\\(\\d+\\)', '\\1', line)
                            # compress all multiple whitespaces into a single whitespace
                            # (raw string: '\s' is an invalid escape otherwise)
                            line = re.sub(r'\s+', ' ', line)
                            # replace the first whitespace with a tab
                            line = line.replace(' ', '\t', 1)
                            print(line, file=out_file)
                # Use phonetisaurus to prepare an fst model
                cmd = [
                    "phonetisaurus-train", "--lexicon",
                    self.formatteddict_path, "--seq2_del", "--dir_prefix",
                    os.path.join(self.adapt_dir, "train")
                ]
                completedprocess = run_command(cmd)
                response.append(
                    process_completedprocess(completedprocess, output='html'))
                if (completedprocess.returncode == 0):
                    # Now set the values in profile
                    profile.set_profile_var(['pocketsphinx', 'fst_model'],
                                            os.path.join(
                                                self.adapt_dir, "train",
                                                "model.fst"))
                    profile.set_profile_var(['pocketsphinx', 'hmm_dir'],
                                            self.adapt_dir)
                    profile.save_profile()
                    # Also run through the list of words that have been used
                    # that are not the wake word or a command word and make
                    # sure they are all identified so pocketsphinx can match
                    # them and not get confused on word boundaries.
                    # Pull a list of all words spoken from
                    # verified translations
                    # NOTE: two pairs of adjacent literals below have no
                    # comma between them and are concatenated implicitly.
                    query = " ".join([
                        "with recursive split(", " word,", " rest", ") as (",
                        " select", "  '',"
                        "  upper(replace(replace(verified_transcription,'?',''),',','')) || ' '",
                        " from audiolog",
                        " where type in ('active','passive') and reviewed!=''",
                        " union all select",
                        "  substr(rest, 0, instr(rest,' ')),",
                        "  substr(rest,instr(rest,' ')+1)",
                        " from split where rest <> ''"
                        ")",
                        "select word from split where word!='' group by word"
                    ])
                    c.execute(query)
                    words_used = [x[0].upper() for x in c.fetchall()]
                    # Pull the list of words from the local standard phrases
                    keywords = profile.get_profile_var(['keyword'])
                    if (isinstance(keywords, str)):
                        keywords = [keywords]
                    phrases = [keyword.upper() for keyword in keywords]
                    custom_standard_phrases_dir = paths.sub(
                        os.path.join("data", "standard_phrases"))
                    custom_standard_phrases_file = os.path.join(
                        custom_standard_phrases_dir,
                        "{}.txt".format(self.language))
                    if (os.path.isfile(custom_standard_phrases_file)):
                        with open(custom_standard_phrases_file, mode="r") as f:
                            for line in f:
                                phrase = line.strip().upper()
                                if phrase:
                                    phrases.append(phrase)
                    # Get all the phrases that the plugins are looking for
                    ps = pluginstore.PluginStore()
                    ps.detect_plugins("speechhandler")
                    for info in ps.get_plugins_by_category("speechhandler"):
                        try:
                            plugin = info.plugin_class(info,
                                                       profile.get_profile())
                            # get_phrases is vestigial now
                            if (hasattr(plugin, "get_phrases")):
                                for phrase in plugin.get_phrases():
                                    phrases.extend([
                                        word.upper()
                                        for word in phrase.split()
                                    ])
                            # get the phrases from the plugin intents
                            if (hasattr(plugin, "intents")):
                                intents = plugin.intents()
                                for intent in intents:
                                    for template in intents[intent]['locale'][
                                            self.language]['templates']:
                                        phrases.extend([
                                            word.upper()
                                            for word in template.split()
                                        ])
                        except Exception as e:
                            # Best effort: skip a plugin that fails to load.
                            # Python 3 exceptions have no ``message``
                            # attribute, so fall back to str(e).
                            message = (getattr(e, "message", None) or str(e)
                                       or "Unknown")
                            response.append(
                                "Plugin {} skipped! (Reason: {})".format(
                                    info.name, message))
                            self._logger.warning(
                                "Plugin '{}' skipped! (Reason: {})".format(
                                    info.name, message),
                                exc_info=True)

                    # Get the set of all words in words_used that do not appear
                    # in phrases
                    print("Phrases:")
                    print(phrases)
                    # Build the lookup set once instead of scanning the list
                    # for every word.
                    known_phrases = set(phrases)
                    new_phrases = [
                        word for word in words_used
                        if word not in known_phrases
                    ]
                    response.append("{} new phrases detected".format(
                        len(new_phrases)))
                    description.append("adding {} new phrases".format(
                        len(new_phrases)))
                    if (len(new_phrases) > 0):
                        table = "<table><tr><th>new phrase</th></tr>"
                        # Append the new phrases to the custom
                        # standard_phrases\{language}.txt file
                        if (not os.path.isdir(custom_standard_phrases_dir)):
                            os.makedirs(custom_standard_phrases_dir)
                        with open(custom_standard_phrases_file,
                                  mode="a+") as f:
                            for word in new_phrases:
                                table += "<tr><td>{}</td></tr>".format(word)
                                print(word, file=f)
                        table += "</table>"
                        response.append(table)
                    # Finally, force naomi to regenerate all of the
                    # pocketsphinx vocabularies by deleting all the
                    # vocabularies/{language}/sphinx/{}/revision
                    # files:
                    for revision_file in glob.glob(
                            paths.sub('vocabularies', self.language, 'sphinx',
                                      "*", "revision")):
                        os.remove(revision_file)
                    # Add the description
                    c.execute('''insert into trainings values(?,?,?)''',
                              (datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                               'Adapt Pocketsphinx', " ".join(description)))
                    conn.commit()
                else:
                    continue_next = False
        except Exception as e:
            continue_next = False
            message = getattr(e, "message", None) or str(e) or "Unknown"
            self._logger.error("Error: {}".format(message), exc_info=True)
            response.append('<span class="failure">{}</span>'.format(message))
        finally:
            # Always release the database connection.
            if conn is not None:
                conn.close()
        if not continue_next:
            nextcommand = ""
        return response, nextcommand, description
 print(wakewords)
 if (os.path.isfile(p_args.filename)):
     print("File {} exists, exiting".format(p_args.filename))
 else:
     with (open(p_args.filename, 'w')) as f:
         # Load the STT_Trainer plugins
         plugin_directories = [
             paths.config('plugins'),
             pkg_resources.resource_filename(__name__,
                                             os.path.join('plugins'))
         ]
         plugins = pluginstore.PluginStore(plugin_directories)
         plugins.detect_plugins()
         for info in plugins.get_plugins_by_category("speechhandler"):
             try:
                 plugin = info.plugin_class(info, profile.get_profile())
                 if (hasattr(plugin, "intents")):
                     intents = plugin.intents()
                     for intent in intents:
                         print(intent)
                         for template in intents[intent]['locale']['en-US'][
                                 'templates']:
                             for wakeword in wakewords:
                                 f.write("{} {}\n".format(
                                     wakeword.lower(),
                                     re.sub('\{(.*?)\}', '',
                                            template).lower()))
                                 f.write("{} {}\n".format(
                                     re.sub('\{(.*?)\}', '',
                                            template).lower(),
                                     wakeword.lower()))
Exemplo n.º 8
0
def _resolve_audiolog_file(audiolog_dir, requested):
    """Map a client-supplied wav file name onto a path inside audiolog_dir.

    The name arrives percent-encoded in the query string, so it is decoded
    first. Returns the resolved path, or "" when the name would resolve to
    a location outside audiolog_dir (for example "../../etc/passwd" or an
    absolute path), so the caller never opens a file outside the audio log.
    """
    root = os.path.realpath(audiolog_dir)
    candidate = os.path.realpath(os.path.join(root, unquote(requested)))
    if candidate.startswith(root + os.sep):
        return candidate
    return ""


def application(environ, start_response):
    """WSGI entry point for the audiolog review / STT training page.

    Depending on the request this:
      * answers "/favicon.ico" with a 404,
      * serves a recording when the query string names an existing
        "wavfile" inside the audiolog directory,
      * relays a training command to the "stt_trainer" plugin named by the
        POSTed "engine" field and returns its answer as JSON,
      * otherwise renders the HTML review page, first applying the POSTed
        review ("result" + "rowid") to the audiolog table.

    Args:
        environ: the WSGI environment dictionary.
        start_response: the WSGI start_response callable.

    Returns:
        A list of byte strings forming the response body.
    """
    # Function-scope imports keep this block self-contained; both modules
    # are part of the Python 3 standard library.
    from html import escape
    from urllib.parse import quote

    def esc(value):
        """HTML-escape a value for element text or a quoted attribute."""
        return escape(str(value), quote=True)

    keyword = profile.get_profile_var(["keyword"], ["Naomi"])
    if isinstance(keyword, list):
        keyword = keyword[0]
    # PEP 3333 allows PATH_INFO, QUERY_STRING and CONTENT_LENGTH to be
    # absent, so none of them is indexed directly.
    path_info = environ.get("PATH_INFO", "")
    print("PATH_INFO=%s" % path_info)
    if path_info == "/favicon.ico":
        start_response('404 Not Found',
                       [('content-type', 'text/plain;charset=utf-8')])
        return ["404 Not Found".encode("UTF-8")]

    audiolog_dir = paths.sub("audiolog")
    audiolog_db = os.path.join(audiolog_dir, "audiolog.db")
    wavfile = ""
    rowID = ""
    result = ""
    speaker = ""
    verified_transcription = ""
    post_data = ""
    engine = ""
    # Must exist even when the POST carries no "command" field, otherwise
    # the trainer branch below fails with UnboundLocalError.
    command = ""
    verified_intent = ""
    description = []
    reQS = re.compile("([^=]+)=([^&]*)&?")

    # gather parameters from GET
    for name, value in reQS.findall(environ.get("QUERY_STRING") or ""):
        name = name.lower()
        if name == "wavfile":
            wavfile = _resolve_audiolog_file(audiolog_dir, value)
        elif name == "rowid":
            rowID = value

    # gather parameters from POST
    try:
        content_length = int(environ.get('CONTENT_LENGTH') or 0)
    except ValueError:
        content_length = 0
    if content_length > 0:
        post_data = environ['wsgi.input'].read(content_length).decode(
            "UTF-8")
        for name, value in reQS.findall(post_data):
            name = name.lower()
            if name == "rowid":
                rowID = value.lower()
            elif name == "result":
                result = value.lower()
            elif name == "verified_transcription":
                verified_transcription = unquote(value.replace('+', ' '))
            elif name == "engine":
                engine = unquote(value)
            elif name == "command":
                command = unquote(value.lower())
            elif name == "description":
                description.append(unquote(value))
            elif name == "speaker":
                # Form values are percent-encoded; decode them so that
                # non-ASCII names are not stored as %XX sequences.
                speaker = unquote(value.replace('+', ' '))
            elif name == "verified_intent":
                verified_intent = unquote(value.replace('+', ' '))

    # Handle the request
    # serve a .wav file
    ErrorMessage = None
    if wavfile and os.path.isfile(wavfile):
        start_response('200 OK', [('content-type', 'audio/wav')])
        with open(wavfile, "rb") as w:
            return [w.read()]

    # open a connection to the database
    try:
        conn = sqlite3.connect(audiolog_db)
    except sqlite3.OperationalError:
        ret = []
        start_response('200 OK',
                       [('content-type', 'text/html;charset=utf-8')])
        ret.append(
            "<html><head><title>Could not open database</title></head>")
        ret.append("<body><h2>Could not open database file {}</h2>".format(
            audiolog_db))
        ret.append(
            "<p>Try adding the following lines to your profile ({}) and then asking me a few questions:<br />"
            .format(profile.profile_file))
        ret.append("<pre>\taudiolog:\n\t\tsave_audio\n</pre>")
        return [line.encode("UTF-8") for line in ret]

    # From here on the connection is closed on every exit path, including
    # unexpected exceptions.
    try:
        c = conn.cursor()
        # Collect the known speakers for the speaker datalist.
        c.execute("select distinct speaker from audiolog order by 1")
        # fetchall returns all rows as tuples, take the first (and only)
        # element of each tuple
        speakers = [row[0] for row in c.fetchall()]
        # Start the response
        ret = []
        # serve a train response. We will put this in a div on the Train
        # tab, so we don't have to regenerate everything.
        if engine:
            start_response('200 OK',
                           [('content-type', 'text/json;charset=utf-8')])
            nextcommand = ""
            response = []
            found_plugin = False
            for info in plugins.get_plugins_by_category('stt_trainer'):
                if info.name == engine:
                    found_plugin = True
                    try:
                        plugin = info.plugin_class(info, profile.get_profile())
                        print("plugin.HandleCommand({}, {})".format(
                            command, description))
                        response, nextcommand, description = plugin.HandleCommand(
                            command, description)
                    except Exception as e:
                        reason = (e.message
                                  if hasattr(e, 'message') else 'Unknown')
                        _logger.warning(
                            "Plugin '{}' skipped! (Reason: {})".format(
                                info.name, reason),
                            exc_info=True)
                        # Report the failure to the client instead of
                        # returning an empty message.
                        response = [
                            '<span class="failure">{}</span>'.format(
                                esc(reason))
                        ]
            if not found_plugin:
                # The client inserts the message as HTML, so the
                # client-supplied engine name is escaped.
                response = ["Unknown STT Trainer: {}".format(esc(engine))]
            # Prepare the json response
            messagetext = "<br /><br />\n".join(response)
            jsonstr = json.dumps({
                'message': messagetext,
                'engine': engine,
                'command': nextcommand,
                'description': description
            })
            ret.append(jsonstr)
        else:
            start_response('200 OK',
                           [('content-type', 'text/html;charset=utf-8')])
            ret.append(
                '<html><head><title>{} STT Training</title>'.format(keyword))
            # Return the main page
            try:
                # If we are performing an update,
                # do so and fetch the next row id
                if result and rowID:
                    print("Result: {}".format(result))
                    # rowid should have been passed in; if it does not
                    # exist nothing is updated and an error message is
                    # shown on the page instead.
                    Update_record = Get_row(c, rowID)
                    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                    if Update_record:
                        # Since an audio file can be associated with more
                        # than one transcription (since both a passive and
                        # active transcription can be run on the same audio
                        # file) we need to update each record with the same
                        # transcription with its own WER.
                        if result == "correct":
                            # the verified transcription is the
                            # transcription itself
                            verified_transcription = Update_record[
                                'Transcription']
                        if result in ("noise", "unclear"):
                            # Neither noise nor an unusable sample has a
                            # verified transcription.
                            verified_transcription = ""
                        verified_transcription = verified_transcription.strip()
                        filename = Update_record["Filename"]
                        # Get the records that share the same filename
                        c.execute(
                            " ".join([
                                "select", " RowID,", " transcription,",
                                " type", "from audiolog",
                                "where RowID=:RowID or (",
                                " filename=:filename and reviewed=''", ")"
                            ]), {
                                'RowID': rowID,
                                'filename': filename
                            })
                        for row in c.fetchall():
                            rowID = row[0]
                            transcription = row[1]
                            recording_type = row[2]
                            if len(verified_transcription) == 0:
                                recording_type = "noise"
                                print("Setting recording_type to noise")
                                if result == "unclear":
                                    recording_type = "unclear"
                                    print("Setting recording_type to unclear")
                            # calculate the word error rate
                            WER = 0
                            if len(transcription) > 0:
                                WER = wer(transcription,
                                          verified_transcription)
                            c.execute(
                                " ".join([
                                    "update audiolog set ", " type=:type,",
                                    " verified_transcription=:vt,",
                                    " speaker=:speaker,",
                                    " reviewed=:reviewed,", " wer=:wer,",
                                    " verified_intent=:verified_intent",
                                    "where RowID=:RowID"
                                ]), {
                                    "type": recording_type,
                                    "vt": verified_transcription,
                                    "speaker": speaker,
                                    "reviewed": now,
                                    "wer": WER,
                                    "verified_intent": verified_intent,
                                    "RowID": rowID
                                })
                            conn.commit()
                        # fetch the next unreviewed rowid
                        rowID = fetch_next_unreviewed_rowID(c, rowID)
                    else:
                        ErrorMessage = "Row ID {} does not exist".format(
                            str(rowID))
                # get the first rowID
                first_rowID = fetch_first_rowID(c)
                # get the current rowID
                try:
                    rowID = fetch_current_rowID(c, rowID)
                except ValueError:
                    ErrorMessage = "Row {} not found".format(rowID)
                    rowID = fetch_current_rowID(c, None)
                # get the previous rowid
                prev_rowID = fetch_prev_rowID(c, rowID)
                # get the next rowid
                next_rowID = fetch_next_rowID(c, rowID)
                totalRows = fetch_total_rows(c)

                if first_rowID:
                    ret.append("""
<meta charset="utf-8"/>
<style type="text/css">
 /* Style the tab */
.tab {
  overflow: hidden;
  border: 1px solid #ccc;
  background-color: #f1f1f1;
}
/* Style the buttons that are used to open the tab content */
.tab button {
  background-color: inherit;
  float: left;
  border: none;
  outline: none;
  cursor: pointer;
  padding: 14px 16px;
  transition: 0.3s;
}
/* Change background color of buttons on hover */
.tab button:hover {
  background-color: #ddd;
}
/* Create an active/current tablink class */
.tab button.active {
  background-color: #ccc;
}
/* Style the tab content */
.tabcontent {
  display: none;
  padding: 6px 12px;
  border: 1px solid #ccc;
  border-top: none;
}
.tabcontent.active {
  display: block;
}
.success {
  color: #0f0; /* green */
}
.failure {
  color: #f00; /* red */
}
</style>
<script language="javascript">
    var spin=0; // global spinner control
    var spintimer;
    function startSpinner(){
        // kill any old spinner
        window.clearTimeout(spintimer);
        spin=1;
        spintimer=window.setTimeout(function(){moveSpinner(0)},250);
    }
    function moveSpinner(position){
        var s=document.getElementById("spinner");
        switch(position){
            case 0:
                s.innerHTML="-";
                break;
            case 1:
                s.innerHTML="\\\\";
                break;
            case 2:
                s.innerHTML="|";
                break;
            case 3:
                s.innerHTML="/";
                break;
        }
        if(spin){
            spintimer=window.setTimeout(function(){moveSpinner((position+1)%4)},250);
        }else{
            s.innerHTML="";
        }
    }
    function stopSpinner(){
        window.clearTimeout(spintimer);
        spin=0;
    }
    function openTab(evt, tabName) {
        // Declare all variables
        var i, tabcontent, tablinks;
        // Get all elements with class="tabcontent" and hide them
        tabcontent = document.getElementsByClassName("tabcontent");
        for (i = 0; i < tabcontent.length; i++) {
            tabcontent[i].className = "tabcontent";
        }
        // Get all elements with class="tablinks" and remove the class "active"
        tablinks = document.getElementsByClassName("tablinks");
        for (i = 0; i < tablinks.length; i++) {
            tablinks[i].className = tablinks[i].className.replace(" active", "");
        }
        // Show the current tab, and add an "active" class to the button that opened the tab
        document.getElementById(tabName).className = "tabcontent active";
        evt.currentTarget.className += " active";
    }

    // Submit an updated transcription to the server. Upon success,
    // make the "revert" button inactive
    function UpdateTranscription(RowID){
        var Transcription=document.getElementById("transcription_"+RowID).value;
        alert( "Transcription="+Transcription );
        var xhttp=new XMLHttpRequest();
        xhttp.onreadystatechange=function(){
            if( this.readyState==4 && this.status==200 ){
                // Check this.responseText
                var message=JSON.parse(this.responseText).message;
                if( message=="SUCCESS;Updated "+RowID ){
                    // disable reset button
                    document.getElementById("reset_"+RowID).disabled=true;
                }else{
                    //alert( "message="+message );
                }
            }else{
                //alert( "responseText="+this.responseText );
            }
        }
        xhttp.open("POST",window.location.href.split(/[?#]/)[0],true);
        var request=JSON.stringify({"action":"update","RowID":RowID,"Transcription":Transcription});
        xhttp.send(request);
    }

    // Delete a line from the database and, if the response is success,
    // delete the line from the page also.
    function DeleteAudio(RowID){
        var xhttp=new XMLHttpRequest();
        xhttp.onreadystatechange=function(){
            if( this.readyState==4 && this.status==200 ){
                // Check this.responseText to make sure it contains a success message
                var message=JSON.parse(this.responseText).message;
                if( message=="SUCCESS;Deleted "+RowID ){
                    document.getElementById("r"+RowID).parentNode.removeChild(document.getElementById("r"+RowID));
                }else{
                    //alert(message);
                }
            }
        };
        xhttp.open("POST",window.location.href.split(/[?#]/)[0],true);
        var request='{"action":"delete","RowID":"'+RowID+'"}';
        xhttp.send(request);
    }

    function GoRowID(RowID){
        document.location.href="http://"+window.location.host+window.location.pathname+"?RowID="+RowID;
    }

    function ValidateForm(){
        var Checked=document.querySelector("input[name='result']:checked");
        var Ret=true;
        if( !Checked ){
            Ret=false;
            alert("Please select an option");
        }
        return Ret;
    }

    function Train(clear, engine, command, description){
        stopSpinner();
        if(clear){
            document.getElementById("Result").innerHTML = "";
        }
        var xhttp = new XMLHttpRequest();
        xhttp.onreadystatechange = function(){
            if(this.readyState==4){
                stopSpinner();
                if(this.status==200){
                    var response=JSON.parse(this.responseText);
                    document.getElementById("Result").innerHTML += response.message + '<br /><br />';
                    if(response.command){
                        var description = "";
                        if(response.description){
                            description = response.description;
                        }
                        Train(false,response.engine,response.command,description);
                    }else{
                        document.getElementById("Result").innerHTML += "<h2>Training Complete</h2>";
                    }
                }else{
                    document.getElementById("Result").innerHTML += "An error occurred. ReadyState: "+this.readyState+" Status: "+this.status+"<br />"+this.responseText;
                }
            }
        };
        url = location.toString().replace(location.search, "");

        xhttp.open("POST",url,true);
        xhttp.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
        xhttp.send("engine="+encodeURIComponent(engine)+"&command="+encodeURIComponent(command)+"&description="+encodeURIComponent(description));
        startSpinner();
    }
</script>""")
                    ret.append('''
</head>
<body>
<!-- Tab links -->
<div class="tab">
  <button class="tablinks active" onclick="openTab(event, 'Verify')">Verify transciptions</button>
  <button class="tablinks" onclick="openTab(event, 'Train')">Train STT Engines</button>
</div>
<!-- Tab content -->
<div id="Verify" class="tabcontent active">
                    ''')

                    Current_record = Get_row(c, rowID)

                    # if this has been reviewed, figure out
                    # what option was selected
                    Checked = 'checked="checked"'
                    Unchecked = ''
                    Disabled = 'disabled="disabled"'
                    Enabled = ''
                    Result_correct = Unchecked
                    Result_update = Unchecked
                    Result_nothing = Unchecked
                    Result_unclear = Unchecked
                    Verified_transcription_state = Disabled
                    if len(Current_record["Reviewed"]):
                        if Current_record["Verified_transcription"]:
                            if (Current_record["Transcription"] ==
                                    Current_record["Verified_transcription"]):
                                Result_correct = Checked
                            else:
                                Result_update = Checked
                                Verified_transcription_state = Enabled
                        else:
                            if Current_record["Type"] == "noise":
                                Result_nothing = Checked
                            else:
                                Result_unclear = Checked

                    # Pre-fill the correction box with the engine's
                    # transcription when nothing has been verified yet.
                    if not Current_record["Verified_transcription"]:
                        Current_record[
                            "Verified_transcription"] = Current_record[
                                "Transcription"]

                    # Serve the body of the page. Every value that comes
                    # from the request or the database is HTML-escaped.
                    if Debug:
                        # Debug info
                        ret.append("""<ul>""")
                        ret.append(
                            """<li>post_data: {}</li>""".format(
                                esc(post_data)))
                        ret.append("""<li>Result: {}</li>""".format(
                            esc(result)))

                        if result == "update":
                            ret.append(
                                "<li>Verified_transcription: {}</li>".format(
                                    esc(verified_transcription)))
                        ret.append("</ul>")

                        ret.append("<ul>")
                        ret.append("<li>Recorded: {}</li>".format(
                            esc(Current_record["Recorded"])))
                        ret.append("<li>Filename: {}</li>".format(
                            esc(Current_record["Filename"])))
                        ret.append("<li>Type: {}</li>".format(
                            esc(Current_record["Type"])))
                        ret.append("<li>Transcription: {}</li>".format(
                            esc(Current_record["Transcription"])))
                        ret.append(
                            "<li>Verified_transcription: {}</li>".format(
                                esc(Current_record["Verified_transcription"])))
                        ret.append("<li>Speaker: {}</li>".format(
                            esc(Current_record["Speaker"])))
                        ret.append("<li>Reviewed: {}</li>".format(
                            esc(Current_record["Reviewed"])))
                        ret.append("<li>Wer: {}</li>".format(
                            esc(Current_record["WER"])))
                        ret.append("<li>Result_correct: {}</li>".format(
                            Result_correct))
                        ret.append("""<li>Result_update: {}</li>""".format(
                            Result_update))
                        ret.append("""<li>Result_nothing: {}</li>""".format(
                            Result_nothing))
                        ret.append("""</ul>""")

                    ret.append(
                        """<h1>{} transcription {} of {} ({})</h1>""".format(
                            keyword, esc(rowID), totalRows,
                            esc(Current_record["Type"])))
                    if ErrorMessage:
                        ret.append(
                            """<p class="Error">{}</p>""".format(
                                esc(ErrorMessage)))
                    # str.format does not collapse "%%", so the width is
                    # written with a single percent sign. The file name is
                    # percent-encoded to survive the round trip through
                    # the query string.
                    ret.append(" ".join([
                        '<audio', 'controls="controls"', 'type="audio/wav"',
                        'style="width:100%">', '<source src="?wavfile={}" />',
                        '</audio><br />'
                    ]).format(esc(quote(str(Current_record["Filename"])))))
                    ret.append(' '.join([
                        '{} heard',
                        '"<span style="font-weight:bold">{}</span>"<br />'
                    ]).format(keyword, esc(Current_record["Transcription"])))
                    ret.append("What did you hear?<br />")
                    ret.append(' '.join([
                        '<form method="POST"',
                        'onsubmit="return ValidateForm()">'
                    ]))
                    ret.append(
                        '<input type="hidden" name="RowID" value="{}"/>'.
                        format(esc(rowID)))
                    ret.append(
                        """<input type="radio" id="update_result_correct" name="result" value="correct" {} onclick="document.getElementById('update_verified_transcription').disabled=true"/> <label for="update_result_correct">The transcription is correct. I heard the same thing</label><br />"""
                        .format(Result_correct))
                    ret.append(
                        """<input type="radio" id="update_result_update" name="result" value="update" {} onclick="document.getElementById('update_verified_transcription').disabled=false"/> <label for="update_result_update">The transcription is not correct. This is what I heard:</label><br /><textarea id="update_verified_transcription" name="verified_transcription" style="margin-left: 20px" {}>{}</textarea><br />"""
                        .format(Result_update, Verified_transcription_state,
                                esc(Current_record["Verified_transcription"])))
                    ret.append(
                        """<input type="radio" id="update_result_nothing" name="result" value="noise" {} onclick="document.getElementById('update_verified_transcription').disabled=true"/> <label for="update_result_nothing">This was just noise with no voices.</label><br />"""
                        .format(Result_nothing))
                    ret.append(
                        """<input type="radio" id="update_result_unclear" name="result" value="unclear" {} onclick="document.getElementById('update_verified_transcription').disabled=true"/> <label for="update_result_unclear">This was not directed to {} or was too unclear to understand.</label><br />"""
                        .format(Result_unclear, keyword))
                    # Default to the speaker that was just submitted when
                    # the record has none of its own.
                    ret.append(
                        """<label for="Speaker">Speaker</label><br /><input type="text" id="Speaker" name="Speaker" value="{}" list="speakerList"><datalist id="speakerList">"""
                        .format(esc(Current_record["Speaker"] or speaker)))
                    for known_speaker in speakers:
                        ret.append("""<option value="{}">""".format(
                            esc(known_speaker)))
                    ret.append("""</datalist><br /><br />""")
                    if Current_record["Type"] == 'active':
                        Verified_intent = Current_record["verified_intent"]
                        if Verified_intent == "None":
                            Verified_intent = Current_record["intent"]
                        ret.append("""Intent: {} ({})<br />""".format(
                            esc(Current_record["intent"]),
                            esc(Current_record["score"])))
                        ret.append(
                            """Correct intent: <select name="Verified_Intent">"""
                        )
                        ret.append(
                            """<option value="unclear">unclear</option>""")
                        for intent in fetch_intents(c):
                            selected = ""
                            if intent == Verified_intent:
                                selected = " selected"
                            ret.append("""<option{}>{}</option>""".format(
                                selected, esc(intent)))
                        ret.append("""</select><br /><br />""")
                    ret.append('<input type="submit" value="Submit"/><br />')
                    if prev_rowID:
                        ret.append(' '.join([
                            '<input type="button" value="Prev"',
                            'onclick="GoRowID({})"/>'
                        ]).format(prev_rowID))
                    if next_rowID:
                        ret.append(' '.join([
                            '<input type="button" value="Next"',
                            'onclick="GoRowID({})"/>'
                        ]).format(next_rowID))
                    else:
                        ret.append("""All transcriptions verified""")
                    ret.append('''
</div><!-- Verify -->
<div id="Train" class="tabcontent">
<form name="Train">
                    ''')
                    for info in plugins.get_plugins_by_category('stt_trainer'):
                        ret.append(
                            '''<input type="button" value="{plugin_name}" onclick="Train(true,'{plugin_name}','','')"><br />'''
                            .format(plugin_name=info.name))
                    ret.append('''
</form>
<div id="Result">
</div>
<div id="spinner">
</div>
</div><!-- Train -->
                    ''')
                    ret.append("""</body></html>""")
                else:
                    ret = [
                        "".join([
                            "<html>",
                            "<head><title>Nothing to validate</title></head>",
                            "<body><h1>Nothing to validate</h1></body></html>"
                        ])
                    ]
            except sqlite3.OperationalError as e:
                ret.append("".join(
                    ['</head>', '<body>SQLite error: {}</body>',
                     '</html>']).format(e))
        # Save (commit) the changes
        conn.commit()
        return [line.encode("UTF-8") for line in ret]
    finally:
        conn.close()