def __init__(self, rhasspyConfig="./intents/serindaCommands.ini"):
    # Load and parse the intents file (closed automatically by the context manager)
    with open(rhasspyConfig, "r") as f:
        self.intentFile = f.read()

    self.intents = rhasspynlu.parse_ini(self.intentFile)
    self.graph = rhasspynlu.intents_to_graph(self.intents)
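# Hedged usage sketch: rhasspy-nlu's recognize() matches a transcription
# against a graph built this way. The grammar and utterance below are
# hypothetical, not from serindaCommands.ini.
import rhasspynlu

graph = rhasspynlu.intents_to_graph(rhasspynlu.parse_ini("[GetTime]\nwhat time is it"))
recognitions = rhasspynlu.recognize("what time is it", graph)
if recognitions:
    print(recognitions[0].intent.name)  # GetTime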
def get_all_intents(ini_paths: List[Path]) -> Dict[str, Any]:
    """Get intents from all .ini files in profile."""
    try:
        with io.StringIO() as combined_ini_file:
            for ini_path in ini_paths:
                combined_ini_file.write(ini_path.read_text())
                print("", file=combined_ini_file)

            return rhasspynlu.parse_ini(combined_ini_file.getvalue())
    except Exception:
        _LOGGER.exception("Failed to parse %s", ini_paths)

    return {}
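# Hedged usage sketch: collect every .ini file under a profile directory and
# parse them as one combined grammar. "profiles/en" is a hypothetical path.
from pathlib import Path

intents = get_all_intents(sorted(Path("profiles/en").rglob("*.ini")))
for intent_name in intents:
    print(intent_name)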
def setUp(self):
    self.siteId = str(uuid.uuid4())
    self.sessionId = str(uuid.uuid4())

    ini_text = """
[SetLightColor]
set the (bedroom | living room){name} light to (red | green | blue){color}
"""

    self.graph = intents_to_graph(parse_ini(ini_text))

    self.client = MagicMock()
    self.hermes = NluHermesMqtt(self.client, self.graph, siteIds=[self.siteId])
def setUp(self):
    self.site_id = str(uuid.uuid4())
    self.session_id = str(uuid.uuid4())

    ini_text = """
[SetLightColor]
set the (bedroom | living room){name} light to (red | green | blue){color}

[GetTime]
what time is it
"""

    self.graph = intents_to_graph(parse_ini(ini_text))
    self.examples = rhasspyfuzzywuzzy.train(self.graph)

    self.client = MagicMock()
    self.hermes = NluHermesMqtt(
        self.client,
        self.graph,
        examples=self.examples,
        confidence_threshold=1.0,
        site_ids=[self.site_id],
    )
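# Hedged standalone sketch of the same pipeline outside the test fixture.
# Assumption: rhasspyfuzzywuzzy.recognize(text, graph, examples) is the
# inference counterpart of rhasspyfuzzywuzzy.train() used above.
graph = intents_to_graph(parse_ini("[GetTime]\nwhat time is it"))
examples = rhasspyfuzzywuzzy.train(graph)
recognitions = rhasspyfuzzywuzzy.recognize("what time is it", graph, examples)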
async def train_profile(
    profile_dir: Path, profile: typing.Dict[str, typing.Any]
) -> None:
    """Re-generate speech/intent artifacts for profile."""

    # Compact
    def ppath(query, default=None):
        return utils_ppath(profile, profile_dir, query, default)

    language_code = pydash.get(profile, "language.code", "en-US")

    sentences_ini = ppath("training.sentences-file", "sentences.ini")
    slots_dir = ppath("training.slots-directory", "slots")
    slot_programs = ppath("training.slot-programs-directory", "slot_programs")

    # Profile files that are split into parts and gzipped
    large_paths = [Path(p) for p in pydash.get(profile, "training.large-files", [])]

    # -------------------
    # Speech to text
    # -------------------
    base_dictionary = ppath("training.base-dictionary", "base_dictionary.txt")
    custom_words = ppath("training.custom-words-file", "custom_words.txt")
    custom_words_action = PronunciationAction(
        pydash.get(profile, "training.custom-words-action", "append")
    )
    sounds_like = ppath("training.sounds-like-file", "sounds_like.txt")
    sounds_like_action = PronunciationAction(
        pydash.get(profile, "training.sounds-like-action", "append")
    )

    acoustic_model = ppath("training.acoustic-model", "acoustic_model")
    acoustic_model_type = AcousticModelType(
        pydash.get(profile, "training.acoustic-model-type", AcousticModelType.DUMMY)
    )

    # Replace numbers with words
    replace_numbers = bool(pydash.get(profile, "training.replace-numbers", True))

    # ignore/upper/lower
    word_casing = pydash.get(profile, "training.word-casing", WordCasing.IGNORE)

    # Large pre-built language model
    base_language_model_fst = ppath(
        "training.base-language-model-fst", "base_language_model.fst"
    )
    base_language_model_weight = float(
        pydash.get(profile, "training.base-language-model-weight", 0)
    )

    # -------------------
    # Grapheme to phoneme
    # -------------------
    g2p_model = ppath("training.grapheme-to-phoneme-model", "g2p.fst")
    g2p_corpus = ppath("training.grapheme-to-phoneme-corpus", "g2p.corpus")

    # default/ignore/upper/lower
    g2p_word_casing = pydash.get(profile, "training.g2p-word-casing", word_casing)

    # -------
    # Outputs
    # -------
    dictionary_path = ppath("training.dictionary", "dictionary.txt")
    language_model_path = ppath("training.language-model", "language_model.txt")
    language_model_fst_path = ppath(
        "training.language-model-fst", "language_model.fst"
    )
    mixed_language_model_fst_path = ppath(
        "training.mixed-language-model-fst", "mixed_language_model.fst"
    )
    intent_graph_path = ppath("training.intent-graph", "intent.pickle.gz")
    vocab_path = ppath("training.vocabulary-file", "vocab.txt")
    unknown_words_path = ppath("training.unknown-words-file", "unknown_words.txt")

    async def run(command: typing.List[str], **kwargs):
        """Run a command asynchronously."""
        process = await asyncio.create_subprocess_exec(*command, **kwargs)
        await process.wait()
        assert process.returncode == 0, "Command failed"

    # -------------------------------------------------------------------------
    # 1. Reassemble large files
    # -------------------------------------------------------------------------
    for target_path in large_paths:
        gzip_path = Path(str(target_path) + ".gz")
        part_paths = sorted(list(gzip_path.parent.glob(f"{gzip_path.name}.part-*")))
        if part_paths:
            # Concatenate parts together
            cat_command = ["cat"] + [str(p) for p in part_paths]
            _LOGGER.debug(cat_command)

            with open(gzip_path, "wb") as gzip_file:
                await run(cat_command, stdout=gzip_file)

        if gzip_path.is_file():
            # Unzip single file
            unzip_command = ["gunzip", "-f", "--stdout", str(gzip_path)]
            _LOGGER.debug(unzip_command)

            with open(target_path, "wb") as target_file:
                await run(unzip_command, stdout=target_file)

            # Delete zip file
            gzip_path.unlink()

            # Delete unneeded .gz-part files
            for part_path in part_paths:
                part_path.unlink()

    # -------------------------------------------------------------------------
    # 2. Generate intent graph
    # -------------------------------------------------------------------------

    # Parse JSGF sentences
    _LOGGER.debug("Parsing %s", sentences_ini)
    intents = rhasspynlu.parse_ini(sentences_ini)

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    word_transform = None
    if word_casing == WordCasing.UPPER:
        word_transform = str.upper
    elif word_casing == WordCasing.LOWER:
        word_transform = str.lower

    word_visitor: typing.Optional[
        typing.Callable[[Expression], typing.Union[bool, Expression]]
    ] = None

    if word_transform:
        # Apply transformation to words
        def transform_visitor(word: Expression):
            if isinstance(word, Word):
                assert word_transform
                new_text = word_transform(word.text)

                # Preserve case by using original text as substitution
                if (word.substitution is None) and (new_text != word.text):
                    word.substitution = word.text

                word.text = new_text

            return word

        word_visitor = transform_visitor

    # Apply case/number transforms
    if word_visitor or replace_numbers:
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                if replace_numbers:
                    # Replace number ranges with slot references
                    rhasspynlu.jsgf.walk_expression(
                        sentence, rhasspynlu.number_range_transform, replacements
                    )  # type: ignore

                if word_visitor:
                    # Do case transformation
                    rhasspynlu.jsgf.walk_expression(
                        sentence, word_visitor, replacements
                    )  # type: ignore

    # Load slot values
    slot_replacements = rhasspynlu.get_slot_replacements(
        intents,
        slots_dirs=[slots_dir],
        slot_programs_dirs=[slot_programs],
        slot_visitor=word_visitor,
    )

    # Merge with existing replacements
    for slot_key, slot_values in slot_replacements.items():
        replacements[slot_key] = slot_values

    if replace_numbers:
        # Do single number transformations
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                rhasspynlu.jsgf.walk_expression(
                    sentence,
                    lambda w: rhasspynlu.number_transform(w, language_code),
                    replacements,
                )

    # Convert to directed graph
    intent_graph = rhasspynlu.sentences_to_graph(sentences, replacements=replacements)

    # Convert to gzipped pickle
    intent_graph_path.parent.mkdir(exist_ok=True)
    with open(intent_graph_path, mode="wb") as intent_graph_file:
        rhasspynlu.graph_to_gzip_pickle(intent_graph, intent_graph_file)

    _LOGGER.debug("Wrote intent graph to %s", intent_graph_path)

    g2p_word_transform = None
    if g2p_word_casing == WordCasing.UPPER:
        g2p_word_transform = str.upper
    elif g2p_word_casing == WordCasing.LOWER:
        g2p_word_transform = str.lower

    # Load phonetic dictionaries
    pronunciations: PronunciationsType = {}
    if acoustic_model_type in [
        AcousticModelType.POCKETSPHINX,
        AcousticModelType.KALDI,
        AcousticModelType.JULIUS,
    ]:
        pronunciations, _ = load_pronunciations(
            base_dictionary=base_dictionary,
            custom_words=custom_words,
            custom_words_action=custom_words_action,
            sounds_like=sounds_like,
            sounds_like_action=sounds_like_action,
            g2p_corpus=g2p_corpus,
        )

    # -------------------------------------------------------------------------
    # Speech to Text Training
    # -------------------------------------------------------------------------
    if acoustic_model_type == AcousticModelType.POCKETSPHINX:
        # Pocketsphinx
        import rhasspyasr_pocketsphinx

        rhasspyasr_pocketsphinx.train(
            intent_graph,
            dictionary_path,
            language_model_path,
            pronunciations,
            dictionary_word_transform=word_transform,
            g2p_model=g2p_model,
            g2p_word_transform=g2p_word_transform,
            missing_words_path=unknown_words_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    elif acoustic_model_type == AcousticModelType.KALDI:
        # Kaldi
        import rhasspyasr_kaldi
        from rhasspyasr_kaldi.train import LanguageModelType

        graph_dir = ppath("training.kaldi.graph-directory") or (
            acoustic_model / "graph"
        )

        # Type of language model to generate
        language_model_type = LanguageModelType(
            pydash.get(profile, "training.kaldi.language-model-type", "arpa")
        )

        rhasspyasr_kaldi.train(
            intent_graph,
            pronunciations,
            acoustic_model,
            graph_dir,
            dictionary_path,
            language_model_path,
            language_model_type=language_model_type,
            dictionary_word_transform=word_transform,
            g2p_model=g2p_model,
            g2p_word_transform=g2p_word_transform,
            missing_words_path=unknown_words_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    elif acoustic_model_type == AcousticModelType.DEEPSPEECH:
        # DeepSpeech
        import rhasspyasr_deepspeech

        trie_path = ppath("training.deepspeech.trie", "trie")
        alphabet_path = ppath("training.deepspeech.alphabet", "model/alphabet.txt")

        rhasspyasr_deepspeech.train(
            intent_graph,
            language_model_path,
            trie_path,
            alphabet_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    elif acoustic_model_type == AcousticModelType.JULIUS:
        # Julius
        from .julius import train as train_julius

        train_julius(
            intent_graph,
            dictionary_path,
            language_model_path,
            pronunciations,
            dictionary_word_transform=word_transform,
            silence_words={"<s>", "</s>"},
            g2p_model=g2p_model,
            g2p_word_transform=g2p_word_transform,
            missing_words_path=unknown_words_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    else:
        _LOGGER.warning("Not training speech to text system (%s)", acoustic_model_type)
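# Hedged usage sketch: train_profile() is a coroutine, so it is driven with
# asyncio. The profile directory and profile.json layout are hypothetical.
import asyncio
import json
from pathlib import Path

profile_dir = Path("profiles/en")
profile = json.loads((profile_dir / "profile.json").read_text())

asyncio.run(train_profile(profile_dir, profile))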
def train(
    sentences_dict: typing.Dict[str, str],
    language: str,
    slots_dict: typing.Optional[typing.Dict[str, typing.List[str]]] = None,
    engine_path: typing.Optional[typing.Union[str, Path]] = None,
    dataset_path: typing.Optional[typing.Union[str, Path]] = None,
) -> SnipsNLUEngine:
    """Generate Snips YAML dataset from Rhasspy sentences/slots."""
    slots_dict = slots_dict or {}

    _LOGGER.debug("Creating Snips engine for language %s", language)
    engine = SnipsNLUEngine(config=DEFAULT_CONFIGS[language])

    # Parse JSGF sentences
    _LOGGER.debug("Parsing sentences")
    with io.StringIO() as ini_file:
        # Join as single ini file
        for lines in sentences_dict.values():
            print(lines, file=ini_file)
            print("", file=ini_file)

        intents = rhasspynlu.parse_ini(ini_file.getvalue())

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    for intent_sentences in sentences.values():
        for sentence in intent_sentences:
            rhasspynlu.jsgf.walk_expression(
                sentence, rhasspynlu.number_range_transform, replacements
            )

    # Convert to directed graph *without* expanding slots
    # (e.g., $rhasspy/number)
    _LOGGER.debug("Converting to intent graph")
    intent_graph = rhasspynlu.sentences_to_graph(
        sentences, replacements=replacements, expand_slots=False
    )

    # Get start/end nodes for graph
    start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(intent_graph)
    assert (start_node is not None) and (
        end_node is not None
    ), "Missing start/end node(s)"

    if dataset_path:
        # Use user file
        dataset_file = open(dataset_path, "w+")
    else:
        # Use temporary file
        dataset_file = typing.cast(
            typing.TextIO, tempfile.NamedTemporaryFile(suffix=".yml", mode="w+")
        )
        dataset_path = dataset_file.name

    with dataset_file:
        _LOGGER.debug("Writing YAML dataset to %s", dataset_path)

        # Walk first layer of edges with intents
        for _, intent_node, edge_data in intent_graph.edges(start_node, data=True):
            intent_name: str = edge_data["olabel"][9:]

            # New intent
            print("---", file=dataset_file)
            print("type: intent", file=dataset_file)
            print("name:", quote(intent_name), file=dataset_file)
            print("utterances:", file=dataset_file)

            # Get all paths through the graph (utterances)
            used_utterances: typing.Set[str] = set()
            paths = nx.all_simple_paths(intent_graph, intent_node, end_node)
            for path in paths:
                utterance = []
                entity_name = None
                slot_name = None
                slot_value = None

                # Walk utterance edges
                for from_node, to_node in rhasspynlu.utils.pairwise(path):
                    edge_data = intent_graph.edges[(from_node, to_node)]
                    ilabel = edge_data.get("ilabel")
                    olabel = edge_data.get("olabel")

                    if olabel:
                        if olabel.startswith("__begin__"):
                            slot_name = olabel[9:]
                            entity_name = None
                            slot_value = ""
                        elif olabel.startswith("__end__"):
                            if entity_name == "rhasspy/number":
                                # Transform to Snips number
                                entity_name = "snips/number"
                            elif not entity_name:
                                # Collect actual value
                                assert (
                                    slot_name and slot_value
                                ), f"No slot name or value (name={slot_name}, value={slot_value})"

                                entity_name = slot_name
                                slot_values = slots_dict.get(slot_name)
                                if not slot_values:
                                    slot_values = []
                                    slots_dict[slot_name] = slot_values

                                slot_values.append(slot_value.strip())

                            # Reference slot/entity (values will be added later)
                            utterance.append(f"[{slot_name}:{entity_name}]")

                            # Reset current slot/entity
                            entity_name = None
                            slot_name = None
                            slot_value = None
                        elif olabel.startswith("__source__"):
                            # Use Rhasspy slot name as entity
                            entity_name = olabel[10:]

                    if ilabel:
                        # Add to current slot/entity value
                        if slot_name and (not entity_name):
                            slot_value += ilabel + " "
                        else:
                            # Add directly to utterance
                            utterance.append(ilabel)
                    elif (
                        olabel
                        and (not olabel.startswith("__"))
                        and slot_name
                        and (not slot_value)
                        and (not entity_name)
                    ):
                        slot_value += olabel + " "

                if utterance:
                    utterance_str = " ".join(utterance)
                    if utterance_str not in used_utterances:
                        # Write utterance
                        print(" -", quote(utterance_str), file=dataset_file)
                        used_utterances.add(utterance_str)

            print("", file=dataset_file)

        # Write entities
        for slot_name, values in slots_dict.items():
            if slot_name.startswith("$"):
                # Remove arguments and $
                slot_name = slot_name.split(",")[0][1:]

            # Skip numbers
            if slot_name in {"rhasspy/number"}:
                # Should have been converted already to snips/number
                continue

            # Keep only unique values
            values_set = set(values)

            print("---", file=dataset_file)
            print("type: entity", file=dataset_file)
            print("name:", quote(slot_name), file=dataset_file)
            print("values:", file=dataset_file)

            slot_graph = rhasspynlu.sentences_to_graph(
                {
                    slot_name: [
                        rhasspynlu.jsgf.Sentence.parse(value) for value in values_set
                    ]
                }
            )

            start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(
                slot_graph
            )
            n_data = slot_graph.nodes(data=True)
            for path in nx.all_simple_paths(slot_graph, start_node, end_node):
                words = []
                for node in path:
                    node_data = n_data[node]
                    word = node_data.get("word")
                    if word:
                        words.append(word)

                if words:
                    print(" -", quote(" ".join(words)), file=dataset_file)

            print("", file=dataset_file)

        # ------------
        # Train engine
        # ------------
        if engine_path:
            # Delete existing engine
            engine_path = Path(engine_path)
            engine_path.parent.mkdir(exist_ok=True)

            if engine_path.is_dir():
                # Snips will fail if the directory exists
                _LOGGER.debug("Removing existing engine at %s", engine_path)
                shutil.rmtree(engine_path)
            elif engine_path.is_file():
                _LOGGER.debug("Removing unexpected file at %s", engine_path)
                engine_path.unlink()

        _LOGGER.debug("Training engine")
        dataset_file.seek(0)
        dataset = Dataset.from_yaml_files(language, [dataset_file])
        engine = engine.fit(dataset)

    if engine_path:
        # Save engine
        engine.persist(engine_path)
        _LOGGER.debug("Engine saved to %s", engine_path)

    return engine
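# Hedged usage sketch: train a small engine from an inline grammar, then
# parse an utterance with the standard snips-nlu API. The dictionary key
# "colors.ini" is arbitrary; only its value (the grammar) matters.
sentences = {
    "colors.ini": (
        "[SetLightColor]\n"
        "set the (bedroom | living room){name} light to (red | green | blue){color}"
    )
}
engine = train(sentences, "en")
result = engine.parse("set the bedroom light to red")
print(result["intent"]["intentName"])  # SetLightColor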
def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]:
    # Compact
    def ppath(query, default=None, write=False):
        return utils_ppath(profile, profile_dir, query, default, write=write)

    language = profile.get("language", "")

    # Inputs
    stt_system = profile.get("speech_to_text.system")
    stt_prefix = f"speech_to_text.{stt_system}"

    # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
    sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
    sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir")
    base_dictionary = ppath(f"{stt_prefix}.base_dictionary", "base_dictionary.txt")
    base_language_model = ppath(
        f"{stt_prefix}.base_language_model", "base_language_model.txt"
    )
    base_language_model_weight = float(profile.get(f"{stt_prefix}.mix_weight", 0))
    g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")

    acoustic_model_type = stt_system
    if acoustic_model_type == "pocketsphinx":
        acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model")
        kaldi_dir = None
    elif acoustic_model_type == "kaldi":
        kaldi_dir = Path(
            os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi"))
        )
        acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
    else:
        assert False, f"Unknown acoustic model type: {acoustic_model_type}"

    # ignore/upper/lower
    word_casing = profile.get("speech_to_text.dictionary_casing", "ignore").lower()

    # default/ignore/upper/lower
    g2p_word_casing = profile.get("speech_to_text.g2p_casing", word_casing).lower()

    # all/first
    dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule", "all").lower()

    # Kaldi
    kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph", "graph")

    # Outputs
    dictionary = ppath(f"{stt_prefix}.dictionary", "dictionary.txt", write=True)
    custom_words = ppath(f"{stt_prefix}.custom_words", "custom_words.txt", write=True)
    language_model = ppath(
        f"{stt_prefix}.language_model", "language_model.txt", write=True
    )
    base_language_model_fst = ppath(
        f"{stt_prefix}.base_language_model_fst", "base_language_model.fst", write=True
    )
    intent_graph = ppath("intent.fsticiffs.intent_graph", "intent.json", write=True)
    intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
    vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
    unknown_words = ppath(
        f"{stt_prefix}.unknown_words", "unknown_words.txt", write=True
    )
    grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True)
    fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True)
    slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True)

    # -----------------------------------------------------------------------------

    # Create cache directories
    for dir_path in [grammar_dir, fsts_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)

    # -----------------------------------------------------------------------------

    ini_paths: List[Path] = []
    if sentences_ini.is_file():
        ini_paths = [sentences_ini]

    # Add .ini files from intents directory
    if sentences_dir.is_dir():
        for ini_path in sentences_dir.rglob("*.ini"):
            ini_paths.append(ini_path)

    # Join ini files into a single combined file and parse
    _LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths])

    with io.StringIO() as combined_ini_file:
        for ini_path in ini_paths:
            combined_ini_file.write(ini_path.read_text())
            print("", file=combined_ini_file)

        intents = parse_ini(combined_ini_file.getvalue())

    # -----------------------------------------------------------------------------

    def get_slot_names(item):
        """Yield referenced slot names."""
        if isinstance(item, jsgf.SlotReference):
            yield item.slot_name
        elif isinstance(item, jsgf.Sequence):
            for sub_item in item.items:
                for slot_name in get_slot_names(sub_item):
                    yield slot_name
        elif isinstance(item, jsgf.Rule):
            for slot_name in get_slot_names(item.rule_body):
                yield slot_name

    def number_transform(word):
        """Automatically transform numbers."""
        if not isinstance(word, jsgf.Word):
            # Skip anything besides words
            return

        try:
            n = int(word.text)

            # 75 -> (seventy five):75
            number_text = num2words(n, lang=language).replace("-", " ").strip()
            assert number_text, f"Empty num2words result for {n}"
            number_words = number_text.split()

            if len(number_words) == 1:
                # Easy case, single word
                word.text = number_text
                word.substitution = str(n)
            else:
                # Hard case, split into multiple Words
                return jsgf.Sequence(
                    text=number_text,
                    type=jsgf.SequenceType.GROUP,
                    substitution=str(n),
                    items=[jsgf.Word(w) for w in number_words],
                )
        except ValueError:
            # Not a number
            pass

    def do_intents_to_graph(intents, slot_names, targets):
        sentences, replacements = ini_jsgf.split_rules(intents)

        # Load slot values
        for slot_name in slot_names:
            slot_path = slots_dir / slot_name
            assert slot_path.is_file(), f"Missing slot file at {slot_path}"

            # Parse each non-empty line as a JSGF sentence
            slot_values = []
            with open(slot_path, "r") as slot_file:
                for line in slot_file:
                    line = line.strip()
                    if line:
                        sentence = jsgf.Sentence.parse(line)
                        slot_values.append(sentence)

            # Replace $slot with sentences
            replacements[f"${slot_name}"] = slot_values

        if profile.get("intent.replace_numbers", True):
            # Replace numbers in parsed sentences
            for intent_sentences in sentences.values():
                for sentence in intent_sentences:
                    jsgf.walk_expression(sentence, number_transform, replacements)

        # Convert to directed graph
        graph = intents_to_graph(intents, replacements)

        # Write graph to JSON file
        json_graph = graph_to_json(graph)
        with open(targets[0], "w") as graph_file:
            json.dump(json_graph, graph_file)

    def task_ini_graph():
        """sentences.ini -> intent.json"""
        slot_names = set()
        for intent_name in intents:
            for item in intents[intent_name]:
                for slot_name in get_slot_names(item):
                    slot_names.add(slot_name)

        # Add slot files as dependencies
        deps = [(slots_dir / slot_name) for slot_name in slot_names]

        # Add profile itself as a dependency
        profile_json_path = profile_dir / "profile.json"
        if profile_json_path.is_file():
            deps.append(profile_json_path)

        return {
            "file_dep": ini_paths + deps,
            "targets": [intent_graph],
            "actions": [(do_intents_to_graph, [intents, slot_names])],
        }

    # -----------------------------------------------------------------------------

    def do_graph_to_fst(intent_graph, targets):
        with open(intent_graph, "r") as graph_file:
            json_graph = json.load(graph_file)

        graph = json_to_graph(json_graph)
        graph_fst = graph_to_fst(graph)

        # Create symbol tables
        isymbols = fst.SymbolTable()
        for symbol, number in graph_fst.input_symbols.items():
            isymbols.add_symbol(symbol, number)

        osymbols = fst.SymbolTable()
        for symbol, number in graph_fst.output_symbols.items():
            osymbols.add_symbol(symbol, number)

        # Compile FST
        compiler = fst.Compiler(
            isymbols=isymbols,
            osymbols=osymbols,
            keep_isymbols=True,
            keep_osymbols=True,
        )
        compiler.write(graph_fst.intent_fst)
        compiled_fst = compiler.compile()

        # Write to file
        compiled_fst.write(str(targets[0]))

    def task_intent_fst():
        """intent.json -> intent.fst"""
        return {
            "file_dep": [intent_graph],
            "targets": [intent_fst],
            "actions": [(do_graph_to_fst, [intent_graph])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="intent_fst")
    def task_language_model():
        """Creates an ARPA language model from intent.fst."""
        if base_language_model_weight > 0:
            yield {
                "name": "base_lm_to_fst",
                "file_dep": [base_language_model],
                "targets": [base_language_model_fst],
                "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"],
            }

        # FST -> n-gram counts
        intent_counts = str(intent_fst) + ".counts"
        yield {
            "name": "intent_counts",
            "file_dep": [intent_fst],
            "targets": [intent_counts],
            "actions": ["ngramcount %(dependencies)s %(targets)s"],
        }

        # n-gram counts -> model
        intent_model = str(intent_fst) + ".model"
        yield {
            "name": "intent_model",
            "file_dep": [intent_counts],
            "targets": [intent_model],
            "actions": ["ngrammake %(dependencies)s %(targets)s"],
        }

        if base_language_model_weight > 0:
            merged_model = Path(str(intent_model) + ".merge")

            # merge
            yield {
                "name": "lm_merge",
                "file_dep": [base_language_model_fst, intent_model],
                "targets": [merged_model],
                "actions": [
                    f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s"
                ],
            }

            intent_model = merged_model

        # model -> ARPA
        yield {
            "name": "intent_arpa",
            "file_dep": [intent_model],
            "targets": [language_model],
            "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"],
        }

    # -----------------------------------------------------------------------------

    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here when input symbols
                # numbering is discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__") or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

    @create_after(executed="language_model")
    def task_vocab():
        """Writes all vocabulary words to a file from intent.fst."""
        return {"file_dep": [intent_fst], "targets": [vocab], "actions": [do_vocab]}

    # -----------------------------------------------------------------------------

    def do_dict(dictionary_paths: Iterable[Path], targets):
        with open(targets[0], "w") as dictionary_file:
            if unknown_words.exists():
                unknown_words.unlink()

            dictionary_format = FORMAT_CMU
            if acoustic_model_type == "julius":
                dictionary_format = FORMAT_JULIUS

            make_dict(
                vocab,
                dictionary_paths,
                dictionary_file,
                unknown_path=unknown_words,
                dictionary_format=dictionary_format,
                merge_rule=dict_merge_rule,
                upper=(word_casing == "upper"),
                lower=(word_casing == "lower"),
            )

            if unknown_words.exists() and g2p_model.exists():
                # Generate single pronunciation guesses
                _LOGGER.debug("Guessing pronunciations for unknown word(s)")

                g2p_output = subprocess.check_output(
                    [
                        "phonetisaurus-apply",
                        "--model",
                        str(g2p_model),
                        "--word_list",
                        str(unknown_words),
                        "--nbest",
                        "1",
                    ],
                    universal_newlines=True,
                )

                g2p_transform = lambda w: w
                if g2p_word_casing == "upper":
                    g2p_transform = lambda w: w.upper()
                elif g2p_word_casing == "lower":
                    g2p_transform = lambda w: w.lower()

                # Append to dictionary and custom words
                with open(custom_words, "a") as words_file:
                    with open(unknown_words, "w") as unknown_words_file:
                        for line in g2p_output.splitlines():
                            line = line.strip()
                            word, phonemes = re.split(r"\s+", line, maxsplit=1)
                            word = g2p_transform(word)
                            print(word, phonemes, file=dictionary_file)
                            print(word, phonemes, file=words_file)
                            print(word, phonemes, file=unknown_words_file)

    @create_after(executed="vocab")
    def task_vocab_dict():
        """Creates custom pronunciation dictionary based on desired vocabulary."""
        dictionary_paths = [base_dictionary]
        if custom_words.exists():
            # Custom dictionary goes first so that the "first" dictionary merge
            # rule will choose pronunciations from it.
            dictionary_paths.insert(0, custom_words)

        # Exclude dictionaries that don't exist
        dictionary_paths = [p for p in dictionary_paths if p.exists()]

        return {
            "file_dep": [vocab] + dictionary_paths,
            "targets": [dictionary],
            "actions": [(do_dict, [dictionary_paths])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="vocab_dict")
    def task_kaldi_train():
        """Creates HCLG.fst for a Kaldi nnet3 or gmm model."""
        if acoustic_model_type == "kaldi":
            return {
                "file_dep": [dictionary, language_model],
                "targets": [kaldi_graph_dir / "HCLG.fst"],
                "actions": [
                    [
                        "bash",
                        str(acoustic_model / "train.sh"),
                        str(kaldi_dir),
                        str(acoustic_model),
                        str(dictionary),
                        str(language_model),
                    ]
                ],
            }

    # -----------------------------------------------------------------------------

    errors = []

    class MyReporter(ConsoleReporter):
        def add_failure(self, task, exception):
            super().add_failure(task, exception)
            errors.append(f"{task}: {exception}")

        def runtime_error(self, msg):
            super().runtime_error(msg)
            errors.append(msg)

    DOIT_CONFIG = {"action_string_formatting": "old", "reporter": MyReporter}

    # Monkey patch inspect to make doit work inside Pyinstaller.
    # It grabs the line numbers of functions, probably for debugging reasons,
    # but PyInstaller doesn't seem to keep that information around.
    #
    # A better approach would be to create a custom TaskLoader.
    import inspect

    inspect.getsourcelines = lambda obj: [0, 0]

    # Run doit main
    result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
    return (result, errors)
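# Hedged usage sketch: the doit-based trainer returns an exit code plus any
# error strings collected by MyReporter. The profile object and path are
# hypothetical.
result, errors = train_profile(Path("profiles/en"), profile)
if result != 0:
    for error in errors:
        _LOGGER.error(error)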
def sentences_to_graph(
    sentences_dict: typing.Dict[str, str],
    slots_dirs: typing.Optional[typing.List[Path]] = None,
    slot_programs_dirs: typing.Optional[typing.List[Path]] = None,
    replace_numbers: bool = True,
    language: str = "en",
    word_transform: typing.Optional[typing.Callable[[str], str]] = None,
    add_intent_weights: bool = True,
) -> typing.Tuple[nx.DiGraph, typing.Dict[str, typing.Any]]:
    """Transform sentences to an intent graph; return the graph and slot replacements."""
    slots_dirs = slots_dirs or []
    slot_programs_dirs = slot_programs_dirs or []

    # Parse sentences and convert to graph
    with io.StringIO() as ini_file:
        # Join as single ini file
        for lines in sentences_dict.values():
            print(lines, file=ini_file)
            print("", file=ini_file)

        # Parse JSGF sentences
        intents = rhasspynlu.parse_ini(ini_file.getvalue())

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    word_visitor: typing.Optional[
        typing.Callable[[Expression], typing.Union[bool, Expression]]
    ] = None

    if word_transform:
        # Apply transformation to words
        def transform_visitor(word: Expression):
            if isinstance(word, Word):
                assert word_transform
                new_text = word_transform(word.text)

                # Preserve case by using original text as substitution
                if (word.substitution is None) and (new_text != word.text):
                    word.substitution = word.text

                word.text = new_text

            return word

        word_visitor = transform_visitor

    # Apply case/number transforms
    if word_visitor or replace_numbers:
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                if replace_numbers:
                    # Replace number ranges with slot references
                    rhasspynlu.jsgf.walk_expression(
                        sentence, rhasspynlu.number_range_transform, replacements
                    )  # type: ignore

                if word_visitor:
                    # Do case transformation
                    rhasspynlu.jsgf.walk_expression(
                        sentence, word_visitor, replacements
                    )  # type: ignore

    # Load slot values
    slot_replacements = rhasspynlu.get_slot_replacements(
        intents,
        slots_dirs=slots_dirs,
        slot_programs_dirs=slot_programs_dirs,
        slot_visitor=word_visitor,
    )

    # Merge with existing replacements
    for slot_key, slot_values in slot_replacements.items():
        replacements[slot_key] = slot_values

    if replace_numbers:
        # Do single number transformations
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                rhasspynlu.jsgf.walk_expression(
                    sentence,
                    lambda w: rhasspynlu.number_transform(w, language),
                    replacements,
                )

    # Convert to directed graph
    intent_graph = rhasspynlu.sentences_to_graph(
        sentences, replacements=replacements, add_intent_weights=add_intent_weights
    )

    return intent_graph, slot_replacements
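# Hedged usage sketch: sentences_to_graph() takes raw sentences.ini text
# keyed by (arbitrary) file name and returns the graph plus the slot
# replacement table it resolved.
graph, slots = sentences_to_graph(
    {"sentences.ini": "[SetLightColor]\nset the light to (red | green | blue){color}"}
)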
def make_summary(targets):
    """Writes summary CSV."""
    with open(targets[0], "w") as out_file:
        writer = csv.DictWriter(
            out_file,
            fieldnames=[
                "dataset",
                "profile",
                "training_seconds",
                "transcription_accuracy",
                "intent_entity_accuracy",
                "average_transcription_speedup",
                "average_recognize_seconds",
                "num_wavs",
                "num_sentences",
            ],
        )
        writer.writeheader()

        for p in _PROFILES:
            sentences_ini = p.out_profile_dir / "sentences.ini"
            slots_dir = p.out_profile_dir / "slots"
            report_json = p.results_dir / "report.json"
            train_results = p.results_dir / "train-profile.txt"

            with open(report_json, "r") as report_file:
                report = json.load(report_file)

            # Get training time
            training_time = ""
            with open(train_results, "r") as training_file:
                for line in training_file:
                    line = line.strip().lower()
                    if line.startswith("training completed in"):
                        training_time = "{0:.02f}".format(float(line.split()[3]))

            # Get sentence count
            sentence_count = 0
            with open(sentences_ini, "r") as sentences_file:
                intents = rhasspynlu.parse_ini(sentences_file)
                sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

                if slots_dir.is_dir():
                    slot_replacements = rhasspynlu.slots.get_slot_replacements(
                        intents, slots_dirs=[slots_dir]
                    )

                    # Merge with existing replacements
                    for slot_key, slot_values in slot_replacements.items():
                        replacements[slot_key] = slot_values

                # Calculate number of possible sentences per intent
                intent_counts = rhasspynlu.ini_jsgf.get_intent_counts(
                    sentences, replacements, exclude_slots=False
                )
                sentence_count = sum(intent_counts.values())

            # Calculate average recognition time
            recognize_seconds = []
            for actual_value in report["actual"].values():
                recognize_seconds.append(actual_value["recognize_seconds"])

            # Guard against an empty report to avoid division by zero
            average_recognize_seconds = (
                sum(recognize_seconds) / len(recognize_seconds)
                if recognize_seconds
                else 0.0
            )

            # Write CSV row
            writer.writerow(
                {
                    "dataset": p.dataset,
                    "profile": p.profile,
                    "training_seconds": training_time,
                    "transcription_accuracy": "{0:.02f}".format(
                        report["transcription_accuracy"]
                    ),
                    "intent_entity_accuracy": "{0:.02f}".format(
                        report["intent_entity_accuracy"]
                    ),
                    "average_transcription_speedup": "{0:.02f}".format(
                        report["average_transcription_speedup"]
                    ),
                    "num_wavs": report["num_wavs"],
                    "num_sentences": sentence_count,
                    "average_recognize_seconds": average_recognize_seconds,
                }
            )
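# Hedged sketch of the sentence-counting step in isolation. The call mirrors
# the one above: get_intent_counts() returns possible sentences per intent.
import rhasspynlu

intents = rhasspynlu.parse_ini(
    "[SetLightColor]\n"
    "set the (bedroom | living room){name} light to (red | green | blue){color}"
)
sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)
intent_counts = rhasspynlu.ini_jsgf.get_intent_counts(
    sentences, replacements, exclude_slots=False
)
print(sum(intent_counts.values()))  # 2 names x 3 colors = 6 sentences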