def create_nlp_model() -> SnipsNLUEngine:
    """Train a new Snips NLU engine from the YAML files in ``./nlputrain/``.

    The fitted engine is persisted to the ``nlumodel`` directory, relative to
    the current working directory. If persisting fails with
    ``PersistingError`` the existing ``nlumodel`` directory is deleted and the
    engine is persisted again.

    Per the original author's note, this is meant to be called once at
    program start when no ``nlumodel`` directory is found.

    Returns:
        The trained ``SnipsNLUEngine``.
    """
    train_dir = "./nlputrain/"
    model_dir = "nlumodel"

    # Start from an untrained engine with the default English configuration.
    engine = SnipsNLUEngine(config=CONFIG_EN)

    # Match on the file extension rather than on a substring, so that files
    # such as "intents.yaml.bak" are not fed to the dataset builder. The
    # listing is sorted so the dataset is built in a reproducible order.
    yaml_files = [
        train_dir + name
        for name in sorted(os.listdir(train_dir))
        if name.endswith(".yaml")
    ]
    data = dataset.Dataset.from_yaml_files("en", yaml_files)

    engine.fit(data)

    try:
        engine.persist(model_dir)
    except PersistingError:
        # An older model is still stored at the target location: drop it and
        # persist the freshly trained engine in its place.
        print("Old NLP file still exists. Deleting..")
        shutil.rmtree(model_dir)
        engine.persist(model_dir)

    print("NLP model has been created and saved in directory: nlumodel")
    return engine
def init_snipsnlu():
    """Build a Snips NLU engine and fit it on ``proj.json``.

    The engine is created with whatever ``load_resources("snips_nlu_en")``
    returns as its ``resources`` argument, then trained on the JSON dataset
    read from ``proj.json`` in the current working directory.

    Returns:
        The engine after ``fit`` has been called on it.
    """
    resources = load_resources("snips_nlu_en")
    nlu_engine = SnipsNLUEngine(resources=resources)

    with io.open("proj.json") as dataset_file:
        training_data = json.load(dataset_file)

    nlu_engine.fit(training_data)
    return nlu_engine
class NLUTrainer:
    """Train a Snips NLU engine and dump the trained engine to a JSON file."""

    def __init__(self, lang, config_file):
        """Load language resources and build an engine from a JSON config.

        Args:
            lang: Language identifier passed to ``load_resources``.
            config_file: Path of the JSON file holding the engine config.
        """
        self.__lang = lang
        self.__config_file = config_file
        self.__ongoing_training = False
        load_resources(self.__lang)
        with io.open(self.__config_file) as f:
            self.__config = json.load(f)
        self.__engine = SnipsNLUEngine(config=self.__config)

    @property
    def ongoing_training(self):
        """``True`` while :meth:`train` is running, ``False`` otherwise."""
        return self.__ongoing_training

    def train(self, json_dataset, trained_engine_file):
        """Fit the engine on a dataset and write it out as JSON.

        Args:
            json_dataset: Path of the JSON training dataset.
            trained_engine_file: Path the serialized engine is written to.

        Raises:
            Whatever reading the dataset, fitting the engine or writing the
            output raises; the ``ongoing_training`` flag is cleared first.
        """
        self.__ongoing_training = True
        try:
            with io.open(json_dataset) as f:
                dataset = json.load(f)
            self.__engine.fit(dataset)
            engine_json = json.dumps(self.__engine.to_dict())
            with io.open(trained_engine_file, mode="w") as f:
                f.write(engine_json)
        finally:
            # Always clear the flag, otherwise a failed run would leave the
            # trainer reporting an ongoing training forever.
            self.__ongoing_training = False
class SnipsService(ApiService):
    """``ApiService`` that classifies utterances with a Snips NLU engine."""

    def __init__(self, classes, model_path=None, max_api_calls=None, verbose=False):
        """Load English resources and either load or create an engine.

        A truthy ``model_path`` loads a persisted engine from that path;
        otherwise a new, untrained engine with ``CONFIG_EN`` is created.
        """
        super().__init__(classes, max_api_calls, verbose)
        load_resources('en')
        if not model_path:
            self.engine = SnipsNLUEngine(config=CONFIG_EN)
        else:
            self.load_model(model_path)

    def train_model(self, dataset):
        """Fit the engine on an already loaded dataset."""
        self.engine.fit(dataset)

    def train_model_from_file(self, dataset_path):
        """Read a JSON dataset from ``dataset_path`` and fit the engine on it."""
        with io.open(dataset_path) as dataset_file:
            dataset = json.load(dataset_file)
            self.train_model(dataset)

    def save_model(self, model_path):
        """Persist the current engine to ``model_path``."""
        self.engine.persist(model_path)

    def load_model(self, model_path):
        """Replace the current engine with the one stored at ``model_path``."""
        self.engine = SnipsNLUEngine.from_path(model_path)

    def predict(self, utterance):
        """Return the intent name parsed from ``utterance``.

        If the intent name cannot be read from the parse result, the error,
        the utterance and the raw result are printed and ``None`` is returned.
        """
        result = self.engine.parse(utterance)
        try:
            intent_name = result['intent']['intentName']
        except Exception as e:
            print('ERR:', e)
            print('Failed to parse: "{}"'.format(utterance))
            print(result)
            return None
        return intent_name
def train_model(model_file_path):
    """Train the module-level ``nlu_engine`` on a JSON dataset file.

    The global ``nlu_engine`` is rebound to a new engine built with
    ``CONFIG_EN`` and then fitted on the data read from ``model_file_path``.
    """
    global nlu_engine

    print("reading model at {}".format(model_file_path))
    with io.open(model_file_path) as dataset_file:
        training_data = json.load(dataset_file)

    # The global is rebound before fitting, as in the original flow.
    engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = engine

    print("training model")
    engine.fit(training_data)
def nluparse(text):
    """Parse ``text`` with a freshly trained engine and return JSON text.

    On every call the resources for ``sample_dataset["language"]`` are
    loaded and a new engine (module-level ``config``) is fitted on the
    module-level ``sample_dataset``.

    Example input: "Show me jobs in LA for today".

    Returns:
        The parse result serialized with ``json.dumps(..., indent=2)``.
    """
    load_resources(sample_dataset["language"])

    engine = SnipsNLUEngine(config=config)
    engine.fit(sample_dataset)

    return json.dumps(engine.parse(text), indent=2)
def conversacion(m):
    """Parse the lower-cased text of message ``m`` and build the reply.

    An English engine is trained on ``training.json`` on every call; the
    parse result is handed to ``procesarRespuesta`` and its return value is
    returned unchanged.
    """
    with io.open("training.json") as training_file:
        training_data = json.load(training_file)

    load_resources("en")
    engine = SnipsNLUEngine(config=CONFIG_EN)
    engine.fit(training_data)

    # The u"" prefix keeps the text a unicode string, as in the original.
    lowered_text = u"" + m.text.lower()
    return procesarRespuesta(engine.parse(lowered_text))
def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """
    Evaluate or train a Snips NLU model on the task-1 data.

    With ``cross`` set, the data built by ``BuildSnipsDataTask1`` is used as
    a (train, test) pair to compute Snips train/test metrics. The metrics are
    written to a pickle file and to a timestamped JSON file under
    ``source_result``. Nothing is computed or written when the pickle file
    already exists.

    Otherwise a ``SnipsNLUEngine`` is fitted on the data and saved twice:
    pickled to a ``.pkl`` file and persisted with ``persist``. Both saves are
    best-effort: a failure is reported and does not abort the other one.

    :param lang: abbreviate language name
    :param cross: compute train/test metrics instead of training a model
    :param save: version tag inserted into the output file names
    :return: None
    :rtype: None
    """
    import json
    import pickle
    from datetime import datetime

    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics

    train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
    train_data = train_data_obj.build_snips_data_task1()

    if cross:
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / \
            "snips_semeval_2020_evaluation_task1_{}.pkl".format(save)
        if Path(filename_results).exists():
            # Metrics were already produced for this version tag; computing
            # nothing here also avoids writing an unassigned result below.
            return

        tt_metrics = compute_train_test_metrics(train_dataset=train_data[0],
                                                test_dataset=train_data[1],
                                                engine_class=SnipsNLUEngine,
                                                include_slot_metrics=False)

        print("--> Writing snips nlu metrics data to file...")
        with codecs.open(filename_results, 'wb') as metric:
            pickle.dump(tt_metrics, metric)

        # The JSON copy carries a timestamp so successive runs never collide.
        dmtime = "_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
        name = "snips_semeval_2020_evaluation_task1{}.json".format(dmtime)
        filename_results_json = source_result / name
        with codecs.open(filename_results_json, 'w', "utf-8") as m_json:
            json.dump(tt_metrics, m_json)
        return

    filename_results = source_result / \
        "snips_semeval_2020_model_task1_{}".format(save)
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    print("--> Training patent data with Snips...")
    nlu_engine.fit(train_data)

    print("--> Saving model trained with Snips (JOBLIB)...")
    filename_joblib = source_result / \
        "snips_semeval_2020_model_task1_{}.pkl".format(save)
    try:
        with codecs.open(filename_joblib, 'wb') as metric:
            pickle.dump(nlu_engine, metric)
    except Exception as err:
        # Best-effort save: report the failure instead of hiding it.
        print("--> Could not pickle the trained model: {}".format(err))

    print("--> Saving model trained with Snips (SNIPS)...")
    try:
        nlu_engine.persist(filename_results)
    except Exception as err:
        # Best-effort save: report the failure instead of hiding it.
        print("--> Could not persist the trained model: {}".format(err))
def train(dataset_file_path, train_directory):
    """Train an engine for the dataset's language and persist it.

    Args:
        dataset_file_path: Path of the JSON training dataset. Its
            ``"language"`` entry selects the configuration from the
            module-level ``language_configs`` mapping.
        train_directory: Target passed to the engine's ``persist``.

    Raises:
        ValueError: If ``language_configs`` has no entry for the dataset's
            language (including a dataset without a ``"language"`` key).
    """
    with io.open(dataset_file_path) as f:
        dataset = json.load(f)

    language = dataset.get("language")
    config = language_configs.get(language)
    if config is None:
        # ``dataset`` is a plain dict, so the language must be reported from
        # the looked-up value; attribute access on the dict would fail.
        raise ValueError(
            f"No language configuration for language {language}")

    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(dataset)
    nlu_engine.persist(train_directory)
def runEngine(query):
    """Train an English engine on ``dataset.json`` and parse ``query``.

    The engine uses the default ``CONFIG_EN``; an earlier variant that read
    the configuration from ``config_en.json`` is no longer active.

    Returns:
        The parse result serialized with ``json.dumps(..., indent=2)``.
    """
    with io.open("dataset.json") as dataset_file:
        training_data = json.load(dataset_file)

    load_resources("en")
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine.fit(training_data)

    return json.dumps(nlu_engine.parse(query), indent=2)
def get_nlu_engine(dataset_json_path: str) -> SnipsNLUEngine:
    """Return an engine built with ``CONFIG_EN`` and fitted on a JSON dataset.

    Args:
        dataset_json_path: Path of the JSON training dataset.

    Returns:
        The value returned by the engine's ``fit`` call.
    """
    with io.open(dataset_json_path) as dataset_file:
        training_data = json.load(dataset_file)
    return SnipsNLUEngine(config=CONFIG_EN).fit(training_data)
class NLU:
    """Wrapper around a Snips NLU engine trained on a bundled sample dataset."""

    def __init__(self):
        """Load the sample dataset; the engine is built by ``train_engine``."""
        self.sample_dataset = NLU.load_dataset()

    @staticmethod
    def load_dataset():
        """
        Load the sample dataset used to train the Snips NLU engine.

        The file is ``static/sample/test.json`` below the parent of this
        module's directory.

        :return: the parsed JSON dataset, or ``None`` when loading fails
                 (the error is printed)
        """
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        try:
            with open(f'{base_dir}/static/sample/test.json', 'r',
                      encoding='utf-8') as fr:
                return json.load(fr)
        except Exception as e:
            print("Could not load dataset {}".format(str(e)))
        return None

    def train_engine(self):
        """
        Create the English Snips NLU engine and fit it on the sample dataset.

        :return: None
        """
        load_resources("en")
        engine = SnipsNLUEngine(config=CONFIG_EN)
        self.nlu_engine = engine
        engine.fit(self.sample_dataset)

    def parse_sentence(self, sentence):
        """
        Parse a natural-language query with the trained engine.

        ``train_engine`` must have been called first, since it is what
        creates ``self.nlu_engine``.

        :param sentence: the query text
        :return: the engine's parse result for ``sentence``
        """
        return self.nlu_engine.parse(sentence)
def __init__(self):
    """Train the handler's engine and register this object as the instance.

    English resources are loaded, an engine with ``CONFIG_EN`` is fitted on
    the JSON dataset at ``SnipHandler.dataset_path``, and the object is
    stored in ``SnipHandler.__instance__``.
    """
    load_resources(u"en")
    untrained_engine = SnipsNLUEngine(config=CONFIG_EN)

    with io.open(SnipHandler.dataset_path) as dataset_file:
        training_data = json.load(dataset_file)

    # Keep whatever fit() returns as the engine used for parsing.
    self.engine = untrained_engine.fit(training_data)
    SnipHandler.__instance__ = self
def imprimirmensaje(message):
    """Answer a chat message with data looked up for the detected intent.

    The message text is parsed by an engine trained on ``dataset.json``.
    When both an intent and a first slot value are found, the predicate URI
    for that intent is queried through ``consultaSparql1`` and the result is
    sent to the chat. Otherwise ``consultaSparql`` is called and a
    placeholder reply is sent.

    NOTE(review): the engine is retrained for every message; training it
    once at start-up would avoid that cost.
    """
    # Predicate URI queried for each supported intent.
    predicados = {
        "descripcion": "http://usefulinc.com/ns/doap#description",
        "imagen": "http://schema.org/image",
        "igualEn": "http://www.w3.org/2002/07/owl#sameAs",
        "igualKi": "http://www.w3.org/2002/07/owl#sameAs",
        "nombreCientifico":
            "http://lod.taxonconcept.org/ontology/txn.owl#scientificName",
    }

    chatid = message.chat.id
    with io.open('dataset.json') as dataset_file:
        dataset = json.load(dataset_file)
    engine = SnipsNLUEngine()
    engine.fit(dataset)
    parsing = engine.parse(unicode(message.text))

    # A parse without a recognised intent may carry None instead of a name;
    # treat that as "no intent" instead of failing on len(None).
    intent = parsing.get("intent") or {}
    intentName = intent.get("intentName") or ""

    # Only the first slot is used; a parse without slots yields no entity.
    slots = parsing.get("slots") or []
    entidad = (slots[0].get("rawValue") or "") if slots else ""

    if intentName and entidad:
        print("good")
        if intentName == "descripcion":
            print("si entraaaaaaaaaaaaaaaaaaa")
        campo = ""
        predicado = predicados.get(intentName)
        if predicado is not None:
            campo = consultaSparql1(entidad, predicado)
        bot.send_message(chatid, campo)
    else:
        print("bad")
        consultaSparql()
        # Reply to the chat id, like the success path does; the message
        # object itself is not a chat id.
        bot.send_message(chatid, "perro")
def train():
    """
    Fit an nlu-engine on every regular file found in DATASET_PATH (each is
    read as UTF-8 JSON) and hand the engine to ``save_engine``.

    NOTE(review): ``fit`` is called once per file on the same engine. If
    each call retrains from scratch, only the last file's data is reflected
    in the saved engine - confirm against the Snips NLU ``fit`` semantics.

    :return: Nothing.
    """
    dataset_files = []
    for entry in os.listdir(DATASET_PATH):
        candidate = join(DATASET_PATH, entry)
        if isfile(candidate):
            dataset_files.append(candidate)

    engine = SnipsNLUEngine(CONFIG_EN)
    for dataset_file in dataset_files:
        with io.open(dataset_file, encoding="utf-8") as handle:
            training_data = json.load(handle)
        engine.fit(training_data)

    save_engine(engine)
def loadntrain(self, rootpath='./datasets/*.json'):
    """Train one engine per dataset file matching ``rootpath``.

    The matching paths are processed in sorted order; the fitted engines are
    collected, in that order, in ``self.models`` (reset on every call).
    Progress is printed per dataset and once at the end.
    """
    dataset_paths = sorted(glob.glob(rootpath))
    self.models = []

    for position, dataset_path in enumerate(dataset_paths, start=1):
        with io.open(dataset_path) as handle:
            training_data = json.load(handle)
        fitted = SnipsNLUEngine(config=CONFIG_EN).fit(training_data)
        self.models.append(fitted)
        print(f"{position}. Trained for {dataset_path}")

    print(f"Training for {len(dataset_paths)} datasets completed")
def main():
    """Train the global NLU engine, then run the ROS node until it stops.

    The engine is fitted on the JSON dataset at ``DATASET_PATH``. The node
    publishes on ``ROS_PUBLISHER_TOPIC`` and routes messages received on
    ``ROS_SUBSCRIBER_TOPIC`` to ``callback``. Both ``nlu_engine`` and
    ``publisher`` are module-level globals set here.
    """
    global publisher
    global nlu_engine

    # Train the engine used for parsing.
    with open(DATASET_PATH) as dataset_file:
        training_data = json.load(dataset_file)
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine.fit(training_data)

    # Set up the ROS node with its publisher and subscriber.
    rospy.init_node(str(ROS_NODE_NAME))
    publisher = rospy.Publisher(str(ROS_PUBLISHER_TOPIC), String, queue_size=10)
    rospy.Subscriber(str(ROS_SUBSCRIBER_TOPIC), String, callback)

    started = "ROS node '%s' started. Listening from '%s' (ctrl-C to exit)..." % (
        ROS_NODE_NAME, ROS_SUBSCRIBER_TOPIC)
    print(started)

    rospy.spin()

    stopped = "Ctrl-C received. Shutting down ROS node '%s'!" % ROS_NODE_NAME
    print(stopped)
class LanguageParser(pluginmanager.IPlugin, PluginStorage):
    """
    interface to parse input text

    Each plugin name becomes one intent with a single utterance; parsing an
    input maps the detected intent name back to its plugin.
    """

    def __init__(self):
        # NOTE(review): super() is anchored at IPlugin rather than at this
        # class, so the lookup starts after IPlugin in the MRO - confirm
        # this is intended.
        super(pluginmanager.IPlugin, self).__init__()
        self._plugins = {}
        self._pre_train_json = {
            'intents': {},
            'entities': {},
            'language': 'en',
        }
        self.nlu_engine = SnipsNLUEngine()

    def train(self, plugins):
        """Build the training data from ``plugins`` and fit the engine."""
        self._generate_pre_train_json(plugins)
        self.nlu_engine.fit(self._pre_train_json)

    def _generate_pre_train_json(self, plugins):
        """Register an intent for every plugin and, recursively, its sub-plugins."""
        for plugin in plugins:
            utterance = {"data": [{'text': plugin.get_name()}]}
            intent = {'utterances': [utterance]}

            # The intent name is the plugin name's word tokens joined by "_".
            name_tokens = re.findall(r"[\w']+", plugin.get_name())
            intent_name = '_'.join(name_tokens)

            self._pre_train_json['intents'][intent_name] = intent
            self._plugins[intent_name] = plugin

            # handle sub commands (recursive)
            self._generate_pre_train_json(plugin.get_plugins().values())

    def identify_action(self, action):
        """Return the plugin for the intent parsed from ``action``, or ``None``."""
        parsed_action = self.nlu_engine.parse(action)
        print(parsed_action)
        return self._plugins.get(parsed_action['intent']['intentName'])
def make_nlu_model_json(fname):
    """Train an English engine from a Snips YAML file and parse a sample.

    The previous body read from an undefined ``stream`` and passed parsed
    YAML documents to ``Dataset.from_yaml_files``, which is given file names
    here instead, so ``fname`` is now actually used.

    Args:
        fname: Path of the YAML file with the intent/entity definitions.

    Returns:
        The parse result for the sample sentence (the earlier version
        returned nothing).
    """
    dataset = Dataset.from_yaml_files("en", [fname])

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    # NOTE(review): the dict form of the dataset is passed to fit(); some
    # Snips versions may not accept the Dataset object itself - confirm.
    nlu_engine = nlu_engine.fit(dataset.json)

    text = "Please turn the light on in the kitchen"
    return nlu_engine.parse(text)
class __impl:
    """Holds an NLU engine trained on ``oms_dataset.json`` at construction."""

    __nlu_engine = None

    def __init__(self):
        """Load the dataset and train the engine; exit if it cannot be read.

        Raises:
            SystemExit: With status 1 when ``oms_dataset.json`` cannot be
                opened or does not contain valid JSON.
        """
        print('Load NLU Engine')
        print(
            '-----------------------------------------------------------------'
        )
        try:
            with io.open("oms_dataset.json") as f:
                dataset = json.load(f)
        except (IOError, OSError) as err:
            # Fill in the message template, which used to be printed raw.
            print('I/O error({0}): {1}'.format(err.errno, err.strerror))
            sys.exit(1)
        except ValueError as err:
            print('Invalid JSON in oms_dataset.json: {0}'.format(err))
            sys.exit(1)

        load_resources('snips_nlu_en')
        self.__nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        self.__nlu_engine.fit(dataset)
        # NOTE(review): the serialized bytes are discarded; presumably this
        # only checks that the trained engine can be serialized - confirm.
        self.__nlu_engine.to_byte_array()

    def parse_text(self, text):
        """Return the engine's parse result for ``text``."""
        return self.__nlu_engine.parse(text)
def init(seed):
    """Train an engine on ``Dataset/dataset.json`` and persist it as ``model``.

    Args:
        seed: Passed to the engine as ``random_state``.
    """
    with io.open("Dataset/dataset.json", encoding="utf8") as dataset_file:
        training_data = json.load(dataset_file)
    print("initialising Lynda")

    # Imported here, after the dataset is read, as in the original flow.
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN

    engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)

    # Train the NLU model.
    engine.fit(training_data)
    print("momdel created")

    # Save the trained model.
    engine.persist('model')
    print("model dumped")
def main():
    """
    Build the training dataset, train an NLU engine on it and save the engine.

    This must be run whenever a command is added or the utterances of a
    command are edited. If saving fails with ``PersistingError`` the
    existing engine directory is deleted and the engine is saved again.
    """
    engine_path = os.path.join('Voithos', 'utilities', 'NLU')
    training_json = json.loads(build_training_dataset())

    trained_engine = SnipsNLUEngine(config=CONFIG_EN).fit(training_json)

    try:
        trained_engine.persist(engine_path)
    except PersistingError:
        # Remove the previous engine and save the new one in its place.
        shutil.rmtree(engine_path)
        trained_engine.persist(engine_path)
def train_nlu():
    """Train an engine on ``training_data/dataset.json`` and persist it.

    Any existing content at the module-level ``model_path`` is removed
    before the engine is persisted there. The time from the start of
    training to the end of persisting is printed.
    """
    with io.open("training_data/dataset.json") as dataset_file:
        training_data = json.load(dataset_file)

    engine = SnipsNLUEngine()
    print("Snips training started")
    started_at = datetime.datetime.now()
    engine = engine.fit(training_data)

    # Clear a previous model first; both cases end with a persist.
    if os.path.exists(model_path):
        shutil.rmtree(model_path, ignore_errors=True)
    engine.persist(model_path)

    finished_at = datetime.datetime.now()
    print("Total time to train the Snips model: {0}".format(finished_at - started_at))
def main():
    """Train a Korean NLU engine, then run streaming speech recognition.

    The engine is fitted on ``./lights_dataset_train_ko.json``. Microphone
    audio is streamed to the speech client and the responses, together with
    the engine, are handed to ``listen_print_loop``.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # The language code can be changed here.
    language_code = 'ko-KR'  # a BCP-47 language tag

    # Feed the language dataset to Snips NLU.
    with io.open("./lights_dataset_train_ko.json", encoding="utf8") as dataset_file:
        training_data = json.load(dataset_file)
    nlu_engine = SnipsNLUEngine(config=CONFIG_KO).fit(training_data)
    print("성공")
    # End of preprocessing.

    # Start of speech recognition.
    client = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        # Start of the endless loop.
        listen_print_loop(responses, nlu_engine)
def imprimirmensaje(message):
    """Reply to a chat message according to the intent parsed from its text.

    The text is parsed by an engine trained on ``dataset.json``. With an
    intent and a first slot value, the matching SPARQL lookup is run and its
    answer is sent as a reply. The ``allAnimal`` intent replies with the
    comma-separated results of ``consultaSparql``. Any error while building
    the answer produces a generic "not understood" reply.

    NOTE(review): the engine is retrained for every message; training it
    once at start-up would avoid that cost.
    """
    campo = ""
    bandera1 = False  # set when no slot value could be read from the parse

    with io.open('dataset.json') as dataset_file:
        dataset = json.load(dataset_file)
    engine = SnipsNLUEngine()
    engine.fit(dataset)
    parsing = engine.parse(unicode(message.text))

    entidad = ""
    intentName = ""
    try:
        intentName = parsing["intent"]["intentName"]
        try:
            entidad = parsing["slots"][0]["rawValue"]
        except (IndexError, KeyError, TypeError):
            bandera1 = True

        # len() is kept on purpose: a missing (None) intent name raises here
        # and is answered by the generic reply in the handler below.
        if len(intentName) > 0 and len(entidad) > 0:
            print("good")
            if intentName == "descripcion":
                print("si entraaaaaaaaaaaaaaaaaaa")
                predicado = "http://usefulinc.com/ns/doap#description"
                campo = consultaSparql1(entidad, predicado)
            elif intentName == "imagen":
                predicado = "http://schema.org/image"
                campo = consultaSparql1(entidad, predicado)
                campo = ("La imagen de " + entidad + " es: " + campo[0])
            elif intentName == "igualEn":
                predicado = "http://www.w3.org/2002/07/owl#sameAs"
                campo = consultaSparql2(entidad, predicado, "en")
                campo = ("La traduccion de " + entidad + " en ingles es: " + campo[0])
            elif intentName == "igualKi":
                predicado = "http://www.w3.org/2002/07/owl#sameAs"
                campo = consultaSparql2(entidad, predicado, "ki")
                campo = ("La traduccion de " + entidad + " en kitchwa es: " + campo[0])
            elif intentName == "nombreCientifico":
                predicado = "http://lod.taxonconcept.org/ontology/txn.owl#scientificName"
                campo = consultaSparql1(entidad, predicado)
                campo = ("El nombre cientifico de " + entidad + " es: " + campo[0])
            mi_bot.reply_to(message, campo)
        elif intentName == "allAnimal":
            cam = consultaSparql()
            # Every item is followed by ", ", including the last one.
            campo = "".join(ca + ", " for ca in cam)
            mi_bot.reply_to(message, campo)
        else:
            print(entidad)
            print(intentName)
            print("bad")
            if bandera1:
                print("¿Dime de animal deseas saber la descripción?")
    except Exception:
        # Top-level boundary: any failure above is answered with a generic
        # reply, without also swallowing KeyboardInterrupt or SystemExit.
        campo = "No entiendo tu pregunta, puedes preguntarme de nuevo"
        mi_bot.reply_to(message, campo)
from __future__ import unicode_literals, print_function import io import json from snips_nlu import SnipsNLUEngine, load_resources from snips_nlu.default_configs import CONFIG_EN, CONFIG_DE with io.open("lights_dataset_de.json") as f: sample_dataset = json.load(f) # load_resources("en") # nlu_engine = SnipsNLUEngine(config=CONFIG_EN) load_resources("de") nlu_engine = SnipsNLUEngine(config=CONFIG_DE) nlu_engine.fit(sample_dataset) nlu_engine.persist("models/current")
import json
from pprint import pprint

from snips_nlu import load_resources, SnipsNLUEngine

# Train an English engine from ./dataset.json with the configuration in
# ./config_en.json and dump the trained engine to ./trained_engine.json.
# Input files are opened read-only inside `with` blocks so they are closed
# promptly and do not need to be writable.
with open('./dataset.json', 'r') as dataset_file:
    data = json.load(dataset_file)

load_resources('en')

with open('./config_en.json', 'r') as config_file:
    config = json.load(config_file)

nlu_engine = SnipsNLUEngine(config=config)
nlu_engine.fit(data)

with open('./trained_engine.json', 'w') as f:
    f.write(json.dumps(nlu_engine.to_dict()))
import io
import json
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN
from pathlib import Path

# All assets live in the project_4 directory; building the paths with
# pathlib avoids backslash escapes and works on every platform.
PROJECT_DIR = Path("project_4")

# The logo is printed by the hum_type call at the end of this script, so it
# has to be loaded; its definition had been commented out.
logo = (PROJECT_DIR / "logo.txt").read_text()
#gen_help = (PROJECT_DIR / "help.txt").read_text()
#print(gen_help)
#welcome = (PROJECT_DIR / "welcome.txt").read_text()

with io.open(str(PROJECT_DIR / "dataset.json")) as f:
    dataset = json.load(f)

nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine = nlu_engine.fit(dataset)

############# Custom classes
# Each module name is rebound to the class of the same name it defines.
import funcs
import Layout
Layout = Layout.Layout
import Info
Info = Info.Info
import Store
import Character
Character = Character.Character
#################

funcs.hum_type(logo, speed=550)
@author: yanni """ import io import json from snips_nlu import SnipsNLUEngine path = '/Users/yanni/PycharmProjects/chatbot/src/' ### train Slots Detection Model #!snips-nlu generate-dataset en {path}/Movie_intent.yaml {path}/Movie_entity.yaml > {path}/Movie_dataset.json with io.open(path + 'Movie_dataset.json') as f: sample_dataset = json.load(f) nlu_engine = SnipsNLUEngine() nlu_engine.fit(sample_dataset) nlu_engine.persist(path + 'Movie_Slots_Detection') #!snips-nlu generate-dataset en {path}/Aspect_intent.yaml {path}/Aspect_entity.yaml > {path}/Aspect_dataset.json with io.open(path + 'Aspect_dataset.json') as f: aspect_dataset = json.load(f) nlu_engine = SnipsNLUEngine() nlu_engine.fit(aspect_dataset) nlu_engine.persist(path + 'Aspect_Slots_Detection')
# -*- coding: utf-8 -*- from __future__ import unicode_literals import sys import json from snips_nlu import load_resources, SnipsNLUEngine from snips_nlu.default_configs import CONFIG_ES import io # creacion del archivo de entrenamineto load_resources("es") reload(sys) sys.setdefaultencoding('utf8') with io.open("dataset.json") as f: dataset = json.load(f) engine = SnipsNLUEngine(config=CONFIG_ES) engine.fit(dataset) engine_json = json.dumps(engine.to_dict()) with io.open("trained.json", mode="w") as f: f.write(unicode(engine_json))
from __future__ import unicode_literals, print_function import io import json from snips_nlu import SnipsNLUEngine, load_resources with io.open("sample_dataset.json") as f: sample_dataset = json.load(f) with io.open("configs/config_en.json") as f: config = json.load(f) load_resources(sample_dataset["language"]) nlu_engine = SnipsNLUEngine(config=config) nlu_engine.fit(sample_dataset) text = "What will be the weather in San Francisco next week?" parsing = nlu_engine.parse(text) print(json.dumps(parsing, indent=2))