Example #1
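# Excerpt from a CLI training script: sys, io, json, the project-specific
# parse_train_args helper, and the snips_nlu imports (SnipsNLUEngine,
# load_resources, NLUEngineConfig) are assumed to be imported elsewhere.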
def main_train_engine():
    args = vars(parse_train_args(sys.argv[1:]))

    dataset_path = args.pop("dataset_path")
    with io.open(dataset_path, "r", encoding="utf8") as f:
        dataset = json.load(f)

    if args.get("config_path") is not None:
        config_path = args.pop("config_path")
        with io.open(config_path, "r", encoding="utf8") as f:
            config = json.load(f)
    else:
        config = NLUEngineConfig()

    load_resources(dataset["language"])
    print("Creating and training the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    output_path = args.pop("output_path")
    serialized_engine = json.dumps(engine.to_dict())
    with io.open(output_path, "w", encoding="utf8") as f:
        f.write(serialized_engine)
    print("Saved the trained engine to %s" % output_path)
Example #2
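# Excerpt from a larger module: os, json, the Interpreter base class, the
# get_entity_value helper, and the snips_nlu imports (SnipsNLUEngine,
# BuiltinEntityParser, load_resources, is_builtin_entity, __version__) are
# provided elsewhere in the project.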
class SnipsInterpreter(Interpreter):
    def __init__(self):
        super(SnipsInterpreter, self).__init__('snips')

        self._meta = None
        self._lang = None
        self._engine = None
        self._entity_parser = None

        self._log.info('Using snips-nlu version %s' % __version__)

    def metadata(self):
        return self._meta

    def lang(self):
        return self._lang

    def fit(self, training_file_path, trained_directory_path):
        filename, _ = os.path.splitext(os.path.basename(training_file_path))

        # TODO check what should be in the base Interpreter class

        trained_path = os.path.join(trained_directory_path,
                                    '%s.trained.json' % filename)
        checksum_path = os.path.join(trained_directory_path,
                                     '%s.checksum' % filename)

        with open(training_file_path) as f:
            training_str = f.read()
            training_data = json.loads(training_str)
            self._lang = training_data['language']
            self._log.info('Loading resources for language %s' % self._lang)
            load_resources(self._lang)

        same, computed_checksum = self.checksum_match(training_str,
                                                      checksum_path)

        # Checksums match, load the engine from trained file
        if same and os.path.isfile(trained_path):
            self._log.info('Checksum matched, loading trained engine')
            with open(trained_path) as f:
                self._engine = SnipsNLUEngine.from_dict(json.load(f))
        else:
            self._log.info('Checksum has changed, retraining the engine')
            self._engine = SnipsNLUEngine()
            self._engine.fit(training_data)

            with open(trained_path, mode='w') as f:
                json.dump(self._engine.to_dict(), f)

            with open(checksum_path, mode='w') as f:
                f.write(computed_checksum)

        self._entity_parser = BuiltinEntityParser(self._lang)
        self._meta = {
            k: list(v.keys())
            for k, v in
            self._engine._dataset_metadata['slot_name_mappings'].items()
        }
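
    def checksum_match(self, training_str, checksum_path):
        # Hypothetical sketch: the real helper is not shown in this excerpt.
        # Assumes the stored checksum is a SHA-256 hex digest of the raw
        # training data (requires `import hashlib`).
        computed = hashlib.sha256(training_str.encode('utf8')).hexdigest()
        if os.path.isfile(checksum_path):
            with open(checksum_path) as f:
                return f.read().strip() == computed, computed
        return False, computed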

    def parse_entity(self, msg, intent, slot):
        entity_label = self._engine._dataset_metadata[
            'slot_name_mappings'].get(intent, {}).get(slot)

        # TODO try to find a way to retrieve multiple slot values, that's a hard one
        # Maybe we could try matching on _dataset_metadata['entities']

        if is_builtin_entity(entity_label):
            parsed = self._entity_parser.parse(msg)

            if parsed:
                return get_entity_value(parsed[0]['entity'], msg)

        return msg

    def parse(self, msg):
        # TODO manage multiple intents in the same sentence

        parsed = self._engine.parse(msg)

        if parsed['intent'] is None:
            return []

        slots = {}

        # Build the slot dictionary, collecting values into a list when a slot matches multiple times
        for slot in parsed['slots']:
            name = slot['slotName']
            value = get_entity_value(slot['value'])

            if name in slots:
                if not isinstance(slots[name], list):
                    slots[name] = [slots[name]]

                slots[name].append(value)
            else:
                slots[name] = value

        return [{
            'text': msg,
            'intent': parsed['intent']['intentName'],
            'slots': slots,
        }]
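
# Usage sketch (not part of the original excerpt; the paths are hypothetical):
#
#     interpreter = SnipsInterpreter()
#     interpreter.fit('dataset.json', '/tmp/trained')
#     print(interpreter.parse('turn on the lights'))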
Example #3
#!/usr/bin/env python
import io
import json
from snips_nlu import load_resources, SnipsNLUEngine

load_resources("es")

with io.open("dataset.json") as f:
    dataset = json.load(f)

engine = SnipsNLUEngine()

engine.fit(dataset)

engine_json = json.dumps(engine.to_dict())
with io.open("trained.json",mode="w") as f:
    f.write(engine_json)