Exemplo n.º 1
0
class PadatiousExtractor(IntentExtractor):
    keyword_based = False

    def __init__(self, cache_dir=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # TODO xdg data_dir
        data_dir = expanduser(self.config.get("data_dir", "~/.padatious"))
        cache_dir = cache_dir or join(data_dir, "padatious")
        self.lock = Lock()
        self.container = IntentContainer(cache_dir)
        self.registered_intents = []

    def detach_intent(self, intent_name):
        if intent_name in self.registered_intents:
            LOG.debug("Detaching padatious intent: " + intent_name)
            with self.lock:
                self.container.remove_intent(intent_name)
            self.registered_intents.remove(intent_name)

    def detach_skill(self, skill_id):
        LOG.debug("Detaching padatious skill: " + str(skill_id))
        remove_list = [i for i in self.registered_intents if skill_id in i]
        for i in remove_list:
            self.detach_intent(i)

    def register_entity(self, entity_name, samples=None, reload_cache=True):
        samples = samples or [entity_name]
        with self.lock:
            self.container.add_entity(entity_name,
                                      samples,
                                      reload_cache=reload_cache)

    def register_intent(self, intent_name, samples=None, reload_cache=True):
        samples = samples or [intent_name]
        if intent_name not in self._intent_samples:
            self._intent_samples[intent_name] = samples
        else:
            self._intent_samples[intent_name] += samples
        with self.lock:
            self.container.add_intent(intent_name,
                                      samples,
                                      reload_cache=reload_cache)
        self.registered_intents.append(intent_name)

    def register_entity_from_file(self,
                                  entity_name,
                                  file_name,
                                  reload_cache=True):
        with self.lock:
            self.container.load_entity(entity_name,
                                       file_name,
                                       reload_cache=reload_cache)

    def register_intent_from_file(self,
                                  intent_name,
                                  file_name,
                                  single_thread=True,
                                  timeout=120,
                                  reload_cache=True,
                                  force_training=True):
        try:
            with self.lock:
                self.container.load_intent(intent_name,
                                           file_name,
                                           reload_cache=reload_cache)
            self.registered_intents.append(intent_name)
            success = self._train(single_thread=single_thread,
                                  timeout=timeout,
                                  force_training=force_training)
            if success:
                LOG.debug(file_name + " trained successfully")
            else:
                LOG.error(file_name + " FAILED TO TRAIN")

        except Exception as e:
            LOG.exception(e)

    def _get_remainder(self, intent, utterance):
        if intent["name"] in self.intent_samples:
            return get_utterance_remainder(
                utterance, samples=self.intent_samples[intent["name"]])
        return utterance

    def calc_intent(self, utterance, min_conf=None):
        min_conf = min_conf or self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        with self.lock:
            intent = self.container.calc_intent(utterance).__dict__
        if intent["conf"] < min_conf:
            return {
                "intent_type": "unknown",
                "entities": {},
                "conf": 0,
                "intent_engine": "padatious",
                "utterance": utterance,
                "utterance_remainder": utterance
            }
        intent["utterance_remainder"] = self._get_remainder(intent, utterance)
        intent["entities"] = intent.pop("matches")
        intent["intent_engine"] = "padatious"
        intent["intent_type"] = intent.pop("name")
        intent["utterance"] = intent.pop("sent")

        if isinstance(intent["utterance"], list):
            intent["utterance"] = " ".join(intent["utterance"])
        return intent

    def intent_scores(self, utterance):
        utterance = utterance.strip().lower()
        intents = [i.__dict__ for i in self.container.calc_intents(utterance)]
        for idx, intent in enumerate(intents):
            intent["utterance_remainder"] = self._get_remainder(
                intent, utterance)
            intents[idx]["entities"] = intents[idx].pop("matches")
            intents[idx]["intent_type"] = intents[idx].pop("name")
            intent["intent_engine"] = "padatious"
            intent["utterance"] = intent.pop("sent")
            if isinstance(intents[idx]["utterance"], list):
                intents[idx]["utterance"] = " ".join(intents[idx]["utterance"])
        return intents

    def calc_intents(self, utterance, min_conf=None):
        min_conf = min_conf or self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            intent = self.calc_intent(ut)
            if intent["conf"] < min_conf:
                bucket[ut] = None
            else:
                bucket[ut] = intent
        return bucket

    def calc_intents_list(self, utterance):
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.filter_intents(ut)
        return bucket

    def manifest(self):
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}

    def _train(self, single_thread=True, timeout=120, force_training=True):
        with self.lock:
            return self.container.train(single_thread=single_thread,
                                        timeout=timeout,
                                        force=force_training,
                                        debug=True)
Exemplo n.º 2
0
class Skill(object):
    def __init__(self, root_dir, name, nlp, active):
        self._root_dir = root_dir
        self._name = name
        self._nlp = nlp
        self._active = active

        self._intents_container = None
        self._adapt_intent_engine = None

        self.initialize_intent_parser()

    def is_active(self):
        return self._active

    def get_name(self):
        return self._name

    def initialize_intent_parser(self):

        self._intents_container = IntentContainer("%s_cache" % self._name)

        self._adapt_intent_engine = DomainIntentDeterminationEngine()
        self._adapt_intent_engine.register_domain(self._name)

        for intent_name, intent_file_path in self.get_intent_names():
            #print ("###### IntentBuilder: %s, %s" % (intent_name, intent_file_path))
            adapt_intent_builder = IntentBuilder(intent_name)
            for intent_name, intent_example_sentences_array in self.intent_training_file_content(
                    intent_file_path, 'intent'):
                #print ("add intent %s, %s" % (intent_name, intent_example_sentences_array))
                self._intents_container.add_intent(
                    intent_name, intent_example_sentences_array)

            for entity_name, entities_array in self.intent_training_file_content(
                    intent_file_path, 'entities'):
                #print ("add entity %s, %s " % (entity_name, entities_array))
                self._intents_container.add_entity(entity_name, entities_array)

                # adapt
                if entity_name.endswith("_keyword"):
                    for k in entities_array:
                        #print ("add keyword %s to %s" % (k, intent_name))
                        self._adapt_intent_engine.register_entity(
                            k, entity_name, domain=self._name)

                    adapt_intent_builder.require(entity_name)

            adapt_intent = adapt_intent_builder.build()
            self._adapt_intent_engine.register_intent_parser(adapt_intent,
                                                             domain=self._name)

        self._intents_container.train(debug=False)

    def get_intent_file_content(self, skill_file_path):
        content_array = []
        with open(skill_file_path, 'r', encoding='utf-8') as skill_file:
            for entry in skill_file:
                content_array.append(entry)
        return content_array

    def get_entities_file_content(self, skill_file_path, allow_variations):
        content_array = []
        with open(skill_file_path, 'r', encoding='utf-8') as skill_file:
            for entry in skill_file:
                entries, variations = entry.strip().split('|'), []
                content_array.append(entries[0])
                if allow_variations:
                    if len(entries) > 1:
                        content_array.extend(entries[1].split(','))
        return content_array

    def get_intent_names(self):
        intent_root_file_path = os.path.join(self._root_dir, self._name,
                                             'intents')
        for intent_name in os.listdir(intent_root_file_path):
            intent_file_path = os.path.join(intent_root_file_path, intent_name)
            yield intent_name, intent_file_path

    def intent_training_file_content(self,
                                     artefacts_root_dir,
                                     artefact_file_extension,
                                     allow_variations=True):
        for artefact_file_path in os.listdir(artefacts_root_dir):
            if artefact_file_path.endswith('.' + artefact_file_extension):
                artefact_name = artefact_file_path.replace(
                    '.' + artefact_file_extension, '')
                if artefact_file_extension is 'entities':
                    artefact_file_lines = self.get_entities_file_content(
                        os.path.join(artefacts_root_dir, artefact_file_path),
                        allow_variations)
                elif artefact_file_extension is 'intent':
                    artefact_file_lines = self.get_intent_file_content(
                        os.path.join(artefacts_root_dir, artefact_file_path))
                yield artefact_name, artefact_file_lines

    def expand_intents(self, include_additional_entities=False):
        # load entities first in the file and build a dictionary
        result = dict()
        entities_dict = dict()

        for intent_name, intent_file_path in self.get_intent_names():

            for entity_type, entities_array in self.intent_training_file_content(
                    intent_file_path, 'entities', False):
                entities_dict[entity_type] = entities_array

            # load intents again from file
            for intent_type, intent_array in self.intent_training_file_content(
                    intent_file_path, 'intent'):
                intent_sentences = set()
                for line in intent_array:
                    line_tokens = self._nlp.tokenization.tokenize(line)
                    expanded = expand_parentheses(line_tokens)
                    for sentence_tokens in expanded:
                        sentence = self._nlp.tokenization.detokenize(
                            sentence_tokens)
                        fieldnames = [
                            fname
                            for _, fname, _, _ in Formatter().parse(sentence)
                            if fname
                        ]
                        fields_dict = dict()
                        for fieldname in fieldnames:
                            if fieldname in entities_dict:
                                fields_dict[fieldname] = entities_dict[
                                    fieldname].copy()
                            else:
                                if include_additional_entities:
                                    field_values = self.get_additional_entities(
                                        fieldname)
                                    if len(field_values) > 0:
                                        fields_dict[fieldname] = field_values

                        if len(fields_dict) > 0:
                            keys, values = zip(*fields_dict.items())
                            permutations = [
                                dict(zip(keys, v))
                                for v in itertools.product(*values)
                            ]
                            for p in permutations:
                                entities_dict_permutation = EntitiesDict(p)
                                intent_sentences.add(
                                    sentence.format(
                                        **entities_dict_permutation))
                        else:
                            intent_sentences.add(sentence)

                result[intent_type] = list(intent_sentences)

        return result

    def get_additional_entities(self, fieldname):
        return []

    def calculate_intent(self, text):
        text = self._nlp.preprocess(text)

        # example result
        # {'intent_type': 'beth.fydd.y.tywydd', 'confidence': 1.0, 'target': None, 'keyword': 'tywydd'}
        #
        #print ("evaluating: %s with adapt:" % text)
        adapt_best_confidence = 0.0
        adapt_result = self._adapt_intent_engine.determine_intent(text)
        for a in adapt_result:
            # print (a)
            if a["confidence"] > adapt_best_confidence:
                adapt_best_confidence = a["confidence"]

        # example result
        # {'sent': "beth yw ' r tywydd", 'name': 'beth.ywr.tywydd', 'conf': 1.0, 'matches': {'tywydd_keyword': 'tywydd?'}}
        #
        #print ("evaluating: %s with padatious:" % text)
        padatious_result = self._intents_container.calc_intent(text)

        return adapt_best_confidence, padatious_result

    def handle(self, intent, latitude, longitude):
        pass