Пример #1
0
class PadatiousFileIntent(IntentPlugin):
    """Intent plugin that feeds locale files into a Padatious container.

    Intent and entity definitions are read from files inside the skill's
    locale directory; the container keeps its cache in
    ``<user_config>/intent_cache``.
    """

    def __init__(self, rt):
        super().__init__(rt)
        self.container = IntentContainer(
            join(rt.paths.user_config, 'intent_cache'))

    def register(self, intent: Any, skill_name: str, intent_id: str):
        """Load ``<intent>.intent`` from the skill's locale dir as *intent_id*."""
        file_name = join(self.rt.paths.skill_locale(skill_name),
                         intent + '.intent')
        self.container.load_intent(name=intent_id, file_name=file_name)

    def register_entity(self, entity: Any, entity_id: str, skill_name: str):
        """Load ``<entity>.entity`` from the skill's locale dir as *entity_id*.

        The definition is registered through ``load_entity`` so that it is
        tracked as an entity; ``unregister_entity`` removes it with
        ``remove_entity``, which would not find something loaded as an intent.
        """
        file_name = join(self.rt.paths.skill_locale(skill_name),
                         entity + '.entity')
        self.container.load_entity(name=entity_id, file_name=file_name)

    def unregister(self, intent_id: str):
        """Remove the intent registered under *intent_id* from the container."""
        self.container.remove_intent(intent_id)

    def unregister_entity(self, entity_id: str):
        """Remove the entity registered under *entity_id* from the container."""
        self.container.remove_entity(entity_id)

    def compile(self):
        """Train the container on everything registered so far."""
        log.info('Training...')
        self.container.train()
        log.info('Training complete!')

    def calc_intents(self, query):
        """Return one ``IntentMatch`` per result the container yields for *query*."""
        return [
            IntentMatch(intent_id=data.name,
                        confidence=data.conf,
                        matches=data.matches,
                        query=query)
            for data in self.container.calc_intents(query)
        ]
Пример #2
0
class NeuralNER(RuleNER):
    """Entity extractor that matches text with a Padatious ``IntentContainer``.

    The container's cache lives in ``~/.simple_NER/rule_cache``.
    """

    def __init__(self):
        cache = expanduser("~/.simple_NER")
        # exist_ok avoids the check-then-create race of a separate isdir()
        # test when several processes start at the same time.
        makedirs(cache, exist_ok=True)
        self._container = IntentContainer(join(cache, "rule_cache"))
        self._rules = {}
        self._examples = {}

    def extract_entities(self, text, as_json=False):
        """Yield one entity per match of every rule the container returns.

        Args:
            text: the text to run through the container.
            as_json: when true, yield ``Entity.as_json()`` results instead of
                ``Entity`` objects.

        Raises:
            KeyError: if a rule with matches has no entry in ``self._rules``.
        """
        for rule in self._container.calc_intents(text):
            for entity_type in rule.matches:
                entity = Entity(rule.matches[entity_type],
                                entity_type=entity_type,
                                source_text=text,
                                confidence=rule.conf,
                                rules=self._rules[rule.name])
                yield entity.as_json() if as_json else entity
Пример #3
0
class PadatiousExtractor(IntentExtractor):
    """Intent extractor that delegates matching to a Padatious container.

    Every call into ``self.container`` is made while holding ``self.lock``.
    """
    keyword_based = False

    def __init__(self, cache_dir=None, *args, **kwargs):
        """Create the container.

        Args:
            cache_dir: directory for the container cache; defaults to
                ``<data_dir>/padatious`` where ``data_dir`` comes from the
                config (``~/.padatious`` if unset).
        """
        super().__init__(*args, **kwargs)
        # TODO xdg data_dir
        data_dir = expanduser(self.config.get("data_dir", "~/.padatious"))
        cache_dir = cache_dir or join(data_dir, "padatious")
        self.lock = Lock()
        self.container = IntentContainer(cache_dir)
        self.registered_intents = []

    def _remember_intent(self, intent_name):
        """Record *intent_name* as registered, without creating duplicates."""
        if intent_name not in self.registered_intents:
            self.registered_intents.append(intent_name)

    def detach_intent(self, intent_name):
        """Remove *intent_name* from the container if it is registered here."""
        if intent_name in self.registered_intents:
            LOG.debug("Detaching padatious intent: " + intent_name)
            with self.lock:
                self.container.remove_intent(intent_name)
            # Drop every occurrence (in place, so existing references to the
            # list stay valid); list.remove() would leave stale duplicates.
            self.registered_intents[:] = [
                name for name in self.registered_intents
                if name != intent_name
            ]

    def detach_skill(self, skill_id):
        """Detach every registered intent whose name contains *skill_id*."""
        LOG.debug("Detaching padatious skill: " + str(skill_id))
        # Snapshot first: detach_intent mutates registered_intents.
        remove_list = [i for i in self.registered_intents if skill_id in i]
        for i in remove_list:
            self.detach_intent(i)

    def register_entity(self, entity_name, samples=None, reload_cache=True):
        """Add an entity to the container.

        ``samples`` defaults to ``[entity_name]`` when empty or omitted.
        """
        samples = samples or [entity_name]
        with self.lock:
            self.container.add_entity(entity_name,
                                      samples,
                                      reload_cache=reload_cache)

    def register_intent(self, intent_name, samples=None, reload_cache=True):
        """Add an intent to the container and remember its samples.

        ``samples`` defaults to ``[intent_name]`` when empty or omitted.
        Samples of repeated registrations are accumulated in
        ``self._intent_samples``.
        """
        samples = samples or [intent_name]
        if intent_name not in self._intent_samples:
            # Store a copy: a later ``+=`` must not mutate the caller's list.
            self._intent_samples[intent_name] = list(samples)
        else:
            self._intent_samples[intent_name] += samples
        with self.lock:
            self.container.add_intent(intent_name,
                                      samples,
                                      reload_cache=reload_cache)
        self._remember_intent(intent_name)

    def register_entity_from_file(self,
                                  entity_name,
                                  file_name,
                                  reload_cache=True):
        """Load an entity definition from *file_name* into the container."""
        with self.lock:
            self.container.load_entity(entity_name,
                                       file_name,
                                       reload_cache=reload_cache)

    def register_intent_from_file(self,
                                  intent_name,
                                  file_name,
                                  single_thread=True,
                                  timeout=120,
                                  reload_cache=True,
                                  force_training=True):
        """Load an intent from *file_name* and train the container.

        Best effort: any exception raised while loading or training is
        logged and swallowed rather than propagated.
        """
        try:
            with self.lock:
                self.container.load_intent(intent_name,
                                           file_name,
                                           reload_cache=reload_cache)
            self._remember_intent(intent_name)
            success = self._train(single_thread=single_thread,
                                  timeout=timeout,
                                  force_training=force_training)
            if success:
                LOG.debug(file_name + " trained successfully")
            else:
                LOG.error(file_name + " FAILED TO TRAIN")

        except Exception as e:
            LOG.exception(e)

    def _get_remainder(self, intent, utterance):
        """Return the utterance remainder for *intent*.

        Falls back to the unchanged utterance when no samples are known for
        ``intent["name"]``.
        """
        if intent["name"] in self.intent_samples:
            return get_utterance_remainder(
                utterance, samples=self.intent_samples[intent["name"]])
        return utterance

    def _normalize_match(self, intent, utterance):
        """Rename the raw match fields in *intent* to this extractor's keys.

        *intent* is a dict holding the match's ``name``, ``sent`` and
        ``matches`` fields; it is modified in place and returned.
        """
        # Must run before "name" is popped below.
        intent["utterance_remainder"] = self._get_remainder(intent, utterance)
        intent["entities"] = intent.pop("matches")
        intent["intent_engine"] = "padatious"
        intent["intent_type"] = intent.pop("name")
        intent["utterance"] = intent.pop("sent")
        if isinstance(intent["utterance"], list):
            intent["utterance"] = " ".join(intent["utterance"])
        return intent

    def calc_intent(self, utterance, min_conf=None):
        """Return the best intent for *utterance* as a dict.

        Args:
            utterance: text to match; it is stripped and lower-cased first.
            min_conf: minimum confidence; ``None`` means use the
                ``padatious_min_conf`` config value (default 0.65). An
                explicit ``0`` is honoured.

        Returns:
            The normalized match, or an ``"unknown"`` intent dict with
            ``conf`` 0 when the match is below ``min_conf``.
        """
        if min_conf is None:
            min_conf = self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        with self.lock:
            match = self.container.calc_intent(utterance)
        # Work on a copy so the match object itself is left untouched.
        intent = dict(vars(match))
        if intent["conf"] < min_conf:
            return {
                "intent_type": "unknown",
                "entities": {},
                "conf": 0,
                "intent_engine": "padatious",
                "utterance": utterance,
                "utterance_remainder": utterance
            }
        return self._normalize_match(intent, utterance)

    def intent_scores(self, utterance):
        """Return every match the container reports for *utterance*.

        Each match is returned as a normalized dict; no confidence filter
        is applied.
        """
        utterance = utterance.strip().lower()
        with self.lock:
            # Materialize inside the lock so the container is not being
            # iterated after the lock is released.
            matches = list(self.container.calc_intents(utterance))
        return [self._normalize_match(dict(vars(match)), utterance)
                for match in matches]

    def calc_intents(self, utterance, min_conf=None):
        """Segment *utterance* and map each segment to its best intent.

        Segments whose intent falls below ``min_conf`` map to ``None``.
        ``min_conf`` follows the same rules as in ``calc_intent`` and is
        forwarded to it, so thresholds below the configured default work.
        """
        if min_conf is None:
            min_conf = self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            intent = self.calc_intent(ut, min_conf=min_conf)
            bucket[ut] = None if intent["conf"] < min_conf else intent
        return bucket

    def calc_intents_list(self, utterance):
        """Segment *utterance* and map each segment to ``filter_intents(segment)``."""
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.filter_intents(ut)
        return bucket

    def manifest(self):
        """Return a dict listing the registered intent names."""
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}

    def _train(self, single_thread=True, timeout=120, force_training=True):
        """Train the container under the lock and return its result."""
        with self.lock:
            return self.container.train(single_thread=single_thread,
                                        timeout=timeout,
                                        force=force_training,
                                        debug=True)