class PadatiousFileIntent(IntentPlugin):
    """Interface for Padatious intent engine.

    Intent and entity definitions are read from files inside the owning
    skill's locale directory (``rt.paths.skill_locale(skill_name)``) and
    loaded into a Padatious ``IntentContainer`` created with the path
    ``<rt.paths.user_config>/intent_cache``.
    """

    def __init__(self, rt):
        super().__init__(rt)
        self.container = IntentContainer(
            join(rt.paths.user_config, 'intent_cache'))

    def register(self, intent: Any, skill_name: str, intent_id: str):
        """Load ``<intent>.intent`` from the skill locale dir as *intent_id*."""
        file_name = join(self.rt.paths.skill_locale(skill_name),
                         intent + '.intent')
        self.container.load_intent(name=intent_id, file_name=file_name)

    def register_entity(self, entity: Any, entity_id: str, skill_name: str):
        """Load an entity definition file into the container as *entity_id*.

        The entity is loaded with ``load_entity`` so that it is stored as an
        entity (and can later be dropped by ``unregister_entity``, which
        calls ``remove_entity``) instead of being registered as an intent.

        ``<entity>.entity`` is preferred; when that file does not exist the
        previously used ``<entity>.intent`` path is tried for backward
        compatibility.
        """
        # Local import: this block cannot assume ``isfile`` is imported at
        # file level.
        from os.path import isfile

        locale_dir = self.rt.paths.skill_locale(skill_name)
        file_name = join(locale_dir, entity + '.entity')
        if not isfile(file_name):
            legacy_name = join(locale_dir, entity + '.intent')
            if isfile(legacy_name):
                file_name = legacy_name
        self.container.load_entity(name=entity_id, file_name=file_name)

    def unregister(self, intent_id: str):
        """Remove the intent registered under *intent_id*."""
        self.container.remove_intent(intent_id)

    def unregister_entity(self, entity_id: str):
        """Remove the entity registered under *entity_id*."""
        self.container.remove_entity(entity_id)

    def compile(self):
        """Train the container on everything registered so far."""
        log.info('Training...')
        self.container.train()
        log.info('Training complete!')

    def calc_intents(self, query):
        """Return one ``IntentMatch`` per result the container yields for *query*."""
        return [
            IntentMatch(intent_id=data.name, confidence=data.conf,
                        matches=data.matches, query=query)
            for data in self.container.calc_intents(query)
        ]
class NeuralNER(RuleNER):
    """Entity extractor that matches text with a Padatious ``IntentContainer``.

    The container is created with the path ``~/.simple_NER/rule_cache``.
    """

    def __init__(self):
        # NOTE(review): RuleNER.__init__ is not invoked here, same as before
        # this change - confirm that is intentional.
        cache = expanduser("~/.simple_NER")
        # exist_ok=True replaces the former isdir()-then-makedirs() pair,
        # which could raise FileExistsError when another process created the
        # directory between the check and the call.
        makedirs(cache, exist_ok=True)
        self._container = IntentContainer(join(cache, "rule_cache"))
        self._rules = {}
        self._examples = {}

    def extract_entities(self, text, as_json=False):
        """Yield one entity per match of every rule the container reports.

        Args:
            text: the text to analyse.
            as_json: when true, yield ``Entity.as_json()`` results instead of
                ``Entity`` objects.

        Raises:
            KeyError: if a matched rule name is missing from ``self._rules``.
        """
        for rule in self._container.calc_intents(text):
            for entity_type, value in rule.matches.items():
                entity = Entity(value,
                                entity_type=entity_type,
                                source_text=text,
                                confidence=rule.conf,
                                rules=self._rules[rule.name])
                yield entity.as_json() if as_json else entity
class PadatiousExtractor(IntentExtractor):
    """Intent extractor backed by a Padatious ``IntentContainer``.

    Every access to ``self.container`` is wrapped in ``self.lock``.
    ``self.registered_intents`` holds the names of the intents registered
    through this object (each name at most once).
    """

    keyword_based = False

    def __init__(self, cache_dir=None, *args, **kwargs):
        """Create the container.

        Args:
            cache_dir: directory handed to ``IntentContainer``; defaults to
                ``<data_dir>/padatious`` where ``data_dir`` comes from the
                ``"data_dir"`` config entry (``~/.padatious`` if unset).
            *args, **kwargs: forwarded to ``IntentExtractor.__init__``.
        """
        super().__init__(*args, **kwargs)
        # TODO xdg data_dir
        data_dir = expanduser(self.config.get("data_dir", "~/.padatious"))
        cache_dir = cache_dir or join(data_dir, "padatious")
        self.lock = Lock()
        self.container = IntentContainer(cache_dir)
        self.registered_intents = []

    def _track_intent(self, intent_name):
        """Record *intent_name* as registered, without creating duplicates."""
        # A duplicate entry would survive detach_intent(), which removes a
        # single occurrence, and would show up twice in manifest().
        if intent_name not in self.registered_intents:
            self.registered_intents.append(intent_name)

    def _format_match(self, intent, utterance):
        """Rename the keys of a match dict to this extractor's result schema.

        *intent* is expected to carry the keys ``name``, ``sent``,
        ``matches`` and ``conf``; it is modified in place and returned.
        """
        # Must run before "name" is popped: _get_remainder reads it.
        intent["utterance_remainder"] = self._get_remainder(intent, utterance)
        intent["entities"] = intent.pop("matches")
        intent["intent_engine"] = "padatious"
        intent["intent_type"] = intent.pop("name")
        intent["utterance"] = intent.pop("sent")
        if isinstance(intent["utterance"], list):
            intent["utterance"] = " ".join(intent["utterance"])
        return intent

    def detach_intent(self, intent_name):
        """Remove *intent_name* from the container if it was registered here."""
        if intent_name in self.registered_intents:
            LOG.debug("Detaching padatious intent: " + intent_name)
            with self.lock:
                self.container.remove_intent(intent_name)
            self.registered_intents.remove(intent_name)

    def detach_skill(self, skill_id):
        """Detach every registered intent whose name contains *skill_id*."""
        LOG.debug("Detaching padatious skill: " + str(skill_id))
        # NOTE(review): this is a substring test, so a skill id that is a
        # substring of another skill's intent names matches those as well -
        # confirm against the intent naming scheme.
        remove_list = [i for i in self.registered_intents if skill_id in i]
        for i in remove_list:
            self.detach_intent(i)

    def register_entity(self, entity_name, samples=None, reload_cache=True):
        """Add an entity; *samples* defaults to ``[entity_name]``."""
        samples = samples or [entity_name]
        with self.lock:
            self.container.add_entity(entity_name, samples,
                                      reload_cache=reload_cache)

    def register_intent(self, intent_name, samples=None, reload_cache=True):
        """Add an intent and remember its samples.

        *samples* defaults to ``[intent_name]``. Samples of repeated
        registrations under the same name are accumulated in
        ``self._intent_samples``.
        """
        samples = samples or [intent_name]
        if intent_name not in self._intent_samples:
            # Keep a private copy: a later "+=" on the stored list must not
            # grow the list object the caller passed in.
            self._intent_samples[intent_name] = list(samples)
        else:
            self._intent_samples[intent_name] += samples
        with self.lock:
            self.container.add_intent(intent_name, samples,
                                      reload_cache=reload_cache)
        self._track_intent(intent_name)

    def register_entity_from_file(self, entity_name, file_name,
                                  reload_cache=True):
        """Load an entity from *file_name* into the container."""
        with self.lock:
            self.container.load_entity(entity_name, file_name,
                                       reload_cache=reload_cache)

    def register_intent_from_file(self, intent_name, file_name,
                                  single_thread=True, timeout=120,
                                  reload_cache=True, force_training=True):
        """Load an intent from *file_name*, then train the container.

        Any exception raised while loading or training is logged and
        swallowed; nothing is returned.
        """
        try:
            with self.lock:
                self.container.load_intent(intent_name, file_name,
                                           reload_cache=reload_cache)
            self._track_intent(intent_name)
            # _train() acquires self.lock itself, so it has to be called
            # after the lock has been released (a plain threading.Lock is
            # not reentrant - assuming that is what Lock is here).
            success = self._train(single_thread=single_thread,
                                  timeout=timeout,
                                  force_training=force_training)
            if success:
                LOG.debug(file_name + " trained successfully")
            else:
                LOG.error(file_name + " FAILED TO TRAIN")
        except Exception as e:
            LOG.exception(e)

    def _get_remainder(self, intent, utterance):
        """Return what ``get_utterance_remainder`` leaves of *utterance*.

        Falls back to the unchanged *utterance* when no samples are known
        for ``intent["name"]``.
        """
        if intent["name"] in self.intent_samples:
            return get_utterance_remainder(
                utterance, samples=self.intent_samples[intent["name"]])
        return utterance

    def calc_intent(self, utterance, min_conf=None):
        """Return the container's best match for *utterance* as a dict.

        Args:
            utterance: text to match; stripped and lower-cased first.
            min_conf: minimum confidence. ``None`` means "use the
                ``padatious_min_conf`` config entry (default 0.65)"; an
                explicit ``0`` is honoured.

        Returns:
            The formatted match, or an ``"unknown"`` placeholder with
            ``conf`` 0 when the match confidence is below *min_conf*.
        """
        if min_conf is None:
            min_conf = self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        with self.lock:
            intent = self.container.calc_intent(utterance).__dict__
        if intent["conf"] < min_conf:
            return {
                "intent_type": "unknown",
                "entities": {},
                "conf": 0,
                "intent_engine": "padatious",
                "utterance": utterance,
                "utterance_remainder": utterance
            }
        return self._format_match(intent, utterance)

    def intent_scores(self, utterance):
        """Return every match the container reports, formatted as dicts."""
        utterance = utterance.strip().lower()
        # Same locking rule as every other container access in this class;
        # the result is materialised while the lock is still held.
        with self.lock:
            matches = list(self.container.calc_intents(utterance))
        return [self._format_match(match.__dict__, utterance)
                for match in matches]

    def calc_intents(self, utterance, min_conf=None):
        """Map each segment of *utterance* to its best intent.

        Segments whose best match is below *min_conf* map to ``None``.
        *min_conf* is resolved as in ``calc_intent`` and is forwarded to
        it, so a threshold lower than the configured default takes effect.
        """
        if min_conf is None:
            min_conf = self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            intent = self.calc_intent(ut, min_conf=min_conf)
            bucket[ut] = None if intent["conf"] < min_conf else intent
        return bucket

    def calc_intents_list(self, utterance):
        """Map each segment of *utterance* to ``self.filter_intents(segment)``."""
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.filter_intents(ut)
        return bucket

    def manifest(self):
        """Return the names of the registered intents."""
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}

    def _train(self, single_thread=True, timeout=120, force_training=True):
        """Train the container under the lock and return its result."""
        with self.lock:
            return self.container.train(single_thread=single_thread,
                                        timeout=timeout,
                                        force=force_training,
                                        debug=True)