def get_normalizations(self, utterance, lang=None):
    lang = lang or self.lang
    # normalized copy with articles removed
    norm = normalize(utterance, remove_articles=True, lang=lang)
    # normalized copy keeping articles
    norm2 = normalize(utterance, remove_articles=False, lang=lang)
    # punctuation / non-word characters replaced with spaces
    norm3 = re.sub(r'[^\w]', ' ', utterance)
    # keep only ASCII characters above code point 64 (letters and a few symbols) plus spaces
    norm4 = ''.join([i if 64 < ord(i) < 128 or ord(i) == 32 else ''
                     for i in utterance])
    # return only the variants that actually differ from the input
    return [u for u in [norm, norm2, norm3, norm4] if u != utterance]
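# Hypothetical usage sketch (not part of the original module): the variants
# returned above can be tried as fallbacks when the raw utterance fails to
# match; `parser` stands in for an instance of the class defining
# get_normalizations() and calc_intent().
#
#   match = parser.calc_intent(utterance)
#   if not match["conf"]:
#       for variant in parser.get_normalizations(utterance):
#           match = parser.calc_intent(variant)
#           if match["conf"] > 0:
#               break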
def calc_intents_list(self, utterance, min_conf=0.5):
    utterance = utterance.strip()  # spaces should not mess with exact matches
    bucket = {}
    for ut in self.segmenter.segment(utterance):
        if self.normalize:
            ut = normalize(ut, self.lang, True)
        bucket[ut] = []
        for intent in self.engine.determine_intent(
                ut, 100, include_tags=True,
                context_manager=self.context_manager):
            if intent:
                intent.pop("target")
                matches = {k: v for k, v in intent.items()
                           if k not in ["intent_type", "confidence",
                                        "__tags__"]}
                intent["entities"] = {}
                for k in matches:
                    intent["entities"][k] = intent.pop(k)
                intent["conf"] = intent.pop("confidence")
                intent["utterance"] = ut
                intent["intent_engine"] = "adapt"
                remainder = get_utterance_remainder(
                    utterance, samples=[v for v in matches.values()])
                intent["utterance_remainder"] = remainder
                if intent["conf"] >= min_conf:
                    bucket[ut] += [intent]
    return bucket
def calc_intent(self, utterance):
    utterance = utterance.strip()
    if self.normalize:
        utterance = normalize(utterance, self.lang, True)
    for intent in self.engine.determine_intent(
            utterance, 100, include_tags=True,
            context_manager=self.context_manager):
        if intent and intent.get('confidence') > 0:
            intent.pop("target")
            matches = {k: v for k, v in intent.items()
                       if k not in ["intent_type", "confidence", "__tags__"]}
            intent["entities"] = {}
            for k in matches:
                intent["entities"][k] = intent.pop(k)
            intent["conf"] = intent.pop("confidence")
            intent["utterance"] = utterance
            intent["intent_engine"] = "adapt"
            remainder = get_utterance_remainder(
                utterance, samples=[v for v in matches.values()])
            intent["utterance_remainder"] = remainder
            return intent
    return {"conf": 0,
            "intent_type": "unknown",
            "entities": {},
            "utterance_remainder": utterance,
            "utterance": utterance,
            "intent_engine": "adapt"}
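# For reference, calc_intent() above always returns a flat dict; the keys come
# straight from the code, the values below are illustrative only:
# {
#     "intent_type": "<adapt intent name>",
#     "conf": 0.85,
#     "entities": {"<keyword name>": "<matched text>"},
#     "utterance": "<normalized input>",
#     "utterance_remainder": "<input minus matched entities>",
#     "intent_engine": "adapt"
# }
# (adapt's "__tags__" entry is left in place when present, "target" is popped)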
def intents_remainder(self, utterance, min_conf=0.5):
    """Segment the utterance and, for each chunk, recursively check
    for intents in the utterance remainder.

    :param utterance: text to parse
    :param min_conf: minimum confidence for a match (not used in this implementation)
    :return: dict mapping each segment to the result of intent_remainder()
    """
    utterance = utterance.strip()  # spaces should not mess with exact matches
    bucket = {}
    for ut in self.segmenter.segment(utterance):
        if self.normalize:
            ut = normalize(ut, self.lang, True)
        bucket[ut] = self.intent_remainder(ut)
    return bucket
def calc_intent(self, utterance):
    utterance = utterance.strip()
    if self.normalize:
        utterance = normalize(utterance, self.lang, True)
    for intent in self.engine.determine_intent(
            utterance, 100, include_tags=True,
            context_manager=self.context_manager):
        if intent and intent.get('confidence') > 0:
            intent.pop("target")
            matches = {k: v for k, v in intent.items()
                       if k not in ["intent_type", "confidence", "__tags__"]}
            intent["entities"] = {}
            for k in matches:
                intent["entities"][k] = intent.pop(k)
            intent["conf"] = intent.pop("confidence")
            intent["utterance"] = utterance
            intent["intent_engine"] = "adapt"
            remainder = get_utterance_remainder(
                utterance, samples=[v for v in matches.values()])
            intent["utterance_remainder"] = remainder
            # HACK adapt is notorious for handling regex poorly
            # we really need to artificially boost its confidence or
            # nothing will match
            if any(k in matches for k in self.regexes):
                intent["conf"] += self.regex_boost
            return intent
    return {"conf": 0,
            "intent_type": "unknown",
            "entities": {},
            "utterance_remainder": utterance,
            "utterance": utterance,
            "intent_engine": "adapt"}
def _load(path, lang="en-us", norm=True, lowercase=True):
    with open(path) as f:
        samples = f.readlines()
    samples = [s.strip() for s in samples
               if not s.strip().startswith("#")]  # filter comments
    samples = [s.replace("{{", "{").replace("}}", "}")
               for s in samples]  # clean double brackets
    samples = [s.replace("(", " ( ").replace(")", " ) ")
               .replace("{", " { ").replace("}", " } ")
               .replace("|", " | ")
               .replace("]", " ] ").replace("[", " [ ")
               for s in samples]  # add missing spaces
    samples = [" ".join(s.split()) for s in samples]  # clean extra white spaces
    if norm:
        samples = [normalize(s, lang, remove_articles=True)
                   for s in samples] + samples
    if lowercase:
        samples = [s.lower() for s in samples if s.lower()]
    return list(set(samples))
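# A minimal, self-contained sketch of the template clean-up performed by
# _load() above (comment filtering, bracket spacing, whitespace collapse),
# operating on an in-memory list instead of a file; the helper name is
# illustrative, not part of the original module.
def _clean_samples(samples):
    samples = [s.strip() for s in samples if not s.strip().startswith("#")]
    samples = [s.replace("{{", "{").replace("}}", "}") for s in samples]
    for token in "(){}[]|":
        samples = [s.replace(token, " %s " % token) for s in samples]
    samples = [" ".join(s.split()) for s in samples]
    return list(set(samples))

# e.g. _clean_samples(["tell me a (joke|funny joke)", "# a comment"])
# -> ["tell me a ( joke | funny joke )"]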
def segment(self, text):
    if self.normalize:
        text = normalize(text, self.lang, True)
    # NOTE: the original returned self.segment(text), which recurses forever;
    # delegating to the base class segmenter is the assumed intent here
    return super().segment(text)
def intent_remainder(self, utterance, _prev=""):
    utterance = utterance.strip()  # spaces should not mess with exact matches
    if self.normalize:
        utterance = normalize(utterance, self.lang, True)
    return IntentExtractor.intent_remainder(self, utterance)