def get_normalizations(self, utterance, lang=None):
    lang = lang or self.lang
    # normalized copy with articles removed
    norm = normalize(utterance, remove_articles=True, lang=lang)
    # normalized copy keeping articles
    norm2 = normalize(utterance, remove_articles=False, lang=lang)
    # punctuation / non-word characters replaced with spaces
    norm3 = re.sub(r'[^\w]', ' ', utterance)
    # keep only ASCII characters above code point 64 (letters and a few symbols) plus spaces
    norm4 = ''.join([i if 64 < ord(i) < 128 or ord(i) == 32 else ''
                     for i in utterance])
    # return only the variants that actually differ from the input
    return [u for u in [norm, norm2, norm3, norm4] if u != utterance]
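# Hypothetical usage sketch (not part of the original module): the variants
# returned above can be tried as fallbacks when the raw utterance fails to
# match; `parser` stands in for an instance of the class defining
# get_normalizations() and calc_intent().
#
#   match = parser.calc_intent(utterance)
#   if not match["conf"]:
#       for variant in parser.get_normalizations(utterance):
#           match = parser.calc_intent(variant)
#           if match["conf"] > 0:
#               break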
def calc_intents_list(self, utterance, min_conf=0.5):
    utterance = utterance.strip()  # spaces should not mess with exact matches
    bucket = {}
    for ut in self.segmenter.segment(utterance):
        if self.normalize:
            ut = normalize(ut, self.lang, True)
        bucket[ut] = []
        for intent in self.engine.determine_intent(
                ut, 100, include_tags=True,
                context_manager=self.context_manager):
            if intent:
                intent.pop("target")
                matches = {k: v for k, v in intent.items()
                           if k not in ["intent_type", "confidence",
                                        "__tags__"]}
                intent["entities"] = {}
                for k in matches:
                    intent["entities"][k] = intent.pop(k)
                intent["conf"] = intent.pop("confidence")
                intent["utterance"] = ut
                intent["intent_engine"] = "adapt"
                remainder = get_utterance_remainder(
                    utterance, samples=[v for v in matches.values()])
                intent["utterance_remainder"] = remainder
                if intent["conf"] >= min_conf:
                    bucket[ut] += [intent]
    return bucket
def calc_intent(self, utterance):
    utterance = utterance.strip()
    if self.normalize:
        utterance = normalize(utterance, self.lang, True)
    for intent in self.engine.determine_intent(
            utterance, 100, include_tags=True,
            context_manager=self.context_manager):
        if intent and intent.get('confidence') > 0:
            intent.pop("target")
            matches = {k: v for k, v in intent.items()
                       if k not in ["intent_type", "confidence", "__tags__"]}
            intent["entities"] = {}
            for k in matches:
                intent["entities"][k] = intent.pop(k)
            intent["conf"] = intent.pop("confidence")
            intent["utterance"] = utterance
            intent["intent_engine"] = "adapt"
            remainder = get_utterance_remainder(
                utterance, samples=[v for v in matches.values()])
            intent["utterance_remainder"] = remainder
            return intent
    return {"conf": 0,
            "intent_type": "unknown",
            "entities": {},
            "utterance_remainder": utterance,
            "utterance": utterance,
            "intent_engine": "adapt"}
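# For reference, calc_intent() above always returns a flat dict; the keys come
# straight from the code, the values below are illustrative only:
# {
#     "intent_type": "<adapt intent name>",
#     "conf": 0.85,
#     "entities": {"<keyword name>": "<matched text>"},
#     "utterance": "<normalized input>",
#     "utterance_remainder": "<input minus matched entities>",
#     "intent_engine": "adapt"
# }
# (adapt's "__tags__" entry is left in place when present, "target" is popped)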
def intents_remainder(self, utterance, min_conf=0.5):
    """Segment the utterance and, for each chunk, recursively check
    for intents in the utterance remainder.

    :param utterance: text to parse
    :param min_conf: minimum confidence for a match (not used in this implementation)
    :return: dict mapping each segment to the result of intent_remainder()
    """
    utterance = utterance.strip()  # spaces should not mess with exact matches
    bucket = {}
    for ut in self.segmenter.segment(utterance):
        if self.normalize:
            ut = normalize(ut, self.lang, True)
        bucket[ut] = self.intent_remainder(ut)
    return bucket
def calc_intent(self, utterance):
    utterance = utterance.strip()
    if self.normalize:
        utterance = normalize(utterance, self.lang, True)
    for intent in self.engine.determine_intent(
            utterance, 100, include_tags=True,
            context_manager=self.context_manager):
        if intent and intent.get('confidence') > 0:
            intent.pop("target")
            matches = {k: v for k, v in intent.items()
                       if k not in ["intent_type", "confidence", "__tags__"]}
            intent["entities"] = {}
            for k in matches:
                intent["entities"][k] = intent.pop(k)
            intent["conf"] = intent.pop("confidence")
            intent["utterance"] = utterance
            intent["intent_engine"] = "adapt"
            remainder = get_utterance_remainder(
                utterance, samples=[v for v in matches.values()])
            intent["utterance_remainder"] = remainder
            # HACK adapt is notorious for handling regex poorly
            # we really need to artificially boost its confidence or
            # nothing will match
            if any(k in matches for k in self.regexes):
                intent["conf"] += self.regex_boost
            return intent
    return {"conf": 0,
            "intent_type": "unknown",
            "entities": {},
            "utterance_remainder": utterance,
            "utterance": utterance,
            "intent_engine": "adapt"}
def _load(path, lang="en-us", norm=True, lowercase=True):
    with open(path) as f:
        samples = f.readlines()
    samples = [s.strip() for s in samples
               if not s.strip().startswith("#")]  # filter comments
    samples = [s.replace("{{", "{").replace("}}", "}")
               for s in samples]  # clean double brackets
    samples = [s.replace("(", " ( ").replace(")", " ) ")
               .replace("{", " { ").replace("}", " } ")
               .replace("|", " | ")
               .replace("]", " ] ").replace("[", " [ ")
               for s in samples]  # add missing spaces
    samples = [" ".join(s.split()) for s in samples]  # clean extra white spaces
    if norm:
        samples = [normalize(s, lang, remove_articles=True)
                   for s in samples] + samples
    if lowercase:
        samples = [s.lower() for s in samples if s.lower()]
    return list(set(samples))
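# A minimal, self-contained sketch of the template clean-up performed by
# _load() above (comment filtering, bracket spacing, whitespace collapse),
# operating on an in-memory list instead of a file; the helper name is
# illustrative, not part of the original module.
def _clean_samples(samples):
    samples = [s.strip() for s in samples if not s.strip().startswith("#")]
    samples = [s.replace("{{", "{").replace("}}", "}") for s in samples]
    for token in "(){}[]|":
        samples = [s.replace(token, " %s " % token) for s in samples]
    samples = [" ".join(s.split()) for s in samples]
    return list(set(samples))

# e.g. _clean_samples(["tell me a (joke|funny joke)", "# a comment"])
# -> ["tell me a ( joke | funny joke )"]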
def segment(self, text):
    if self.normalize:
        text = normalize(text, self.lang, True)
    # NOTE: the original returned self.segment(text), which recurses forever;
    # delegating to the base class segmenter is the assumed intent here
    return super().segment(text)
def intent_remainder(self, utterance, _prev=""):
    utterance = utterance.strip()  # spaces should not mess with exact matches
    if self.normalize:
        utterance = normalize(utterance, self.lang, True)
    return IntentExtractor.intent_remainder(self, utterance)