Пример #1
0
def segment_string(text: str):
    """
    Split one larger word into a list of sanitized word segments.

    Leading dashes are stripped, every character matched by the symbol
    pattern is replaced by the result of ``human_readable_translate``, the
    string is split on dashes, underscores and spaces, and each piece is
    further divided by ``wordsegment.segment``.

    :param text: the word to segment; any leading ``-`` characters are ignored
    :return: list holding ``sanitize_token(segment)`` for every segment, in order
    """

    base = text.lstrip("-")

    # Replace symbols with their unicode names.
    # The pattern is a raw string so that ``\s`` and ``\-`` reach the regex
    # engine untouched instead of being invalid string escapes.
    # NOTE(review): ``[:alpha:]`` is a POSIX character class, which the stdlib
    # ``re`` module does not support; this presumably relies on ``re`` being
    # the third-party ``regex`` module -- confirm against the file's imports.
    translated = re.sub(
        r"[^[:alpha:]\s\-_]",
        lambda match: human_readable_translate(match.group()),
        base,
    )

    # First split on explicit separators, then let wordsegment break up each
    # remaining run of characters into individual words.
    dash_tokens = re.split("[-_ ]", translated)
    segment_tokens = itertools.chain.from_iterable(
        wordsegment.segment(w) for w in dash_tokens
    )
    return [sanitize_token(tok) for tok in segment_tokens]
Пример #2
0
 def text(self) -> typing.List[str]:
     """Return the word segments of every entry in ``self.choices`` as one flat list."""
     words: typing.List[str] = []
     for choice in self.choices:
         # Each choice may segment into several words; keep them in order.
         words.extend(wordsegment.segment(choice))
     return words
Пример #3
0
 def text(self) -> typing.List[str]:
     """Return the word segments of ``self.name`` as a new list."""
     segments = wordsegment.segment(self.name)
     return [*segments]
Пример #4
0
 def reserved(self) -> Set[Tuple[str, ...]]:
     """Return the set of ``keywords["1.0"]`` entries, each segmented into a tuple of words."""
     # The keyword list is borrowed from miniWDL.
     segmented: Set[Tuple[str, ...]] = set()
     for keyword in keywords["1.0"]:
         segmented.add(tuple(wordsegment.segment(keyword)))
     return segmented