示例#1
0
 def test_mix_detected(self):
     # Input that combines Korean characters with English words should be
     # summarized as MIX, and broken down into HAN + ENG in detail mode.
     with self.subTest('summary'):
         summary = LangCat.categorize('일=one\n이=two', False)
         self.assertEqual(LangCat.MIX, summary)
     with self.subTest('detail'):
         detail = LangCat.categorize('일=two\n이=two', True)
         self.assertEqual({LangCat.HAN, LangCat.ENG}, detail)
示例#2
0
 def test_punc_num_ignored_for_categorize(self):
     # Digits, '=' and newlines must not affect the detected category:
     # only the Korean characters in the input should count.
     text = '일=1\n이=2'
     with self.subTest('summary'):
         summary = LangCat.categorize(text, False)
         self.assertEqual(LangCat.HAN, summary)
     with self.subTest('detail'):
         detail = LangCat.categorize(text, True)
         self.assertEqual({LangCat.HAN}, detail)
示例#3
0
    def from_parts(cls, parts: Iterable[str], **kwargs) -> 'Name':
        """
        Build a name from an iterable of candidate name strings.

        Parts are examined in order; empty parts are skipped.  One part is
        picked as the non-English value and one as the English value, based
        on ``LangCat.contains_any``.  Once both are set, a later part that
        categorizes as ``LangCat.ENG`` is tested as a possible romanization.
        Every part that could not be placed is collected and stored on the
        resulting name under ``extra['unknown']``.

        :param parts: The candidate strings to build the name from
        :param kwargs: Passed through unchanged to the class constructor
        :return: The constructed name object
        :raises ValueError: If no English / non-English part was found and
          the number of leftover parts is not exactly one
        """
        eng = None
        non_eng = None
        extra = []      # Parts that could not be assigned a role
        name = None     # Only set inside the loop once a romanization has been matched
        for part in parts:
            if not part:
                # Skip empty / falsy parts entirely
                continue
            elif name is not None:
                # A name was already built in an earlier iteration; everything after it is leftover
                extra.append(part)
            elif not non_eng and LangCat.contains_any(part,
                                                      LangCat.non_eng_cats):
                # First part containing any of the non-English categories
                non_eng = part
            elif not eng and LangCat.contains_any(part, LangCat.ENG):
                # First part (not already taken above) containing English
                eng = part
            elif eng and non_eng and LangCat.categorize(part) == LangCat.ENG:
                # Both values are known and this part is English: build a tentative name so that
                # its has_romanization check can be used on the candidates
                name = cls(eng, non_eng, **kwargs)
                if name.has_romanization(part):
                    name.romanized = part
                elif name.has_romanization(
                        eng) and not is_english(eng) and is_english(part):
                    # The value picked as English is itself the romanization, and this part is the
                    # actual English value, so swap their roles
                    name._english = part
                    name.romanized = eng
                else:
                    # Not a romanization; discard the tentative name so later parts are examined
                    # by the branches above again
                    name = None
                    extra.append(part)
            else:
                extra.append(part)

        if name is None:
            if eng or non_eng:
                name = cls(eng, non_eng, **kwargs)
            elif extra and len(extra) == 1:
                # Nothing was picked as English / non-English, but there is exactly one leftover
                # part: use it as the sole positional value, and clear extra so that it is not
                # also recorded as unknown below
                name = cls(extra[0], **kwargs)
                extra = None
        if name is None:
            raise ValueError(
                f'Unable to find any valid name parts from {parts!r}; found {extra=!r}'
            )
        if extra:
            # Record leftover parts under the 'unknown' key, keeping any existing extra content
            if name.extra:
                name.extra['unknown'] = extra  # noqa
            else:
                name.extra = {'unknown': extra}
        return name
示例#4
0
 def __init__(self, pos: int, value: str):
     """
     Store the given position and value, along with the language category
     that ``LangCat.categorize`` reports for the value.

     :param pos: Stored as ``self.pos``
     :param value: Stored as ``self.value`` and used to compute ``self.cat``
     """
     self.pos, self.value = pos, value
     category = LangCat.categorize(value)
     self.cat = category
示例#5
0
 def from_enclosed(cls, name: str, **kwargs) -> 'Name':
     """
     Build a name from a single string.

     When the string is categorized as ``LangCat.MIX``, it is first split
     with ``split_enclosed`` (``reverse=True``, ``maxsplit=1``); otherwise
     it is used as the only part.  The resulting parts and any keyword
     arguments are handed to :meth:`from_parts`.
     """
     is_mixed = LangCat.categorize(name) == LangCat.MIX
     if is_mixed:
         pieces = split_enclosed(name, reverse=True, maxsplit=1)
         return cls.from_parts(pieces, **kwargs)
     return cls.from_parts((name,), **kwargs)
示例#6
0
 def non_eng_langs(self) -> set[LangCat]:
     """Return the detailed set of language categories found in ``self.non_eng``."""
     text = self.non_eng
     return LangCat.categorize(text, True)
示例#7
0
 def non_eng_lang(self) -> LangCat:
     """Return the single language category that summarizes ``self.non_eng``."""
     text = self.non_eng
     return LangCat.categorize(text)
示例#8
0
 def eng_langs(self) -> set[LangCat]:
     """Return the detailed set of language categories found in ``self.english``."""
     text = self.english
     return LangCat.categorize(text, True)
示例#9
0
 def eng_lang(self) -> LangCat:
     """Return the single language category that summarizes ``self.english``."""
     text = self.english
     return LangCat.categorize(text)
示例#10
0
 def test_spaces_ignored_for_categorize(self):
     # A space between Korean characters must not affect the detected
     # category in either summary or detail mode.
     text = '일 이'
     with self.subTest('summary'):
         summary = LangCat.categorize(text, False)
         self.assertEqual(LangCat.HAN, summary)
     with self.subTest('detail'):
         detail = LangCat.categorize(text, True)
         self.assertEqual({LangCat.HAN}, detail)