def test_mix_detected(self):
    # Input mixes Korean characters with English words. With the second
    # argument False the single MIX category is expected; with True the
    # set of both individual categories is expected.
    cases = (
        ('summary', '일=one\n이=two', False, LangCat.MIX),
        ('detail', '일=two\n이=two', True, {LangCat.HAN, LangCat.ENG}),
    )
    for label, text, detail, expected in cases:
        with self.subTest(label):
            self.assertEqual(expected, LangCat.categorize(text, detail))
def test_punc_num_ignored_for_categorize(self):
    # '=', digits and the newline must not affect the result: only the
    # Korean characters should determine the category.
    text = '일=1\n이=2'
    cases = (
        ('summary', False, LangCat.HAN),
        ('detail', True, {LangCat.HAN}),
    )
    for label, detail, expected in cases:
        with self.subTest(label):
            self.assertEqual(expected, LangCat.categorize(text, detail))
def from_parts(cls, parts: Iterable[str], **kwargs) -> 'Name':
    """
    Build a name object from an iterable of candidate name strings.

    Parts are examined in order; empty parts are skipped.  The first part that contains any of
    ``LangCat.non_eng_cats`` is used as the non-English value, and the first remaining part that
    contains ``LangCat.ENG`` is used as the English value.  Once both are set, the next part that is
    categorized as ``LangCat.ENG`` is tried as a romanization (or as the real English value, with the
    earlier one demoted to the romanization).  Every part that cannot be placed is collected and stored
    under the ``'unknown'`` key of the resulting object's ``extra`` mapping.

    :param parts: Candidate strings to classify
    :param kwargs: Keyword arguments passed through to ``cls(...)``
    :return: The constructed name object
    :raises ValueError: If no name could be constructed from the given parts
    """
    eng = None
    non_eng = None
    extra = []
    name = None
    for part in parts:
        if not part:
            continue
        elif name is not None:
            # A name was already finalized in an earlier iteration; everything after it is left over
            extra.append(part)
        elif not non_eng and LangCat.contains_any(part, LangCat.non_eng_cats):
            # Checked before the English test, so a part containing both kinds lands here while
            # non_eng is still unset
            non_eng = part
        elif not eng and LangCat.contains_any(part, LangCat.ENG):
            eng = part
        elif eng and non_eng and LangCat.categorize(part) == LangCat.ENG:
            # Both values are known; build a tentative name to test this part against it
            name = cls(eng, non_eng, **kwargs)
            if name.has_romanization(part):
                name.romanized = part
            elif name.has_romanization(eng) and not is_english(eng) and is_english(part):
                # The earlier "English" value was really the romanization; this part is the English one
                name._english = part
                name.romanized = eng
            else:
                # Not usable as a romanization or English value - discard the tentative name so that
                # later parts are still evaluated by the branches above
                name = None
                extra.append(part)
        else:
            extra.append(part)

    if name is None:
        if eng or non_eng:
            name = cls(eng, non_eng, **kwargs)
        elif extra and len(extra) == 1:
            # Only a single unclassified part was found - use it as the sole positional value
            name = cls(extra[0], **kwargs)
            extra = None  # consumed above, so it must not also be stored as unknown

    if name is None:
        raise ValueError(
            f'Unable to find any valid name parts from {parts!r}; found {extra=!r}'
        )

    if extra:
        if name.extra:
            name.extra['unknown'] = extra  # noqa
        else:
            name.extra = {'unknown': extra}
    return name
def __init__(self, pos: int, value: str):
    """
    Store the given position and value, and record the value's language category.

    :param pos: Stored as-is on ``self.pos``
    :param value: Stored as-is on ``self.value``; also passed to ``LangCat.categorize`` for ``self.cat``
    """
    self.pos, self.value = pos, value
    self.cat = LangCat.categorize(value)
def from_enclosed(cls, name: str, **kwargs) -> 'Name':
    """
    Build a name object from a single string that may combine two versions of a name.

    When the string is categorized as ``LangCat.MIX``, it is split via
    ``split_enclosed(name, reverse=True, maxsplit=1)``; otherwise it is treated as a single part.  The
    resulting parts are handed to :meth:`from_parts`.

    :param name: The raw name string
    :param kwargs: Keyword arguments passed through to :meth:`from_parts`
    :return: The object returned by :meth:`from_parts`
    """
    is_mixed = LangCat.categorize(name) == LangCat.MIX
    parts = split_enclosed(name, reverse=True, maxsplit=1) if is_mixed else (name, )
    return cls.from_parts(parts, **kwargs)
def non_eng_langs(self) -> set[LangCat]:
    """The result of ``LangCat.categorize`` for ``self.non_eng``, with its second argument set to True."""
    categories = LangCat.categorize(self.non_eng, True)
    return categories
def non_eng_lang(self) -> LangCat:
    """The result of ``LangCat.categorize`` for ``self.non_eng``, called with the text alone."""
    category = LangCat.categorize(self.non_eng)
    return category
def eng_langs(self) -> set[LangCat]:
    """The result of ``LangCat.categorize`` for ``self.english``, with its second argument set to True."""
    categories = LangCat.categorize(self.english, True)
    return categories
def eng_lang(self) -> LangCat:
    """The result of ``LangCat.categorize`` for ``self.english``, called with the text alone."""
    category = LangCat.categorize(self.english)
    return category
def test_spaces_ignored_for_categorize(self):
    # A space between the Korean characters must not affect the result.
    text = '일 이'
    cases = (
        ('summary', False, LangCat.HAN),
        ('detail', True, {LangCat.HAN}),
    )
    for label, detail, expected in cases:
        with self.subTest(label):
            self.assertEqual(expected, LangCat.categorize(text, detail))