示例#1
0
 def test_mix_detected(self):
     """Text combining both scripts is reported as MIX, or as {HAN, ENG} in detail mode."""
     with self.subTest('summary'):
         summary = LangCat.categorize('일=one\n이=two', False)
         self.assertEqual(LangCat.MIX, summary)
     with self.subTest('detail'):
         detail = LangCat.categorize('일=two\n이=two', True)
         self.assertEqual({LangCat.HAN, LangCat.ENG}, detail)
示例#2
0
 def test_punc_num_ignored_for_categorize(self):
     """Digits, '=' and newlines must not change the category reported for the text."""
     text = '일=1\n이=2'
     with self.subTest('summary'):
         self.assertEqual(LangCat.HAN, LangCat.categorize(text, False))
     with self.subTest('detail'):
         self.assertEqual({LangCat.HAN}, LangCat.categorize(text, True))
示例#3
0
    def from_parts(cls, parts: Iterable[str], **kwargs) -> 'Name':
        """Build a name from an iterable of candidate name strings.

        Empty parts are skipped.  The first part that contains any character in
        ``LangCat.non_eng_cats`` becomes the non-English value; the first part not
        claimed that way that contains ``LangCat.ENG`` characters becomes the
        English value.  Once both are set, a later part categorized as
        ``LangCat.ENG`` is tried as a romanization.  Parts that fit none of these
        roles are collected and stored under ``extra['unknown']`` on the result.

        :param parts: Candidate name strings, examined in iteration order.
        :param kwargs: Passed through unchanged to ``cls(...)``.
        :return: The constructed name.
        :raises ValueError: If no English or non-English part was found and the
          leftover parts do not consist of exactly one item.
        """
        eng = None
        non_eng = None
        extra = []
        name = None
        for part in parts:
            if not part:
                continue
            elif name is not None:
                # A name was already built inside this loop; everything after it is leftover.
                extra.append(part)
            # The non-English check comes first, so a part containing both kinds of
            # characters is claimed as non_eng while that slot is still empty.
            elif not non_eng and LangCat.contains_any(part,
                                                      LangCat.non_eng_cats):
                non_eng = part
            elif not eng and LangCat.contains_any(part, LangCat.ENG):
                eng = part
            elif eng and non_eng and LangCat.categorize(part) == LangCat.ENG:
                # Both slots are filled and this part is purely ENG: tentatively build
                # the name so has_romanization() can be consulted.
                # NOTE(review): presumably has_romanization() compares against the
                # non-English value - confirm against its definition.
                name = cls(eng, non_eng, **kwargs)
                if name.has_romanization(part):
                    name.romanized = part
                elif name.has_romanization(
                        eng) and not is_english(eng) and is_english(part):
                    # Swap roles: the earlier `eng` value is treated as the romanization
                    # and this part becomes the English value.
                    name._english = part
                    name.romanized = eng
                else:
                    # Neither interpretation fit; discard the tentative name so later
                    # parts keep being evaluated, and keep this part as leftover.
                    name = None
                    extra.append(part)
            else:
                extra.append(part)

        if name is None:
            if eng or non_eng:
                name = cls(eng, non_eng, **kwargs)
            elif extra and len(extra) == 1:
                # Only one unclassified part exists: use it as the sole positional value.
                name = cls(extra[0], **kwargs)
                # Clear it so the same value is not also recorded as 'unknown' below.
                extra = None
        if name is None:
            raise ValueError(
                f'Unable to find any valid name parts from {parts!r}; found {extra=!r}'
            )
        if extra:
            # Attach leftovers; an existing 'unknown' entry in name.extra is overwritten.
            if name.extra:
                name.extra['unknown'] = extra  # noqa
            else:
                name.extra = {'unknown': extra}
        return name
示例#4
0
def _split_non_eng_lit(name_parts_str: str):
    # log.debug(f'Splitting: {name_parts_str!r}')
    non_eng, lit_translation = None, None
    if name_parts_str.startswith('('):
        name_parts_str = parenthesized(name_parts_str)
    if name_parts_str and LangCat.contains_any(name_parts_str, LangCat.asian_cats):
        name_parts = tuple(map(str.strip, name_parts_str.split(';')))
        if len(name_parts) == 1:
            non_eng = name_parts[0]
        elif len(name_parts) == 2:
            non_eng, lit_translation = name_parts
        else:
            raise ValueError(f'Unexpected name parts format: {name_parts_str!r}')
    return non_eng, lit_translation
示例#5
0
def sort_name_parts(parts: Iterable[str]) -> list[Optional[str]]:
    """Return the given parts in ``_NamePart`` sort order.

    Each part is wrapped in a ``_NamePart`` together with its original index, the
    wrappers are sorted, and the wrapped values are returned.  When the first sorted
    value contains no ``LangCat.ENG`` characters, ``None`` is inserted at the front.

    :param parts: The name parts to order.
    :return: The ordered values, possibly preceded by a ``None`` placeholder.
    """
    wrapped = [_NamePart(index, text) for index, text in enumerate(parts)]
    wrapped.sort()
    ordered = [item.value for item in wrapped]
    if ordered and not LangCat.contains_any(ordered[0], LangCat.ENG):
        ordered.insert(0, None)
    return ordered
示例#6
0
 def __init__(self, pos: int, value: str):
     """Store the part's position and text, plus the category LangCat.categorize reports for the text."""
     self.pos, self.value = pos, value
     self.cat = LangCat.categorize(value)
示例#7
0
 def split(self) -> 'Name':
     """Build a new name from the pieces ``LangCat.split`` produces for ``self.english``.

     The new name is given ``versions`` containing this object and a ``Name`` whose
     ``non_eng`` value is this object's ``english`` value.
     """
     pieces = LangCat.split(self.english)
     versions = {self, Name(non_eng=self.english)}
     return self.from_parts(pieces, versions=versions)
示例#8
0
 def from_enclosed(cls, name: str, **kwargs) -> 'Name':
     """Build a name from text that may hold a second version in an enclosed section.

     Text categorized as ``LangCat.MIX`` is split once with ``split_enclosed``
     (``reverse=True``); any other text is used as a single part.  The resulting
     parts and all keyword arguments are handed to ``from_parts``.
     """
     is_mixed = LangCat.categorize(name) == LangCat.MIX
     parts = split_enclosed(name, reverse=True, maxsplit=1) if is_mixed else (name, )
     return cls.from_parts(parts, **kwargs)
示例#9
0
 def non_eng_langs(self) -> set[LangCat]:
     """Categories reported by ``LangCat.categorize`` for ``self.non_eng``, with its second argument set to True."""
     text = self.non_eng
     return LangCat.categorize(text, True)
示例#10
0
 def non_eng_lang(self) -> LangCat:
     """Single category reported by ``LangCat.categorize`` for ``self.non_eng``."""
     text = self.non_eng
     return LangCat.categorize(text)
示例#11
0
 def eng_langs(self) -> set[LangCat]:
     """Categories reported by ``LangCat.categorize`` for ``self.english``, with its second argument set to True."""
     text = self.english
     return LangCat.categorize(text, True)
示例#12
0
 def eng_lang(self) -> LangCat:
     """Single category reported by ``LangCat.categorize`` for ``self.english``."""
     text = self.english
     return LangCat.categorize(text)
示例#13
0
 def test_spaces_ignored_for_categorize(self):
     """A space between characters must not change the category reported for the text."""
     text = '일 이'
     with self.subTest('summary'):
         self.assertEqual(LangCat.HAN, LangCat.categorize(text, False))
     with self.subTest('detail'):
         self.assertEqual({LangCat.HAN}, LangCat.categorize(text, True))