Пример #1
0
    def from_parts(cls, parts: Iterable[str], **kwargs) -> 'Name':
        """
        Build an instance from a loosely ordered collection of name strings.

        Falsy parts are skipped.  Each remaining part is classified in order:

        * the first part for which ``LangCat.contains_any(part, LangCat.non_eng_cats)``
          is truthy becomes the non-English value;
        * otherwise, the first part for which ``LangCat.contains_any(part, LangCat.ENG)``
          is truthy becomes the English value;
        * once both are set, a part with ``LangCat.categorize(part) == LangCat.ENG``
          is tried as a romanization (or as the real English value when the
          stored one turns out to be the romanization);
        * anything else, and every part seen after a name was built inside the
          loop, is collected as extra.

        :param parts: The candidate name strings.
        :param kwargs: Passed through unchanged to the class constructor.
        :return: The constructed name; unclassified parts are stored under
          ``extra['unknown']`` on it.
        :raises ValueError: If no name could be built from the given parts.
        """
        english = foreign = result = None
        extra = []
        for part in parts:
            if not part:
                continue
            if result is not None:
                # A name was already assembled; everything after it is surplus.
                extra.append(part)
                continue
            if not foreign and LangCat.contains_any(part, LangCat.non_eng_cats):
                foreign = part
                continue
            if not english and LangCat.contains_any(part, LangCat.ENG):
                english = part
                continue
            if not (english and foreign
                    and LangCat.categorize(part) == LangCat.ENG):
                extra.append(part)
                continue

            # Both values are known and this part is categorized as ENG:
            # build a tentative name to test it against.
            candidate = cls(english, foreign, **kwargs)
            if candidate.has_romanization(part):
                candidate.romanized = part
            elif (candidate.has_romanization(english)
                  and not is_english(english) and is_english(part)):
                # The stored "English" value is the romanization, and this
                # part is the English value.
                candidate._english = part
                candidate.romanized = english
            else:
                # Neither interpretation fits; discard the tentative name.
                extra.append(part)
                continue
            result = candidate

        if result is None:
            if english or foreign:
                result = cls(english, foreign, **kwargs)
            elif len(extra) == 1:
                # A lone unclassified part is used as the name itself.
                result = cls(extra[0], **kwargs)
                extra = None
        if result is None:
            raise ValueError(
                f'Unable to find any valid name parts from {parts!r}; found {extra=!r}'
            )
        if extra:
            if result.extra:
                result.extra['unknown'] = extra  # noqa
            else:
                result.extra = {'unknown': extra}
        return result
Пример #2
0
def _split_non_eng_lit(name_parts_str: str):
    # log.debug(f'Splitting: {name_parts_str!r}')
    non_eng, lit_translation = None, None
    if name_parts_str.startswith('('):
        name_parts_str = parenthesized(name_parts_str)
    if name_parts_str and LangCat.contains_any(name_parts_str, LangCat.asian_cats):
        name_parts = tuple(map(str.strip, name_parts_str.split(';')))
        if len(name_parts) == 1:
            non_eng = name_parts[0]
        elif len(name_parts) == 2:
            non_eng, lit_translation = name_parts
        else:
            raise ValueError(f'Unexpected name parts format: {name_parts_str!r}')
    return non_eng, lit_translation
Пример #3
0
def sort_name_parts(parts: Iterable[str]) -> list[Optional[str]]:
    """
    Return the given parts as a list, in ``_NamePart`` sort order.

    Each part is wrapped, together with its original position, in a
    ``_NamePart`` (defined elsewhere), the wrappers are sorted, and the wrapped
    values are extracted again.  When the first sorted value does not satisfy
    ``LangCat.contains_any(value, LangCat.ENG)``, ``None`` is placed in front
    of it -- presumably so index 0 is always the English slot; confirm against
    callers.

    :param parts: The name parts to order.
    :return: The ordered parts, possibly led by a ``None`` placeholder; an
      empty list for empty input.
    """
    ranked = sorted(_NamePart(idx, text) for idx, text in enumerate(parts))
    ordered = [entry.value for entry in ranked]
    if not ordered:
        return ordered
    if LangCat.contains_any(ordered[0], LangCat.ENG):
        return ordered
    ordered.insert(0, None)
    return ordered