def from_parts(cls, parts: Iterable[str], **kwargs) -> 'Name':
    """
    Build a name from a loosely ordered collection of string parts.

    Falsy parts are skipped.  The first part for which
    ``LangCat.contains_any(part, LangCat.non_eng_cats)`` holds (while no non-English value is set)
    becomes the non-English value; the first other part for which
    ``LangCat.contains_any(part, LangCat.ENG)`` holds (while no English value is set) becomes the
    English value.  Once both are set, a part that ``LangCat.categorize`` reports as ``LangCat.ENG``
    is tried as a romanization of the pair.  Any part that fits none of these roles, and every part
    seen after the name has been fixed, is collected and stored under ``extra['unknown']``.

    :param parts: The candidate name parts, in the order they should be considered
    :param kwargs: Keyword arguments passed through unchanged to the ``cls`` constructor
    :return: The constructed name object
    :raises ValueError: If no English / non-English part was found and there was not exactly one
      leftover part to fall back on
    """
    eng = None
    non_eng = None
    extra = []
    name = None
    for part in parts:
        if not part:
            continue
        if name is not None:
            # The name is already settled; anything further is just recorded.
            extra.append(part)
            continue
        if not non_eng and LangCat.contains_any(part, LangCat.non_eng_cats):
            non_eng = part
            continue
        if not eng and LangCat.contains_any(part, LangCat.ENG):
            eng = part
            continue
        if not (eng and non_eng and LangCat.categorize(part) == LangCat.ENG):
            extra.append(part)
            continue

        # Both values are known and this part is English: try it as a romanization.
        candidate = cls(eng, non_eng, **kwargs)
        if candidate.has_romanization(part):
            candidate.romanized = part
            name = candidate
        elif candidate.has_romanization(eng) and not is_english(eng) and is_english(part):
            # The value stored as English is really the romanization; this part is the English one.
            candidate._english = part
            candidate.romanized = eng
            name = candidate
        else:
            # No match - keep looking, so later parts get the same chance.
            extra.append(part)

    if name is None:
        if eng or non_eng:
            name = cls(eng, non_eng, **kwargs)
        elif len(extra) == 1:
            # A single unclassified part is used as the sole positional value.
            name = cls(extra[0], **kwargs)
            extra = None

    if name is None:
        raise ValueError(
            f'Unable to find any valid name parts from {parts!r}; found {extra=!r}'
        )

    if not extra:
        return name

    if name.extra:
        name.extra['unknown'] = extra  # noqa
    else:
        name.extra = {'unknown': extra}
    return name
def _split_non_eng_lit(name_parts_str: str):
    """
    Split a ``non_eng; literal translation`` string into its two components.

    If the text starts with ``(`` it is first passed through ``parenthesized`` (presumably to
    unwrap the parentheses - confirm against that helper).  The result is only split when it is
    non-empty and ``LangCat.contains_any(text, LangCat.asian_cats)`` holds.

    :param name_parts_str: The text to split
    :return: Tuple of ``(non_eng, lit_translation)``; either or both may be None
    :raises ValueError: If the text splits into more than two ``;``-separated pieces
    """
    if name_parts_str.startswith('('):
        name_parts_str = parenthesized(name_parts_str)

    if not name_parts_str or not LangCat.contains_any(name_parts_str, LangCat.asian_cats):
        return None, None

    pieces = [piece.strip() for piece in name_parts_str.split(';')]
    if len(pieces) > 2:
        raise ValueError(f'Unexpected name parts format: {name_parts_str!r}')
    if len(pieces) == 2:
        non_eng, lit_translation = pieces
        return non_eng, lit_translation
    # str.split always yields at least one piece, so exactly one remains here.
    return pieces[0], None
def sort_name_parts(parts: Iterable[str]) -> list[Optional[str]]:
    """
    Order name parts using the comparison defined by ``_NamePart``.

    Each part is wrapped as ``_NamePart(position, part)``, the wrappers are sorted, and their
    ``value`` attributes are returned in that order.  When the first sorted value does not satisfy
    ``LangCat.contains_any(value, LangCat.ENG)``, ``None`` is inserted at the front of the list.

    :param parts: The name parts to sort
    :return: A new list of the sorted parts, possibly with a leading None; empty for empty input
    """
    wrapped = [_NamePart(index, text) for index, text in enumerate(parts)]
    wrapped.sort()
    ordered = [item.value for item in wrapped]
    if ordered and not LangCat.contains_any(ordered[0], LangCat.ENG):
        ordered.insert(0, None)
    return ordered