def search_path(rootpath: str, include_pairs: bool = True) -> Dict[str, List[Tuple[str, str, str]]]:
    """
    Walk ``rootpath`` (following symlinks) and classify every ``*.mode`` file
    by its filename.

    Args:
        rootpath (str): Directory at which the walk starts.
        include_pairs (bool): When false, files matching the ``pair`` pattern
            are not recorded.

    Returns:
        Dict[str, List[Tuple[str, str, str]]]: Always has the keys
        ``'analyzer'``, ``'generator'``, ``'pair'`` and ``'tagger'``.
        Analyzer/generator/tagger entries are
        ``(parent of the directory holding the file, mode name, language pair)``;
        pair entries are ``(path to the file, source language, target language)``.
        Language codes are passed through ``to_alpha3_code``.
    """
    lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?'
    # NOTE(review): the range [A-z] below also spans the ASCII punctuation
    # between 'Z' and 'a' (e.g. '_' and '['); presumably [A-Za-z] was
    # intended -- confirm no installed mode relies on it before tightening.
    patterns = {
        'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)),
        'generator': re.compile(r'(({0}(-{0})?)-gener[A-z]*)\.mode'.format(lang_code)),
        'pair': re.compile(r'({0})-({0})\.mode'.format(lang_code)),
        'tagger': re.compile(r'(({0}(-{0})?)-tagger[A-z]*)\.mode'.format(lang_code)),
    }
    # One result list per mode type, in the same order as the patterns.
    modes = {kind: [] for kind in patterns}  # type: Dict[str, List[Tuple[str, str, str]]]

    real_root = os.path.abspath(os.path.realpath(rootpath))

    for dirpath, dirnames, files in os.walk(rootpath, followlinks=True):
        if is_loop(dirpath, rootpath, real_root):
            # Emptying the list in place stops os.walk from descending further.
            del dirnames[:]
            continue

        for filename in files:
            if not filename.endswith('.mode'):
                continue

            for kind, pattern in patterns.items():
                found = pattern.match(filename)
                if not found:
                    continue

                if kind == 'pair':
                    if include_pairs:
                        lang_src, lang_trg = found.groups()
                        entry = (
                            os.path.join(dirpath, filename),
                            to_alpha3_code(lang_src),
                            to_alpha3_code(lang_trg),
                        )
                        modes[kind].append(entry)
                    continue

                modename = found.group(1)  # e.g. en-es-anmorph
                codes = [to_alpha3_code(part) for part in found.group(2).split('-')]
                lang_pair = '-'.join(codes)  # e.g. en-es
                # The recorded directory is the parent of the one holding the file.
                parent_dir = os.path.dirname(dirpath)
                modes[kind].append((parent_dir, modename, lang_pair))

    return modes
def __init__(self, lang: str) -> None:
    """
    Set up the instance for the analyzer mode registered for ``lang``.

    Args:
        lang (str): Language code; it is passed through ``to_alpha3_code``
            before being looked up.

    Raises:
        apertium.ModeNotInstalled: If the converted code is not a key of
            ``apertium.analyzers``.
    """
    self.analyzer_cmds = {}  # type: Dict[str, List[List[str]]]
    self.lang = to_alpha3_code(lang)  # type: str

    installed = apertium.analyzers
    if self.lang not in installed:
        raise apertium.ModeNotInstalled(self.lang)

    # Each registry entry unpacks into a (path, mode) pair.
    self.path, self.mode = installed[self.lang]
def __init__(self, lang: str) -> None:
    """
    Set up the instance for the tagger mode registered for ``lang``.

    Args:
        lang (str): Language code; it is passed through ``to_alpha3_code``
            before being looked up.

    Raises:
        apertium.ModeNotInstalled: If the converted code is not a key of
            ``apertium.taggers``.
    """
    self.tagger_cmds: Dict[str, List[List[str]]] = {}
    self.lang: str = to_alpha3_code(lang)

    if self.lang in apertium.taggers:
        # Each registry entry unpacks into a (path, mode) pair.
        self.path, self.mode = apertium.taggers[self.lang]
        return

    raise apertium.ModeNotInstalled(self.lang)
def generate(self, in_text, formatting='none'):
    # type: (Generator, str, str) -> Union[str, List[str]]
    """
    Run ``in_text`` through this instance's generator commands.

    ``self.lang`` is re-assigned to the result of ``to_alpha3_code`` on every
    call before the lookup in ``apertium.generators``.

    Args:
        in_text (str): Text handed to ``execute`` as input.
        formatting (str): Accepted but not used by this method.

    Returns:
        Union[str, List[str]]: The output of ``execute`` with trailing
        ``'\\x00'`` characters stripped.

    Raises:
        apertium.ModeNotInstalled: If the converted language code is not a
            key of ``apertium.generators``.
    """
    self.lang = to_alpha3_code(self.lang)

    if self.lang not in apertium.generators:
        raise apertium.ModeNotInstalled(self.lang)

    pipeline = list(self._get_commands())
    output = execute(in_text, pipeline)
    # Drop any NUL characters left at the end of the output.
    return output.rstrip('\x00')