def __init__(self, input_case: str, lang: str = 'en'):
    """Initialize the non-deterministic (multi-option) normalizer.

    Args:
        input_case: casing of the input text, forwarded to the base class
        lang: language code; only ``'en'`` builds a tagger/verbalizer here —
            any other language relies entirely on the base-class initializer
    """
    super().__init__(input_case=input_case, lang=lang)
    if lang != 'en':
        # Nothing English-specific to set up; base __init__ did all the work.
        return

    from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
    from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

    # deterministic=False: produce multiple candidate normalizations.
    self.tagger = ClassifyFst(input_case=input_case, deterministic=False)
    self.verbalizer = VerbalizeFinalFst(deterministic=False)
def __init__(
    self,
    input_case: str,
    lang: str = 'en',
    deterministic: bool = True,
    cache_dir: str = None,
    overwrite_cache: bool = False,
):
    """Initialize the text normalizer: tagger, verbalizer, and token parser.

    Args:
        input_case: casing of the input text, must be "lower_cased" or "cased"
        lang: language code; 'en' (deterministic or not) and 'ru' are supported
        deterministic: if True, build grammars that produce a single output;
            if False, grammars may produce multiple normalization options
        cache_dir: directory for caching compiled grammars (None disables caching)
        overwrite_cache: if True, recompile grammars even when a cache exists

    Raises:
        NotImplementedError: if ``lang`` is not one of the supported languages.
    """
    assert input_case in ["lower_cased", "cased"]

    if lang == 'en' and deterministic:
        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
    elif lang == 'en' and not deterministic:
        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
    elif lang == 'ru':
        # Ru TN only support non-deterministic cases and produces multiple normalization options
        # use normalize_with_audio.py
        from nemo_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst
    else:
        # Previously an unsupported language fell through and crashed below with
        # a confusing "NameError: ClassifyFst is not defined" — fail fast instead.
        raise NotImplementedError(f"Language '{lang}' is not supported")

    self.tagger = ClassifyFst(
        input_case=input_case, deterministic=deterministic, cache_dir=cache_dir, overwrite_cache=overwrite_cache
    )
    self.verbalizer = VerbalizeFinalFst(deterministic=deterministic)
    self.parser = TokenParser()
    self.lang = lang
def __init__(self, input_case: str, lang: str = 'en'):
    """Initialize the deterministic English normalizer.

    Args:
        input_case: casing of the input text, must be "lower_cased" or "cased"
        lang: language code; only 'en' is supported by this class

    Raises:
        NotImplementedError: if ``lang`` is not 'en'.
    """
    assert input_case in ["lower_cased", "cased"]

    if lang == 'en':
        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
    else:
        # Previously a non-'en' language left ClassifyFst unbound and crashed
        # below with an unhelpful NameError — raise a clear error instead.
        raise NotImplementedError(f"Language '{lang}' is not supported")

    self.tagger = ClassifyFst(input_case=input_case, deterministic=True)
    self.verbalizer = VerbalizeFinalFst(deterministic=True)
    self.parser = TokenParser()
def __init__(
    self,
    input_case: str,
    lang: str = 'en',
    deterministic: bool = True,
    cache_dir: str = None,
    overwrite_cache: bool = False,
    whitelist: str = None,
    lm: bool = False,
):
    """Initialize the text normalizer: tagger, verbalizer, parser, and de-tokenizer.

    Args:
        input_case: casing of the input text, must be "lower_cased" or "cased"
        lang: language code; 'en', 'ru', 'de', and 'es' are supported
        deterministic: if True, build grammars that produce a single output;
            if False, grammars may produce multiple normalization options
        cache_dir: directory for caching compiled grammars (None disables caching)
        overwrite_cache: if True, recompile grammars even when a cache exists
        whitelist: path to a custom whitelist file, forwarded to the tagger
        lm: if True (English non-deterministic only), use the LM-based classifier

    Raises:
        ImportError: if pynini is not installed.
        NotImplementedError: if ``lang`` is not one of the supported languages.
    """
    assert input_case in ["lower_cased", "cased"]

    if not PYNINI_AVAILABLE:
        raise ImportError(get_installation_msg())

    if lang == 'en' and deterministic:
        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
    elif lang == 'en' and not deterministic:
        if lm:
            from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_lm import ClassifyFst
        else:
            from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import (
                ClassifyFst,
            )
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
    elif lang == 'ru':
        # Ru TN only support non-deterministic cases and produces multiple normalization options
        # use normalize_with_audio.py
        from nemo_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst
    elif lang == 'de':
        from nemo_text_processing.text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.de.verbalizers.verbalize_final import VerbalizeFinalFst
    elif lang == 'es':
        from nemo_text_processing.text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.es.verbalizers.verbalize_final import VerbalizeFinalFst
    else:
        # Previously an unsupported language fell through and crashed below with
        # a confusing "NameError: ClassifyFst is not defined" — fail fast instead.
        raise NotImplementedError(f"Language '{lang}' is not supported")

    self.tagger = ClassifyFst(
        input_case=input_case,
        deterministic=deterministic,
        cache_dir=cache_dir,
        overwrite_cache=overwrite_cache,
        whitelist=whitelist,
    )
    self.verbalizer = VerbalizeFinalFst(deterministic=deterministic)
    self.parser = TokenParser()
    self.lang = lang

    # Moses de-tokenization is optional; skip it gracefully when NeMo NLP
    # (and thus MosesProcessor) is unavailable.
    if NLP_AVAILABLE:
        self.processor = MosesProcessor(lang_id=lang)
    else:
        self.processor = None
        print("NeMo NLP is not available. Moses de-tokenization will be skipped.")
def __init__(self, input_case: str, lang: str = 'en', cache_dir: str = None, overwrite_cache: bool = False):
    """Initialize the audio-based (non-deterministic) normalizer.

    English gets its own audio-aware tagger/verbalizer; every other language
    is delegated entirely to the base-class initializer.

    Args:
        input_case: casing of the input text
        lang: language code
        cache_dir: directory for caching compiled grammars (None disables caching)
        overwrite_cache: if True, recompile grammars even when a cache exists
    """
    if lang != 'en':
        # NOTE(review): 'deterministric' (sic) matches the typo'd keyword in the
        # parent __init__ signature — confirm before renaming on either side.
        super().__init__(input_case=input_case, lang=lang, deterministric=False)
        self.lang = lang
        return

    from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import ClassifyFst
    from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

    # deterministic=False: produce multiple candidate normalizations.
    self.tagger = ClassifyFst(
        input_case=input_case, deterministic=False, cache_dir=cache_dir, overwrite_cache=overwrite_cache
    )
    self.verbalizer = VerbalizeFinalFst(deterministic=False)
    self.lang = lang
def __init__(
    self,
    input_case: str,
    lang: str = 'en',
    deterministric: bool = True,
    cache_dir: str = None,
    overwrite_cache: bool = False,
):
    """Initialize the English text normalizer: tagger, verbalizer, and parser.

    Args:
        input_case: casing of the input text, must be "lower_cased" or "cased"
        lang: language code; only 'en' imports a tagger/verbalizer here
        deterministric: NOTE(review) — misspelling of "deterministic", but it is
            part of the public keyword interface (subclasses pass it by name),
            so it is preserved as-is; True means single-output grammars
        cache_dir: directory for caching compiled grammars (None disables caching)
        overwrite_cache: if True, recompile grammars even when a cache exists
    """
    assert input_case in ["lower_cased", "cased"]

    if lang == 'en':
        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

    self.parser = TokenParser()
    self.tagger = ClassifyFst(
        input_case=input_case,
        deterministic=deterministric,
        cache_dir=cache_dir,
        overwrite_cache=overwrite_cache,
    )
    self.verbalizer = VerbalizeFinalFst(deterministic=deterministric)