示例#1
0
 def __init__(self, input_case: str, lang: str = 'en'):
     """Run the parent initializer, then install non-deterministic grammars.

     Args:
         input_case: forwarded to the parent initializer and to ``ClassifyFst``.
         lang: language code; only ``'en'`` has grammars wired up in this method.

     Raises:
         NotImplementedError: if ``lang`` is anything other than ``'en'``.
     """
     super().__init__(input_case=input_case, lang=lang)
     if lang != 'en':
         # Without this guard the grammar classes below are never imported and
         # the constructor dies with an opaque UnboundLocalError instead.
         raise NotImplementedError(f"Language {lang!r} has not been supported yet.")

     # Function-scope imports: the grammar modules are only loaded when an
     # instance is actually constructed.
     from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
     from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

     # Replace whatever the parent configured with the non-deterministic variants.
     self.tagger = ClassifyFst(input_case=input_case, deterministic=False)
     self.verbalizer = VerbalizeFinalFst(deterministic=False)
示例#2
0
    def __init__(
        self,
        input_case: str,
        lang: str = 'en',
        deterministic: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = False,
    ):
        """Pick the grammars for ``lang`` and build the tagger, verbalizer and parser.

        Args:
            input_case: must be ``"lower_cased"`` or ``"cased"``.
            lang: language code; ``'en'`` and ``'ru'`` are handled here.
            deterministic: forwarded to ``ClassifyFst`` and ``VerbalizeFinalFst``;
                for ``'en'`` it also selects which tagger module is imported.
            cache_dir: forwarded to ``ClassifyFst``.
            overwrite_cache: forwarded to ``ClassifyFst``.

        Raises:
            AssertionError: if ``input_case`` is not one of the accepted values.
            NotImplementedError: if ``lang`` has no grammars wired up here.
        """
        assert input_case in ["lower_cased", "cased"]

        if lang == 'en':
            # The deterministic flag decides which English tagger is loaded;
            # the verbalizer module is the same in both cases.
            if deterministic:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
            else:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import ClassifyFst
            from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
        elif lang == 'ru':
            # Ru TN only supports non-deterministic cases and produces multiple normalization options;
            # use normalize_with_audio.py
            from nemo_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst
        else:
            # Without this branch ClassifyFst is never bound and the code below
            # fails with an opaque UnboundLocalError.
            raise NotImplementedError(f"Language {lang!r} has not been supported yet.")

        self.tagger = ClassifyFst(
            input_case=input_case,
            deterministic=deterministic,
            cache_dir=cache_dir,
            overwrite_cache=overwrite_cache,
        )
        self.verbalizer = VerbalizeFinalFst(deterministic=deterministic)
        self.parser = TokenParser()
        self.lang = lang
示例#3
0
    def __init__(self, input_case: str, lang: str = 'en'):
        """Build the deterministic tagger, verbalizer and token parser.

        Args:
            input_case: must be ``"lower_cased"`` or ``"cased"``.
            lang: language code; only ``'en'`` has grammars wired up here.

        Raises:
            AssertionError: if ``input_case`` is not one of the accepted values.
            NotImplementedError: if ``lang`` is anything other than ``'en'``.
        """
        assert input_case in ["lower_cased", "cased"]

        if lang != 'en':
            # Without this guard the grammar classes are never imported and the
            # assignments below fail with an opaque UnboundLocalError.
            raise NotImplementedError(f"Language {lang!r} has not been supported yet.")

        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

        self.tagger = ClassifyFst(input_case=input_case, deterministic=True)
        self.verbalizer = VerbalizeFinalFst(deterministic=True)
        self.parser = TokenParser()
示例#4
0
文件: normalize.py 项目: sycomix/NeMo
    def __init__(
        self,
        input_case: str,
        lang: str = 'en',
        deterministic: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = False,
        whitelist: str = None,
        lm: bool = False,
    ):
        """Pick the grammars for ``lang`` and build tagger, verbalizer, parser and processor.

        Args:
            input_case: must be ``"lower_cased"`` or ``"cased"``.
            lang: language code; ``'en'``, ``'ru'``, ``'de'`` and ``'es'`` are handled.
            deterministic: forwarded to ``ClassifyFst`` and ``VerbalizeFinalFst``;
                for ``'en'`` it also selects which tagger module is imported.
            cache_dir: forwarded to ``ClassifyFst``.
            overwrite_cache: forwarded to ``ClassifyFst``.
            whitelist: forwarded to ``ClassifyFst``.
            lm: only consulted when ``lang == 'en'`` and ``deterministic`` is
                false; selects the ``tokenize_and_classify_lm`` tagger instead
                of ``tokenize_and_classify_with_audio``.

        Raises:
            AssertionError: if ``input_case`` is not one of the accepted values.
            ImportError: if ``PYNINI_AVAILABLE`` is false.
            NotImplementedError: if ``lang`` has no grammars wired up here.
        """
        assert input_case in ["lower_cased", "cased"]

        if not PYNINI_AVAILABLE:
            raise ImportError(get_installation_msg())

        if lang == 'en':
            if deterministic:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
            elif lm:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_lm import ClassifyFst
            else:
                from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import (
                    ClassifyFst,
                )
            # All English variants share the same verbalizer module.
            from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
        elif lang == 'ru':
            # Ru TN only supports non-deterministic cases and produces multiple normalization options;
            # use normalize_with_audio.py
            from nemo_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst
        elif lang == 'de':
            from nemo_text_processing.text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.de.verbalizers.verbalize_final import VerbalizeFinalFst
        elif lang == 'es':
            from nemo_text_processing.text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
            from nemo_text_processing.text_normalization.es.verbalizers.verbalize_final import VerbalizeFinalFst
        else:
            # Without this branch ClassifyFst is never bound and the code below
            # fails with an opaque UnboundLocalError.
            raise NotImplementedError(f"Language {lang!r} has not been supported yet.")

        self.tagger = ClassifyFst(
            input_case=input_case,
            deterministic=deterministic,
            cache_dir=cache_dir,
            overwrite_cache=overwrite_cache,
            whitelist=whitelist,
        )
        self.verbalizer = VerbalizeFinalFst(deterministic=deterministic)
        self.parser = TokenParser()
        self.lang = lang

        # The Moses processor is optional: fall back to None when the NLP
        # collection is unavailable and tell the user what is skipped.
        if NLP_AVAILABLE:
            self.processor = MosesProcessor(lang_id=lang)
        else:
            self.processor = None
            print(
                "NeMo NLP is not available. Moses de-tokenization will be skipped."
            )
示例#5
0
    def __init__(self, input_case: str, lang: str = 'en', cache_dir: str = None, overwrite_cache: bool = False):
        """Set up non-deterministic grammars, delegating non-English languages to the parent.

        Args:
            input_case: forwarded to ``ClassifyFst`` (English) or to the parent initializer.
            lang: language code; ``'en'`` is built here, everything else by the parent.
            cache_dir: forwarded to ``ClassifyFst`` on the English path.
            overwrite_cache: forwarded to ``ClassifyFst`` on the English path.
        """
        if lang != 'en':
            # NOTE(review): the keyword is spelled "deterministric"; it has to match
            # the parent's parameter name, which is not visible here - confirm.
            # NOTE(review): cache_dir / overwrite_cache are not passed on this path.
            super().__init__(input_case=input_case, lang=lang, deterministric=False)
        else:
            from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify_with_audio import ClassifyFst
            from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

            # The parent initializer is not invoked on this path; only the
            # tagger and verbalizer are created here.
            tagger_kwargs = dict(
                input_case=input_case,
                deterministic=False,
                cache_dir=cache_dir,
                overwrite_cache=overwrite_cache,
            )
            self.tagger = ClassifyFst(**tagger_kwargs)
            self.verbalizer = VerbalizeFinalFst(deterministic=False)

        self.lang = lang
示例#6
0
    def __init__(
        self,
        input_case: str,
        lang: str = 'en',
        deterministric: bool = True,
        cache_dir: str = None,
        overwrite_cache: bool = False,
    ):
        """Build the English tagger, verbalizer and token parser.

        Args:
            input_case: must be ``"lower_cased"`` or ``"cased"``.
            lang: language code; only ``'en'`` has grammars wired up here.
            deterministric: forwarded as ``deterministic`` to ``ClassifyFst`` and
                ``VerbalizeFinalFst``. The parameter name is misspelled but is
                kept as-is because callers may pass it by keyword.
            cache_dir: forwarded to ``ClassifyFst``.
            overwrite_cache: forwarded to ``ClassifyFst``.

        Raises:
            AssertionError: if ``input_case`` is not one of the accepted values.
            NotImplementedError: if ``lang`` is anything other than ``'en'``.
        """
        assert input_case in ["lower_cased", "cased"]

        if lang != 'en':
            # Without this guard the grammar classes are never imported and the
            # assignments below fail with an opaque UnboundLocalError.
            raise NotImplementedError(f"Language {lang!r} has not been supported yet.")

        from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst

        self.tagger = ClassifyFst(
            input_case=input_case,
            deterministic=deterministric,
            cache_dir=cache_dir,
            overwrite_cache=overwrite_cache,
        )
        self.verbalizer = VerbalizeFinalFst(deterministic=deterministric)
        self.parser = TokenParser()