def read(self, data_path: str, split: str) -> Iterable[DataExample]:
    """Yield sentence-pair examples from a tab-separated split file.

    Each data row carries two sentences, a real-valued relatedness score
    and an entailment label; the header row is skipped.  Sentences are
    normalized before being handed to ``self.create_example``.
    """
    normalizer = TextNormalizer()
    split_path = self.get_split_path(data_path, split)
    with open(split_path, "r", encoding="utf-8") as tsv_file:
        rows = iter(tsv_file)
        next(rows, None)  # discard the header line
        for row in rows:
            columns = row.split("\t")
            sentence_a = normalizer.process(columns[1].strip())
            sentence_b = normalizer.process(columns[2].strip())
            score = float(columns[3].strip())
            entailment = columns[4].strip()
            yield self.create_example(sentence_a, sentence_b, score, entailment)
def read(self, data_path: str, split: str) -> Iterable[DataExample]:
    """Yield labelled single-sentence examples from a whitespace-separated file.

    Non-train splits are mapped to their ``out-<split>`` file name.  The last
    whitespace token of each line is the label; the rest is the text.  Three
    tokenised-apart Polish enclitic particles ("em", "śmy", "m") are re-joined
    to the preceding word before normalization.
    """
    if split != "train":
        split = f"out-{split}"
    input_path = self.get_split_path(data_path, split)
    normalizer = TextNormalizer()
    with open(input_path, "r", encoding="utf-8") as input_file:
        for raw_line in input_file:
            tokens = raw_line.split()
            label = tokens[-1]
            sentence = " ".join(tokens[:-1])
            # Re-attach split-off enclitics: " em " -> "em ", etc.
            for particle in (" em ", " śmy ", " m "):
                sentence = sentence.replace(particle, particle.lstrip())
            sentence = normalizer.process(sentence)
            yield DataExample(sentence, label)
def read(self, data_path: str, split: str) -> Iterable[DataExample]:
    """Yield examples from a pair of parallel files: one with texts, one with tags.

    The split name "train" maps to the on-disk prefix "training".  Line *i* of
    the text file pairs with line *i* of the tags file; the anonymization
    placeholder is rewritten to the model-facing user token after
    normalization.
    """
    split_name = "training" if split == "train" else split
    pattern = "{}_set_clean_only_{}.txt"
    task_dir = os.path.join(data_path, self._spec.task_path())
    text_path = os.path.join(task_dir, pattern.format(split_name, "text"))
    tags_path = os.path.join(task_dir, pattern.format(split_name, "tags"))
    normalizer = TextNormalizer(detokenize=False)
    with open(text_path, "r", encoding="utf-8") as text_file, open(
            tags_path, "r", encoding="utf-8") as tags_file:
        text_lines = text_file.readlines()
        tags_lines = tags_file.readlines()
        # Both files must describe the same number of examples.
        assert len(text_lines) == len(tags_lines)
        for raw_text, raw_tag in zip(text_lines, tags_lines):
            sentence = normalizer.process(raw_text.strip())
            sentence = sentence.replace("@anonymized_account", "@ użytkownik")
            yield DataExample(sentence, raw_tag.strip())
def read_simple(self, data_path: str, split: str, separator: str = " ",
                label_first: bool = True, normalize: bool = True):
    """Yield examples from a simple two-column file, one example per line.

    Each line holds a label and a text separated by *separator* (split with
    ``maxsplit=1`` so the text may itself contain the separator).  With
    ``label_first`` the first column is the label, otherwise the second.
    ``normalize`` runs the text through :class:`TextNormalizer`.

    :param data_path: root directory of the task data
    :param split: dataset split name resolved via ``self.get_split_path``
    :param separator: column separator (default: single space)
    :param label_first: whether the label precedes the text on each line
    :param normalize: whether to normalize the text column
    """
    label_idx = 0 if label_first else 1
    text_idx = 1 if label_first else 0
    input_path = self.get_split_path(data_path, split)
    # PEP 8 (E731): build the normalizer once instead of rebinding a lambda.
    normalizer = TextNormalizer() if normalize else None
    with open(input_path, "r", encoding="utf-8") as input_file:
        for line in input_file:
            values = line.split(sep=separator, maxsplit=1)
            # Bug fix: strip the label as well — when the label is the second
            # column (label_first=False) it previously kept the trailing
            # newline of the line, producing labels like "positive\n".
            label = values[label_idx].strip()
            text = values[text_idx].strip()
            if normalizer is not None:
                text = normalizer.process(text)
            yield DataExample(text, label)
def normalizer(self) -> TextNormalizer:
    """Return a text normalizer with detokenization disabled (keeps original spacing)."""
    return TextNormalizer(detokenize=False)
def normalizer(self) -> TextNormalizer:
    """Return a text normalizer with its default settings."""
    return TextNormalizer()
def normalizer(self) -> TextNormalizer:
    """Return an English-language normalizer with detokenization disabled."""
    return TextNormalizer(detokenize=False, lang="en")