示例#1
0
 def test_moses_truecase_file(self):
     """Train a truecaser from ``big.txt`` and check every expected pair.

     Each key of ``self.input_output`` is passed to ``truecase`` and the
     result must equal the corresponding value.
     """
     truecaser = MosesTruecaser()
     # The model is learned directly from the file on disk.
     truecaser.train_from_file('big.txt')
     for source_text, expected in self.input_output.items():
         actual = truecaser.truecase(source_text)
         assert actual == expected
示例#2
0
def truecase_file(modelfile, processes, is_asr, encoding):
    """Truecase every line read from stdin and print the result to stdout.

    Args:
        modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
        processes: Accepted for interface compatibility; not used here.
        is_asr: Forwarded to ``MosesTruecaser``.
        encoding: Forwarded to ``MosesTruecaser`` and used to open both
            the stdin and stdout text streams.
    """
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        with click.get_text_stream("stdout", encoding=encoding) as fout:
            # tqdm wraps the input stream to report progress line by line.
            for line in tqdm(fin):
                print(moses.truecase(line, return_str=True), end="\n", file=fout)
示例#3
0
 def test_moses_truecase_documents(self):
     """Train a truecaser from in-memory documents and check every pair.

     ``self.big_txt`` is split on newlines and each line is whitespace-split
     into a token list; the resulting list of token lists is the training
     input. Each key of ``self.input_output`` must then truecase to its
     corresponding value.
     """
     truecaser = MosesTruecaser()
     # Build one token list per line of the raw text.
     tokenized_docs = []
     for raw_line in self.big_txt.split('\n'):
         tokenized_docs.append(raw_line.split())
     truecaser.train(tokenized_docs)
     for source_text, expected in self.input_output.items():
         actual = truecaser.truecase(source_text)
         assert actual == expected
示例#4
0
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token):
    """Train a truecaser on text read from stdin and save it to ``modelfile``.

    Args:
        modelfile: Destination passed to ``MosesTruecaser.save_model``.
        processes: Forwarded to ``train_from_file_object``.
        is_asr: Forwarded to ``MosesTruecaser``.
        possibly_use_first_token: Forwarded to ``train_from_file_object``.
    """
    moses = MosesTruecaser(is_asr=is_asr)
    with click.get_text_stream('stdin') as fin:
        # The return value of training is not needed here; the model is
        # persisted through save_model below.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes)
        moses.save_model(modelfile)
示例#5
0
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token, encoding):
    """Train a truecaser on text read from stdin and save it to ``modelfile``.

    Args:
        modelfile: Destination passed to ``MosesTruecaser.save_model``.
        processes: Forwarded to ``train_from_file_object``.
        is_asr: Forwarded to ``MosesTruecaser``.
        possibly_use_first_token: Forwarded to ``train_from_file_object``.
        encoding: Forwarded to ``MosesTruecaser`` and used to open the
            stdin text stream.
    """
    moses = MosesTruecaser(is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        # The return value of training is not needed here; the model is
        # persisted through save_model below.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=True,
        )
        moses.save_model(modelfile)
示例#6
0
文件: cli.py 项目: isi-nlp/sacremoses
def train_truecaser(iterator, language, processes, quiet, modelfile, is_asr,
                    possibly_use_first_token):
    """Train a truecaser on ``iterator`` and save it to ``modelfile``.

    Args:
        iterator: Training input handed directly to ``MosesTruecaser.train``.
        language: Accepted for interface compatibility; not used here.
        processes: Forwarded to ``train``.
        quiet: When true, the training progress bar is disabled.
        modelfile: Destination passed to ``MosesTruecaser.save_model``.
        is_asr: Forwarded to ``MosesTruecaser``.
        possibly_use_first_token: Forwarded to ``train``.
    """
    moses = MosesTruecaser(is_asr=is_asr)
    # The return value of training is not needed here; the model is
    # persisted through save_model below.
    moses.train(
        iterator,
        possibly_use_first_token=possibly_use_first_token,
        processes=processes,
        progress_bar=(not quiet),
    )
    moses.save_model(modelfile)
示例#7
0
文件: cli.py 项目: isi-nlp/sacremoses
def truecase_file(iterator, language, processes, quiet, modelfile, is_asr,
                  possibly_use_first_token):
    """Truecase ``iterator``, training and saving a model first if needed.

    If ``modelfile`` is not an existing file, a model is trained on a deep
    copy of ``iterator`` and saved to ``modelfile``. The model is then loaded
    from ``modelfile`` and applied to ``iterator`` via ``parallel_or_not``.

    Args:
        iterator: Input to truecase (and to train on when no model exists).
        language: Accepted for interface compatibility; not used here.
        processes: Forwarded to ``train`` and to ``parallel_or_not``.
        quiet: Disables the training progress bar when true; also forwarded
            to ``parallel_or_not``.
        modelfile: Path of the model file to load (and create if missing).
        is_asr: Forwarded to ``MosesTruecaser``.
        possibly_use_first_token: Forwarded to ``train``.

    Returns:
        Whatever ``parallel_or_not`` returns for the truecasing callable.
    """
    # If the model file doesn't exist, train a model and save it there.
    if not os.path.isfile(modelfile):
        # Presumably copied so training does not consume the iterator that
        # is truecased below.
        # NOTE(review): deepcopy raises TypeError for generators and open
        # file objects — confirm callers pass a copyable iterable.
        iterator_copy = deepcopy(iterator)
        truecaser = MosesTruecaser(is_asr=is_asr)
        truecaser.train(
            iterator_copy,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=(not quiet),
        )
        truecaser.save_model(modelfile)
    # Truecase the input with the model loaded from disk.
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr)
    moses_truecase = partial(moses.truecase, return_str=True)
    return parallel_or_not(iterator, moses_truecase, processes, quiet)
示例#8
0
def truecase_file(modelfile, processes, is_asr):
    """Truecase every line read from stdin and print the result to stdout.

    Args:
        modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
        processes: Accepted for interface compatibility; not used here.
        is_asr: Forwarded to ``MosesTruecaser``.
    """
    truecaser = MosesTruecaser(load_from=modelfile, is_asr=is_asr)
    with click.get_text_stream('stdin') as source:
        with click.get_text_stream('stdout') as sink:
            # tqdm wraps the input stream to report progress line by line.
            for raw_line in tqdm(source):
                cased = truecaser.truecase(raw_line, return_str=True)
                print(cased, end='\n', file=sink)