def test_moses_truecase_file(self):
    """Check truecasing with a model trained through ``train_from_file``.

    A fresh ``MosesTruecaser`` is trained on ``'big.txt'``; every key of
    ``self.input_output`` must then truecase to its paired value.
    """
    truecaser = MosesTruecaser()
    # Build the model straight from the corpus file.
    truecaser.train_from_file('big.txt')
    # Walk through all expected input -> output pairs.
    for text, expected in self.input_output.items():
        actual = truecaser.truecase(text)
        assert actual == expected
def truecase_file(modelfile, processes, is_asr, encoding):
    """Truecase stdin line by line with a saved model, writing to stdout.

    Args:
        modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
        processes: Not used by this function.
        is_asr: Forwarded to ``MosesTruecaser``.
        encoding: Forwarded to ``MosesTruecaser`` and used to open both the
            stdin and stdout text streams.
    """
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin, \
            click.get_text_stream("stdout", encoding=encoding) as fout:
        # tqdm wraps the input stream so progress is reported per line read.
        for line in tqdm(fin):
            print(moses.truecase(line, return_str=True), end="\n", file=fout)
def test_moses_truecase_documents(self):
    """Check truecasing with a model trained from in-memory documents.

    ``self.big_txt`` is split on ``'\\n'`` and each line is whitespace-split
    into a token list; the resulting list of lists is handed to ``train``.
    Every key of ``self.input_output`` must then truecase to its paired value.
    """
    truecaser = MosesTruecaser()
    # One token list per line of the corpus text.
    tokenized_lines = []
    for raw_line in self.big_txt.split('\n'):
        tokenized_lines.append(raw_line.split())
    truecaser.train(tokenized_lines)
    # Walk through all expected input -> output pairs.
    for text, expected in self.input_output.items():
        actual = truecaser.truecase(text)
        assert actual == expected
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token):
    """Train a truecaser from stdin and save the model to ``modelfile``.

    Args:
        modelfile: Destination passed to ``MosesTruecaser.save_model``.
        processes: Forwarded to ``train_from_file_object``.
        is_asr: Forwarded to the ``MosesTruecaser`` constructor.
        possibly_use_first_token: Forwarded to ``train_from_file_object``.
    """
    moses = MosesTruecaser(is_asr=is_asr)
    with click.get_text_stream('stdin') as fin:
        # The return value is not needed here; the model is persisted
        # through save_model() below.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
        )
        moses.save_model(modelfile)
def train_truecaser(modelfile, processes, is_asr, possibly_use_first_token, encoding):
    """Train a truecaser from stdin and save the model to ``modelfile``.

    Args:
        modelfile: Destination passed to ``MosesTruecaser.save_model``.
        processes: Forwarded to ``train_from_file_object``.
        is_asr: Forwarded to the ``MosesTruecaser`` constructor.
        possibly_use_first_token: Forwarded to ``train_from_file_object``.
        encoding: Forwarded to the ``MosesTruecaser`` constructor and used
            to open the stdin text stream.
    """
    moses = MosesTruecaser(is_asr=is_asr, encoding=encoding)
    with click.get_text_stream("stdin", encoding=encoding) as fin:
        # The return value is not needed here; the model is persisted
        # through save_model() below.
        moses.train_from_file_object(
            fin,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=True,
        )
        moses.save_model(modelfile)
def train_truecaser(iterator, language, processes, quiet, modelfile, is_asr, possibly_use_first_token):
    """Train a truecaser on ``iterator`` and save the model to ``modelfile``.

    Args:
        iterator: Training input passed directly to ``MosesTruecaser.train``.
        language: Not used by this function.
        processes: Forwarded to ``train``.
        quiet: When true, the progress bar is disabled.
        modelfile: Destination passed to ``MosesTruecaser.save_model``.
        is_asr: Forwarded to the ``MosesTruecaser`` constructor.
        possibly_use_first_token: Forwarded to ``train``.
    """
    moses = MosesTruecaser(is_asr=is_asr)
    # The return value is not needed here; the model is persisted through
    # save_model() below.
    moses.train(
        iterator,
        possibly_use_first_token=possibly_use_first_token,
        processes=processes,
        progress_bar=(not quiet),
    )
    moses.save_model(modelfile)
def truecase_file(iterator, language, processes, quiet, modelfile, is_asr, possibly_use_first_token):
    """Truecase ``iterator``, training and saving a model first if needed.

    If ``modelfile`` is not an existing file, a model is trained on a deep
    copy of ``iterator`` and saved to ``modelfile``. The model is then loaded
    from ``modelfile`` and applied to ``iterator`` via ``parallel_or_not``.

    Args:
        iterator: Input to truecase (and to train on when no model exists).
        language: Not used by this function.
        processes: Forwarded to ``train`` and to ``parallel_or_not``.
        quiet: Disables the training progress bar when true; also forwarded
            to ``parallel_or_not``.
        modelfile: Path of the model to load, or to create when missing.
        is_asr: Forwarded to the ``MosesTruecaser`` constructor.
        possibly_use_first_token: Forwarded to ``train``.

    Returns:
        Whatever ``parallel_or_not`` returns for the truecasing callable.
    """
    # If the model file doesn't exist, train a model.
    if not os.path.isfile(modelfile):
        # Train on a copy so that `iterator` itself is left untouched for
        # the truecasing pass below.
        # NOTE(review): copy.deepcopy cannot copy generators or open file
        # objects -- confirm callers always pass a copyable iterable.
        iterator_copy = deepcopy(iterator)
        truecaser = MosesTruecaser(is_asr=is_asr)
        truecaser.train(
            iterator_copy,
            possibly_use_first_token=possibly_use_first_token,
            processes=processes,
            progress_bar=(not quiet),
        )
        truecaser.save_model(modelfile)
    # Truecase the file.
    moses = MosesTruecaser(load_from=modelfile, is_asr=is_asr)
    moses_truecase = partial(moses.truecase, return_str=True)
    return parallel_or_not(iterator, moses_truecase, processes, quiet)
def truecase_file(modelfile, processes, is_asr):
    """Truecase stdin line by line with a saved model, writing to stdout.

    Args:
        modelfile: Passed to ``MosesTruecaser`` as ``load_from``.
        processes: Not used by this function.
        is_asr: Forwarded to ``MosesTruecaser``.
    """
    truecaser = MosesTruecaser(load_from=modelfile, is_asr=is_asr)
    with click.get_text_stream('stdin') as source:
        with click.get_text_stream('stdout') as sink:
            # tqdm wraps the input stream so progress is reported per line.
            for raw_line in tqdm(source):
                cased = truecaser.truecase(raw_line, return_str=True)
                print(cased, end='\n', file=sink)