from fairseq.models.transformer_lm import TransformerLanguageModel


def load_lm(save_path, checkpoint_name, bpe_code):
    # Load a pretrained Transformer LM with Moses tokenization and fastBPE codes.
    lm = TransformerLanguageModel.from_pretrained(
        save_path, checkpoint_name,
        tokenizer='moses', bpe='fastbpe', bpe_codes=bpe_code)
    lm.eval()  # disable dropout
    lm.cuda()
    return lm
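# Sketch (not from the original sources): example call to the load_lm helper above.
# The model directory, checkpoint name, and BPE-codes path are hypothetical placeholders.
if __name__ == '__main__':
    lm = load_lm('/path/to/lm_dir', 'checkpoint_best.pt', '/path/to/bpe.codes')
    print(lm.sample('Hello world', beam=5))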
import os

from fairseq.models.fconv_lm import FConvLanguageModel
from fairseq.models.transformer_lm import TransformerLanguageModel


def load_lm(lm_path, model_type, dict_path):
    path, checkpoint = os.path.split(lm_path)
    if model_type == "convlm":
        model_handle = FConvLanguageModel.from_pretrained(
            path, checkpoint, os.path.split(dict_path)[0])
    elif model_type == "transformer":
        model_handle = TransformerLanguageModel.from_pretrained(
            path, checkpoint, os.path.split(dict_path)[0])
    else:
        raise Exception(
            "Unsupported language model type: use 'convlm' or 'transformer' models"
        )
    # Keep only the decoder of the first (and only) loaded model and move it to the GPU.
    model = model_handle.models[0].decoder.cuda()
    model.eval()
    print(model)
    return model
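# Sketch (assumption, not from the original sources): example invocation of the
# loader above; the checkpoint and dictionary paths are hypothetical placeholders.
decoder = load_lm("/path/to/lm/checkpoint_best.pt", "transformer", "/path/to/data-bin/dict.txt")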
import logging

from fairseq.models.transformer_lm import TransformerLanguageModel


def main():
    args = parse_args()
    logging.basicConfig(level=logging.DEBUG)
    logging.debug(args)

    tokens = args.prefix.split(" ")
    num_tokens = len(tokens)
    assert args.sample_length >= num_tokens, (
        "--sample-length (%d) must be equal to or higher than length of --prefix (%d)"
        % (args.sample_length, num_tokens))
    # Number of new tokens to generate beyond the prefix.
    actual_length = args.sample_length - num_tokens

    custom_lm = TransformerLanguageModel.from_pretrained(
        args.model_dir, 'checkpoint_best.pt',
        verbose=args.verbose, max_len_b=actual_length)
    print(custom_lm.sample(args.prefix, beam=5))
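# Sketch of the parse_args() that main() above relies on; the exact flags, help
# strings, and defaults are assumptions inferred from the attributes main() reads
# (prefix, sample_length, model_dir, verbose), not the original implementation.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Sample from a fairseq Transformer LM")
    parser.add_argument("--prefix", required=True, help="Prompt to continue")
    parser.add_argument("--sample-length", type=int, default=50,
                        help="Total sample length, prefix included")
    parser.add_argument("--model-dir", required=True,
                        help="Directory containing checkpoint_best.pt")
    parser.add_argument("--verbose", action="store_true")
    return parser.parse_args()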
import logging

from fairseq.models.transformer_lm import TransformerLanguageModel


def main():
    args = parse_args()
    logging.basicConfig(level=logging.DEBUG)
    logging.debug(args)

    lm = TransformerLanguageModel.from_pretrained(args.model, 'checkpoint_best.pt')
    lm.eval()  # disable dropout
    if args.cuda:
        lm.cuda()

    with open(args.input, "r") as infile:
        num_lines = sum(1 for line in infile)
    logging.debug("Number of lines in input file: %d" % num_lines)

    seen = 0
    with open(args.input, "r") as infile, open(args.output, "w") as outfile:
        for line in infile:
            line = line.strip()
            # Mean per-token log-probability of the line under the LM.
            score = lm.score(line)['positional_scores'].mean()
            if args.score_type in [SCORE_TYPE_PPL, SCORE_TYPE_NEGLOGPROB]:
                score = score.neg()
            if args.score_type == SCORE_TYPE_PPL:
                score = score.exp()
            outfile.write("%f\n" % score)
            seen += 1
            if seen % LOG_INTERVAL == 0:
                logging.debug("Processed lines: %d / %d" % (seen, num_lines))
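# Sketch (not from the original code): how the score types above relate to the
# mean per-token log-probability returned by lm.score(); torch and the example
# value are used only for illustration.
import torch

logprob = torch.tensor(-2.5)   # example mean positional score (natural log)
neglogprob = logprob.neg()     # SCORE_TYPE_NEGLOGPROB
ppl = logprob.neg().exp()      # SCORE_TYPE_PPL, here exp(2.5) ~= 12.18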
print(output)
# output = ru_lm.sample('Дмитрий Карпов - это ', beam=1, sampling=True, sampling_topk=10, temperature=0.6)
# print(output)
# output = ru_lm.sample('Михаил Бурцев - это ', beam=1, sampling=True, sampling_topk=10, temperature=0.7)
# print(output)
# output = ru_lm.sample('Диляра Баймурзина - это ', beam=1, sampling=True, sampling_topk=10, temperature=0.8)
# print(output)

import ipdb
ipdb.set_trace()

print(output)

# ################################################################
from fairseq.models.transformer_lm import TransformerLanguageModel

custom_lm = TransformerLanguageModel.from_pretrained(
    '/home/alx/Cloud/spell_corr/py__spelling_corrector/language_models/fairseq_transformer_lm',
    'checkpoint100.pt',
    tokenizer='moses',
    bpe='fastbpe')
custom_lm.sample('Barack Obama', beam=5)  # Sample from the language model

# ru_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8)
# "Barack Obama is coming to Sydney and New Zealand (...)"
#
# The same interface can be used with custom models as well:
# from fairseq.models.transformer_lm import TransformerLanguageModel
# custom_lm = TransformerLanguageModel.from_pretrained('/path/to/model/dir', 'checkpoint100.pt', tokenizer='moses', bpe='fastbpe')
# custom_lm.sample('Barack Obama', beam=5)
from fairseq.models.transformer_lm import TransformerLanguageModel

lg = "it"
model = TransformerLanguageModel.from_pretrained(
    f"models/word/{lg}/transformer",
    checkpoint_file="checkpoint_best.pt",
    data_name_or_path=f"models/word/{lg}/transformer/bin/",
)
model.eval()
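# Sketch (assumption, not from the original snippet): scoring a sentence with the
# Italian word-level LM loaded above; the example sentence is made up.
hypo = model.score("il gatto dorme sul divano")
print(hypo['positional_scores'].mean().neg().exp())  # per-token perplexity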
def __init__(self, lm_model_path):
    # Requires: import torch; from fairseq.models.transformer_lm import TransformerLanguageModel
    self.lm = TransformerLanguageModel.from_pretrained(
        lm_model_path, 'checkpoint_best.pt', tokenizer='moses')
    self.lm.eval()  # disable dropout
    if torch.cuda.is_available():
        self.lm.cuda()
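# Sketch (assumption): the __init__ above presumably belongs to a small wrapper
# class; "LMWrapper" and the model directory below are hypothetical names used
# only for illustration.
# wrapper = LMWrapper('/path/to/lm_dir')
# print(wrapper.lm.sample('Hello world', beam=5))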