Example #1
def add_args(parser):
    # Inherit the base TransformerLanguageModel CLI, then register two
    # model-specific switches for number-aware attention and embeddings.
    TransformerLanguageModel.add_args(parser)
    parser.add_argument(
        '--add-number-token-attention-mask',
        action="store_true",
        default=False,
        help="add self-attention masking to force inner-digits attention")
    parser.add_argument(
        '--add-number-rotation-embedding',
        action="store_true",
        default=False,
        help="add rotation embedding to the <bon> based on number value")
Example #2
def load_lm(save_path, checkpoint_name, bpe_code):
    lm = TransformerLanguageModel.from_pretrained(save_path,
                                                  checkpoint_name,
                                                  tokenizer='moses',
                                                  bpe='fastbpe',
                                                  bpe_codes=bpe_code)
    lm.eval()  # inference mode: disables dropout
    lm.cuda()  # move the model to the GPU
    return lm
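A hedged usage sketch for this loader; the model directory, checkpoint name, and BPE codes path are placeholders:

# Hypothetical paths for illustration only.
lm = load_lm('/path/to/model/dir', 'checkpoint_best.pt', '/path/to/bpe.codes')
print(lm.sample('Hello', beam=5))  # the hub interface can now sample text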
Example #3
import os

from fairseq.models.fconv_lm import FConvLanguageModel
from fairseq.models.transformer_lm import TransformerLanguageModel

def load_lm(lm_path, model_type, dict_path):
    path, checkpoint = os.path.split(lm_path)
    if model_type == "convlm":
        model_handle = FConvLanguageModel.from_pretrained(
            path, checkpoint,
            os.path.split(dict_path)[0])
    elif model_type == "transformer":
        model_handle = TransformerLanguageModel.from_pretrained(
            path, checkpoint,
            os.path.split(dict_path)[0])
    else:
        raise Exception(
            "Unsupported language model type: use 'convlm' or 'transformer' models"
        )
    # Unwrap the hub interface and keep only the raw decoder, on GPU.
    model = model_handle.models[0].decoder.cuda()
    model.eval()
    print(model)  # log the architecture
    return model
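A hedged sketch of driving the raw decoder this returns, assuming the standard fairseq decoder interface (forward takes a batch of token ids and returns per-position vocabulary scores plus extras); paths and token ids are placeholders:

import torch

decoder = load_lm('/path/to/checkpoint.pt', 'transformer', '/path/to/dict.txt')
tokens = torch.tensor([[2, 15, 27, 42]]).cuda()  # hypothetical token ids
with torch.no_grad():
    logits, _ = decoder(tokens)  # shape: (batch, seq_len, vocab)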
Example #4
def main():

    args = parse_args()

    logging.basicConfig(level=logging.DEBUG)
    logging.debug(args)

    tokens = args.prefix.split(" ")
    num_tokens = len(tokens)

    assert args.sample_length >= num_tokens, (
        "--sample-length (%d) must be at least the length of --prefix (%d)"
        % (args.sample_length, num_tokens))

    actual_length = args.sample_length - num_tokens

    custom_lm = TransformerLanguageModel.from_pretrained(
        args.model_dir,
        'checkpoint_best.pt',
        verbose=args.verbose,
        max_len_b=actual_length)
    print(custom_lm.sample(args.prefix, beam=5))
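In fairseq's generator the maximum output length is max_len_a * src_len + max_len_b (with max_len_a defaulting to 0), so passing the remaining token budget as max_len_b caps the sample at roughly --sample-length tokens. A small worked example:

# With --sample-length 20 and a 3-token --prefix:
sample_length, num_tokens = 20, 3
actual_length = sample_length - num_tokens  # 17 tokens left to generate
# max_len = max_len_a * src_len + max_len_b = 0 * 3 + 17 = 17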
Example #5
def main():

    args = parse_args()

    logging.basicConfig(level=logging.DEBUG)
    logging.debug(args)

    lm = TransformerLanguageModel.from_pretrained(args.model, 'checkpoint_best.pt')

    # disable dropout

    lm.eval()

    if args.cuda:
        lm.cuda()

    with open(args.input, "r") as infile:
        num_lines = sum(1 for line in infile)
        logging.debug("Number of lines in input file: %d" % num_lines)

    seen = 0

    with open(args.input, "r") as infile, open(args.output, "w") as outfile:
        for line in infile:
            line = line.strip()

            # Mean per-token log-probability of the line under the LM.
            score = lm.score(line)['positional_scores'].mean()

            if args.score_type in [SCORE_TYPE_PPL, SCORE_TYPE_NEGLOGPROB]:
                score = score.neg()

            if args.score_type == SCORE_TYPE_PPL:
                score = score.exp()

            outfile.write("%f\n" % score)

            seen += 1

            if seen % LOG_INTERVAL == 0:
                logging.debug("Processed lines: %d / %d" % (seen, num_lines))
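The three score types all derive from the mean per-token log-probability: negating it gives the average negative log-likelihood, and exponentiating that gives perplexity. The same arithmetic outside fairseq, with made-up numbers:

import math

logprobs = [-1.2, -0.3, -2.1]                # per-token log-probabilities
avg_logprob = sum(logprobs) / len(logprobs)  # what .mean() computes above
neg_log_prob = -avg_logprob                  # SCORE_TYPE_NEGLOGPROB
ppl = math.exp(neg_log_prob)                 # SCORE_TYPE_PPL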
Example #6
# NOTE: `ru_lm` is a TransformerLanguageModel hub interface loaded earlier
# in the original script (the loading code is not part of this snippet).
output = ru_lm.sample('Дмитрий Карпов - это ', beam=1, sampling=True,
                      sampling_topk=10, temperature=0.6)
print(output)
# Alternative prompts and temperatures from the same script:
# output = ru_lm.sample('Михаил Бурцев - это ', beam=1, sampling=True, sampling_topk=10, temperature=0.7)
# output = ru_lm.sample('Диляра Баймурзина - это ', beam=1, sampling=True, sampling_topk=10, temperature=0.8)

# ################################################################
from fairseq.models.transformer_lm import TransformerLanguageModel

custom_lm = TransformerLanguageModel.from_pretrained(
    '/home/alx/Cloud/spell_corr/py__spelling_corrector/language_models/fairseq_transformer_lm',
    'checkpoint100.pt',
    tokenizer='moses',
    bpe='fastbpe')
# Sample from the language model, e.g.
# "Barack Obama is coming to Sydney and New Zealand (...)"
custom_lm.sample('Barack Obama', beam=5)
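The sampling calls above combine top-k filtering with temperature scaling. A minimal sketch of the idea in plain PyTorch (illustrative only, not fairseq's actual generator code):

import torch

def sample_top_k(logits, k=10, temperature=0.8):
    # Keep the k highest-scoring tokens, rescale by temperature,
    # and draw one token id from the resulting distribution.
    topk_logits, topk_ids = torch.topk(logits, k)
    probs = torch.softmax(topk_logits / temperature, dim=-1)
    choice = torch.multinomial(probs, num_samples=1)
    return topk_ids[choice]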
Example #7
def add_args(parser):
    # Reuse the base model's CLI arguments unchanged.
    TransformerLanguageModel.add_args(parser)
Example #8
from fairseq.models.transformer_lm import TransformerLanguageModel

lg = "it"

model = TransformerLanguageModel.from_pretrained(
    f"models/word/{lg}/transformer",
    checkpoint_file="checkpoint_best.pt",
    data_name_or_path=f"models/word/{lg}/transformer/bin/",
)
model.eval()
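Here data_name_or_path points at the binarized data directory holding the model's dictionary. A hedged usage sketch once the model is loaded (the Italian prompt is arbitrary):

print(model.sample("Il gatto", beam=5))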
Example #9
def __init__(self, lm_model_path):
    # Constructor of an LM wrapper class (the class itself is not shown).
    self.lm = TransformerLanguageModel.from_pretrained(
        lm_model_path, 'checkpoint_best.pt', tokenizer='moses')
    self.lm.eval()  # disable dropout
    if torch.cuda.is_available():
        self.lm.cuda()  # use the GPU when one is present
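A hedged sketch of using this constructor, assuming it belongs to a scorer-style wrapper class; the class name LMScorer and the path are hypothetical:

scorer = LMScorer('/path/to/model/dir')  # hypothetical enclosing class
print(scorer.lm.score('Hello world')['positional_scores'].mean())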