Example #1
def test_inference_no_head(self):
    model = M2M100Model.from_pretrained("facebook/m2m100_418M").to(torch_device)
    # Source token ids and the corresponding decoder inputs (source shifted right,
    # starting from the eos/decoder-start id 2)
    input_ids = _long_tensor([[128028, 98, 12, 30527, 2732, 159, 7755, 61904, 39144, 38, 2]])
    decoder_input_ids = _long_tensor([[2, 128028, 98, 12, 30527, 2732, 159, 7755, 61904, 39144, 38]])
    inputs_dict = prepare_m2m_100_inputs_dict(model.config, input_ids, decoder_input_ids)
    with torch.no_grad():
        output = model(**inputs_dict)[0]
    # The bare model returns decoder hidden states of shape (batch, seq_len, d_model)
    expected_shape = torch.Size((1, 11, 1024))
    self.assertEqual(output.shape, expected_shape)
    # Reference values for the first 3x3 block of the hidden states
    expected_slice = torch.tensor(
        [[-0.7780, -0.1676, 0.1038], [-6.7556, -1.3992, 0.0567], [-7.5383, -0.5920, -0.2779]],
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
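The test relies on helpers defined alongside it: _long_tensor, prepare_m2m_100_inputs_dict, and the module-level torch_device and TOLERANCE constants. A minimal sketch of what they could look like, assuming the inputs dict only carries the token ids plus padding masks (the originals are not shown here):

# Hypothetical test helpers, assumed for illustration (not copied from the original suite)
import torch

torch_device = "cuda" if torch.cuda.is_available() else "cpu"
TOLERANCE = 1e-4  # assumed absolute tolerance for the allclose check


def _long_tensor(tok_lst):
    # Wrap nested lists of token ids in a LongTensor on the test device
    return torch.tensor(tok_lst, dtype=torch.long, device=torch_device)


def prepare_m2m_100_inputs_dict(config, input_ids, decoder_input_ids):
    # Forward-pass kwargs; padding positions are masked out of the attention
    return {
        "input_ids": input_ids,
        "decoder_input_ids": decoder_input_ids,
        "attention_mask": input_ids.ne(config.pad_token_id),
        "decoder_attention_mask": decoder_input_ids.ne(config.pad_token_id),
    }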
Example #2
import torch
from transformers import M2M100Model, M2M100Tokenizer


def load(args):
    # Informational log only; could be removed later
    print('loading M2M-100 model')
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    # An earlier version loaded a BERT checkpoint instead:
    # tokenizer = BertTokenizer.from_pretrained(args.m2m100_model, do_lower_case=True, cache_dir=args.cache_dir)
    # model = BertModel.from_pretrained(args.m2m100_model, cache_dir=args.cache_dir)
    model = M2M100Model.from_pretrained('facebook/m2m100_418M')
    tokenizer = M2M100Tokenizer.from_pretrained('facebook/m2m100_418M')
    model.to(device)
    # Replicate the model across GPUs when more than one is requested
    if args.num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model.eval()
    return model, tokenizer, device
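A possible way to call load() and run a forward pass with the returned model; the argparse fields (no_cuda, num_gpus) and the sample sentence are assumptions for illustration only:

# Hypothetical caller; only the fields load() actually reads are set on args.
import argparse
import torch

args = argparse.Namespace(no_cuda=False, num_gpus=1)
model, tokenizer, device = load(args)

tokenizer.src_lang = "en"
batch = tokenizer("Life is like a box of chocolates.", return_tensors="pt").to(device)
with torch.no_grad():
    # M2M100Model is the bare encoder-decoder, so decoder inputs must be supplied;
    # reusing the source ids here only demonstrates the forward pass, not translation.
    outputs = model(**batch, decoder_input_ids=batch["input_ids"])
print(outputs.last_hidden_state.shape)  # (1, seq_len, 1024) for the 418M checkpoint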