def test_inference_no_head(self):
    model = M2M100Model.from_pretrained("facebook/m2m100_418M").to(torch_device)
    input_ids = _long_tensor([[128028, 98, 12, 30527, 2732, 159, 7755, 61904, 39144, 38, 2]])
    decoder_input_ids = _long_tensor([[2, 128028, 98, 12, 30527, 2732, 159, 7755, 61904, 39144, 38]])
    inputs_dict = prepare_m2m_100_inputs_dict(model.config, input_ids, decoder_input_ids)
    with torch.no_grad():
        output = model(**inputs_dict)[0]
    expected_shape = torch.Size((1, 11, 1024))
    self.assertEqual(output.shape, expected_shape)
    # change to expected output here
    expected_slice = torch.tensor(
        [[-0.7780, -0.1676, 0.1038], [-6.7556, -1.3992, 0.0567], [-7.5383, -0.5920, -0.2779]],
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
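
# The helpers used above (_long_tensor, prepare_m2m_100_inputs_dict) are defined
# elsewhere in the test module. Below is a minimal sketch of what such helpers
# plausibly look like, assuming the inputs dict only needs the ids plus padding
# masks derived from config.pad_token_id; this is an illustration, not the
# verbatim upstream helpers.
def _long_tensor(tok_lst):
    # Build a LongTensor on the test device from a nested list of token ids.
    return torch.tensor(tok_lst, dtype=torch.long, device=torch_device)


def prepare_m2m_100_inputs_dict(config, input_ids, decoder_input_ids,
                                attention_mask=None, decoder_attention_mask=None):
    # Default the attention masks to "everything that is not padding".
    if attention_mask is None:
        attention_mask = input_ids.ne(config.pad_token_id)
    if decoder_attention_mask is None:
        decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
    return {
        "input_ids": input_ids,
        "decoder_input_ids": decoder_input_ids,
        "attention_mask": attention_mask,
        "decoder_attention_mask": decoder_attention_mask,
    }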
import torch
from transformers import M2M100Model, M2M100Tokenizer


def load(args):
    """Load the M2M-100 model and tokenizer and move the model to the target device."""
    print('loading M2M-100 model')
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    model = M2M100Model.from_pretrained('facebook/m2m100_418M')
    tokenizer = M2M100Tokenizer.from_pretrained('facebook/m2m100_418M')
    model.to(device)
    # Wrap in DataParallel when more than one GPU is requested.
    if args.num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model.eval()
    return model, tokenizer, device
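
# Illustrative usage sketch (not part of the original source): exercising the
# objects returned by load(). `args` here is a stand-in namespace carrying the
# `no_cuda` and `num_gpus` attributes that load() reads; note that the bare
# M2M100Model loaded above exposes hidden states rather than LM logits.
if __name__ == "__main__":
    from types import SimpleNamespace

    args = SimpleNamespace(no_cuda=False, num_gpus=1)
    model, tokenizer, device = load(args)

    tokenizer.src_lang = "en"
    inputs = tokenizer("Life is like a box of chocolates.", return_tensors="pt").to(device)
    with torch.no_grad():
        # The bare model requires decoder_input_ids; reusing the encoder ids is
        # enough for a shape check, since no generation head is attached here.
        outputs = model(**inputs, decoder_input_ids=inputs["input_ids"])
    print(outputs.last_hidden_state.shape)  # (batch, seq_len, 1024) for the 418M checkpoint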