from allennlp.modules.transformer.t5 import T5
from allennlp.nn.parallel import FairScaleFsdpAccelerator


def _test_distributed_load_state_dict(global_rank, world_size, gpu_id):
    # Load the pretrained weights under FairScale FSDP so each worker only
    # materializes its own shard of the "t5-small" parameters.
    T5.from_pretrained_module(
        "t5-small",
        ddp_accelerator=FairScaleFsdpAccelerator(
            local_rank=global_rank, world_size=world_size, cuda_device=gpu_id
        ),
    )
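# A minimal launch sketch for the distributed test above. This harness is an
# assumption for illustration (AllenNLP's own test suite drives such tests
# through its `run_distributed_test` utility rather than hand-rolled
# multiprocessing); the rendezvous address/port are placeholders.
import os

import torch.distributed as dist
import torch.multiprocessing as mp


def _worker(rank: int, world_size: int) -> None:
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    # One GPU per worker: gpu_id == rank.
    _test_distributed_load_state_dict(rank, world_size, rank)
    dist.destroy_process_group()


if __name__ == "__main__":
    world_size = 2  # assumes at least two CUDA devices
    mp.spawn(_worker, args=(world_size,), nprocs=world_size)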
def __init__(
    self,
    vocab: Vocabulary,
    model_name: str,
    beam_search: Lazy[BeamSearch] = Lazy(BeamSearch, beam_size=3, max_steps=50),
    checkpoint_wrapper: Optional[CheckpointWrapper] = None,
    weights_path: Optional[Union[str, PathLike]] = None,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self._model_name = model_name
    # We only instantiate this when we need it.
    self._tokenizer: Optional[PretrainedTransformerTokenizer] = None
    # `self.ddp_accelerator` is set by the base `Model` constructor
    # (it can be passed through `**kwargs` above).
    self.t5 = T5Module.from_pretrained_module(
        model_name,
        beam_search=beam_search,
        ddp_accelerator=self.ddp_accelerator,
        checkpoint_wrapper=checkpoint_wrapper,
        weights_path=weights_path,
    )
    # Ignore padding and the special start/end tokens when computing metrics.
    exclude_indices = {
        self.t5.pad_token_id,
        self.t5.decoder_start_token_id,
        self.t5.eos_token_id,
    }
    self._metrics = [
        ROUGE(exclude_indices=exclude_indices),
        BLEU(exclude_indices=exclude_indices),
    ]
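# A usage sketch for the constructor above, assuming the enclosing class is
# the T5 model wrapper from allennlp-models; the import path, empty
# vocabulary, and beam settings here are illustrative assumptions, not
# project defaults.
from allennlp.common.lazy import Lazy
from allennlp.data import Vocabulary
from allennlp.nn.beam_search import BeamSearch
from allennlp_models.generation.models.t5 import T5 as T5Model

model = T5Model(
    Vocabulary.empty(),
    "t5-small",
    beam_search=Lazy(BeamSearch, beam_size=5, max_steps=64),
)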
def __init__(self, vocab: Vocabulary, model_name: str, **kwargs) -> None:
    super().__init__(vocab, **kwargs)
    self._model_name = model_name
    # We only instantiate this when we need it.
    self._tokenizer: Optional[PretrainedTransformerTokenizer] = None
    self.t5 = T5Module.from_pretrained_module(model_name)
    exclude_indices = {
        self.t5.pad_token_id,
        self.t5.decoder_start_token_id,
        self.t5.eos_token_id,
    }
    self._metrics = [
        ROUGE(exclude_indices=exclude_indices),
        BLEU(exclude_indices=exclude_indices),
    ]
def model(model_name):
    model = T5.from_pretrained_module(model_name).eval()
    # Cap beam search so test-time decoding stays fast.
    model.beam_search.max_steps = 5
    return model
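# Sketch of the pytest wiring the fixture above presumably relies on; the
# decorator placement, scope, and checkpoint list are assumptions for
# illustration, not the project's actual test configuration.
import pytest


@pytest.fixture(scope="module", params=["t5-small"])
def model_name(request) -> str:
    return request.param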
def test_create_t5_from_pretrained(pretrained_model_name: str):
    # Smoke test: loading the pretrained module should not raise.
    T5.from_pretrained_module(pretrained_model_name)
def test_create_t5_from_pretrained(pretrained_model_name: str):
    model = T5.from_pretrained_module(pretrained_model_name)
    # Make sure weights are tied.
    assert id(model.token_embeddings.weight) == id(model.lm_head.weight)
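# To illustrate what the tied-weights assertion above checks: both attributes
# reference the same Parameter, so an in-place update through one is visible
# through the other. "t5-small" is used here only as a small checkpoint.
import torch

model = T5.from_pretrained_module("t5-small")
with torch.no_grad():
    model.token_embeddings.weight[0, 0] += 1.0
assert torch.equal(model.token_embeddings.weight[0, 0], model.lm_head.weight[0, 0])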