def build_model(cls, args, task):
    """Construct a :class:`TransformerAlignModel` from parsed args.

    Fills in any unset architecture defaults via ``transformer_align``
    before delegating to ``TransformerModel.build_model``, then wraps the
    resulting encoder/decoder pair in the alignment-aware model.
    """
    # Populate architecture defaults for args the user did not set.
    transformer_align(args)
    base = TransformerModel.build_model(args, task)
    return TransformerAlignModel(base.encoder, base.decoder, args)
def load_ensemble_for_inference(filenames, model_arg_overrides=None):
    """Load an ensemble of models for inference.

    Args:
        filenames: iterable of checkpoint paths, one per ensemble member.
        model_arg_overrides: optional dict ``{'arg_name': arg}`` used to
            override model args that were used during model training.
            (The original docstring documented this parameter but the
            signature did not accept it; it is now supported, defaulting
            to ``None`` for backward compatibility.)

    Returns:
        Tuple ``(ensemble, args, src_dict, tgt_dict)``; ``args`` are the
        (possibly overridden) args of the last checkpoint loaded.

    Raises:
        IOError: if any path in ``filenames`` does not exist.
    """
    # Load model architectures and weights onto CPU; device placement is
    # left to the caller.
    states = []
    for filename in filenames:
        if not os.path.exists(filename):
            raise IOError('Model file not found: {}'.format(filename))
        state = torch.load(
            filename,
            map_location=lambda s, l: default_restore_location(s, 'cpu'))
        states.append(state)

    ensemble = []
    for state in states:
        args = state['args']
        if model_arg_overrides is not None:
            # Apply caller-supplied overrides on top of the stored
            # training-time args.
            vars(args).update(model_arg_overrides)
        # Build model for ensemble.
        # NOTE(review): other call sites pass (args, task) to build_model —
        # confirm this single-argument variant is intended here.
        model = TransformerModel.build_model(args)
        model.load_state_dict(state['model'], strict=True)
        ensemble.append(model)

    # Dictionaries are taken from the first checkpoint; all members are
    # assumed to share vocabularies.
    src_dict = states[0]['extra_state']['src_dict']
    tgt_dict = states[0]['extra_state']['tgt_dict']
    return ensemble, args, src_dict, tgt_dict
def test_export_transformer(self):
    """A default-configured transformer must survive TorchScript scripting
    and a save/load round trip."""
    task, parser = get_dummy_task_and_parser()
    TransformerModel.add_args(parser)
    model = TransformerModel.build_model(parser.parse_args([]), task)
    _test_save_and_load(torch.jit.script(model))
def build_transformer(vocab, d=16, ffn_dim=32, encoder_layers=2, decoder_layers=2, attention_heads=2):
    """Build a small TransformerModel over *vocab* for testing.

    *vocab* serves as both source and target dictionary; the remaining
    keyword arguments size the encoder/decoder on top of
    ``FAIRSEQ_DEFAULT_ARGS``.
    """
    FakeTask = namedtuple('FakeTask', 'source_dictionary target_dictionary')
    fake_task = FakeTask(vocab, vocab)

    args = Namespace(**FAIRSEQ_DEFAULT_ARGS)
    # Apply the test-specific overrides on top of the defaults.
    overrides = {
        'share_all_embeddings': True,
        'encoder_embed_dim': d,
        'encoder_ffn_embed_dim': ffn_dim,
        'encoder_layers': encoder_layers,
        'encoder_attention_heads': attention_heads,
        'decoder_embed_dim': d,
        'decoder_ffn_embed_dim': ffn_dim,
        'decoder_attention_heads': attention_heads,
        'decoder_layers': decoder_layers,
        'tie_adaptive_weights': False,
        'decoder_output_dim': d,
        'decoder_input_dim': d,
    }
    for name, value in overrides.items():
        setattr(args, name, value)

    return TransformerModel.build_model(args, fake_task)
def test_sets_inference_dropout_to_true(self):
    """With retain_dropout set, dropout modules stay active at inference."""
    self.args.retain_dropout = True
    self.transformer_model = TransformerModel.build_model(self.args, self.task)
    self.transformer_model.prepare_for_inference_(self.args)
    model = self.transformer_model
    assert model.encoder.dropout_module.apply_during_inference
    assert model.decoder.dropout_module.apply_during_inference
    for enc_layer in model.encoder.layers:
        assert enc_layer.dropout_module.apply_during_inference
def test_export_transformer_no_token_pos_emb(self):
    """Scripting must also work with positional embeddings disabled."""
    task, parser = get_dummy_task_and_parser()
    TransformerModel.add_args(parser)
    args = parser.parse_args([])
    # Exercise the code path without token positional embeddings.
    args.no_token_positional_embeddings = True
    model = TransformerModel.build_model(args, task)
    _test_save_and_load(torch.jit.script(model))
def test_retain_modules(self):
    """Only module classes listed in retain_dropout_modules keep dropout
    active at inference; everything else is disabled."""
    self.args.retain_dropout = True
    self.args.retain_dropout_modules = ['TransformerEncoder', 'TransformerEncoderLayer']
    self.transformer_model = TransformerModel.build_model(self.args, self.task)
    self.transformer_model.prepare_for_inference_(self.args)
    model = self.transformer_model
    # Encoder was whitelisted; decoder was not.
    assert model.encoder.dropout_module.apply_during_inference
    assert not model.decoder.dropout_module.apply_during_inference
    for dec_layer in model.decoder.layers:
        assert not dec_layer.dropout_module.apply_during_inference
def test_inference_dropout_false_by_default(self):
    """Without retain_dropout, no dropout module applies at inference."""
    self.transformer_model = TransformerModel.build_model(self.args, self.task)
    self.transformer_model.prepare_for_inference_(self.args)
    model = self.transformer_model
    assert not model.encoder.dropout_module.apply_during_inference
    assert not model.decoder.dropout_module.apply_during_inference
    # Per-layer modules must be off on both sides as well.
    for sub_layer in model.encoder.layers:
        assert not sub_layer.dropout_module.apply_during_inference
    for sub_layer in model.decoder.layers:
        assert not sub_layer.dropout_module.apply_during_inference
def test_applies_training_mode(self):
    """Dropout modules track the model's train/eval mode."""
    self.transformer_model = TransformerModel.build_model(self.args, self.task)
    model = self.transformer_model
    # Freshly built model starts in training mode.
    assert model.encoder.dropout_module.training
    for enc_layer in model.encoder.layers:
        assert enc_layer.dropout_module.training
    # Switching to eval must propagate to the dropout modules.
    model.eval()
    assert not model.decoder.dropout_module.training
    for enc_layer in model.encoder.layers:
        assert not enc_layer.dropout_module.training
def build_model(cls, args, task):
    """Build a :class:`SimpleTransformerModel` wrapping a base transformer.

    ``args.user_mode`` is a comma-separated list of ``key[=value]``
    entries; entries without a value map to ``None``. The parsed mapping
    is attached to the wrapper and passed to the proxy encoder/decoder.
    """
    base = TransformerModel.build_model(args, task)
    wrapper = SimpleTransformerModel(base)

    # Parse "k1=v1,k2,k3=v3" into {'k1': 'v1', 'k2': None, 'k3': 'v3'}.
    user_mode = {}
    for entry in args.user_mode.split(','):
        pieces = entry.split('=')
        user_mode[pieces[0]] = pieces[1] if len(pieces) > 1 else None
    wrapper.user_mode = user_mode

    wrapper.decoder = ProxyDecoder(base, wrapper.user_mode)
    wrapper.encoder = ProxyEncoder(base, wrapper.user_mode)
    return wrapper
def build_model(self, args):
    """Build the :class:`~fairseq.models.BaseFairseqModel` instance for
    this task.

    Args:
        args (argparse.Namespace): parsed command-line arguments

    Returns:
        a :class:`~fairseq.models.BaseFairseqModel` instance
    """
    # Imported lazily; presumably avoids a circular import at module
    # load time — confirm against the package layout.
    from fairseq.models.transformer import TransformerModel

    model = TransformerModel.build_model(args, self)
    return model
def test_retain_modules(self):
    """retain_dropout_modules is honored when the args are converted to an
    omegaconf config before prepare_for_inference_."""
    self.args.retain_dropout = True
    self.args.retain_dropout_modules = [
        "TransformerEncoder",
        "TransformerEncoderLayer",
    ]
    self.transformer_model = TransformerModel.build_model(self.args, self.task)
    model = self.transformer_model
    # Exercise the omegaconf-based configuration path.
    cfg = convert_namespace_to_omegaconf(self.args)
    model.prepare_for_inference_(cfg)
    # Encoder side was whitelisted; decoder side was not.
    assert model.encoder.dropout_module.apply_during_inference
    assert not model.decoder.dropout_module.apply_during_inference
    for dec_layer in model.decoder.layers:
        assert not dec_layer.dropout_module.apply_during_inference
def setUp(self):
    """Build a two-sentence dummy batch and a small transformer model."""
    self.task, self.parser = get_dummy_task_and_parser()
    eos = self.task.tgt_dict.eos()

    # Two sequences of 10 random tokens each, EOS appended to both.
    src_tokens = torch.randint(3, 50, (2, 10)).long()
    eos_column = torch.LongTensor([[eos], [eos]])
    src_tokens = torch.cat((src_tokens, eos_column), -1)
    src_lengths = torch.LongTensor([2, 10])
    self.sample = {
        "net_input": {"src_tokens": src_tokens, "src_lengths": src_lengths}
    }

    TransformerModel.add_args(self.parser)
    model_args = self.parser.parse_args([])
    # Keep the model small so tests stay fast.
    model_args.encoder_layers = 2
    model_args.decoder_layers = 1
    self.transformer_model = TransformerModel.build_model(model_args, self.task)
def __init__(self):
    """Set up a SequenceGenerator over a small dummy transformer."""
    super().__init__()
    task, parser = _get_dummy_task_and_parser()
    TransformerModel.add_args(parser)
    model_args = parser.parse_args([])
    # Shrink the model so construction and generation stay cheap.
    model_args.encoder_layers = 2
    model_args.decoder_layers = 1
    model = TransformerModel.build_model(model_args, task)
    self.sequence_generator = SequenceGenerator(
        [model],
        task.tgt_dict,
        beam_size=2,
        no_repeat_ngram_size=2,
        max_len_b=10,
    )
def get_model_and_args(model_args):
    """Build a TransformerModel from a checkpoint.

    *model_args* is the raw argument list that ``initialize_model`` will
    receive; it is parsed here and merged into the training-time args
    stored in the checkpoint.
    """
    parser = argparse.ArgumentParser()
    # Required parameters by the model.
    parser.add_argument("--checkpoint", default=None, type=str, required=True,
                        help="The checkpoint of the model. ")
    parser.add_argument('--batch-size', default=10240, type=int,
                        help='Batch size for inference')
    parser.add_argument('--num-batches', default=2, type=int,
                        help='Number of batches to check accuracy on')
    parser.add_argument("--data", default=None, type=str, required=True,
                        help="Path to the dataset")
    parser.add_argument('--part', choices=['encoder', 'decoder', 'model'],
                        default='model', type=str,
                        help='Choose the part of the model to export')
    cli = parser.parse_args(model_args)

    state_dict = torch.load(cli.checkpoint, map_location='cpu')
    # Start from the training-time args stored in the checkpoint and
    # patch them with the command-line values.
    model_args = state_dict['args']
    model_args.data = cli.data
    model_args.num_batches = cli.num_batches
    model_args.max_tokens = cli.batch_size
    model_args.fuse_layer_norm = False
    model_args.part = cli.part

    model = TransformerModel.build_model(model_args)
    model.load_state_dict(state_dict['model'], strict=True)
    model.make_generation_fast_(need_attn=False)
    return model, model_args
def _create_model(cls, checkpoints, device, beam_size, use_fp16):
    """Build an inference-ready TransformerModel from *checkpoints*,
    optionally moving it to a CUDA device and casting to fp16."""
    model = TransformerModel.build_model(checkpoints.args, checkpoints.task)

    # Custom make_generation_fast_: temporarily no-op model.eval.
    # NOTE(review): presumably this prevents make_generation_fast_ from
    # changing the module's train/eval state as a side effect — confirm.
    saved_eval, saved_train = model.eval, model.train
    model.eval = lambda: None
    model.make_generation_fast_(
        beamable_mm_beam_size=None if beam_size == 0 else beam_size,
        need_attn=True,  # --print-alignment
    )
    model.eval, model.train = saved_eval, saved_train

    if device is not None:
        torch.cuda.set_device(device)
        model = model.cuda(device)
    if use_fp16:
        model.half()
    return model