def test_transformer_multi_gpu_trainable(model_dict):
    """Check that Transformer can take one optimizer step under DataParallel on 2 GPUs."""
    # build the training configuration from the test-case dict
    transformer_args = make_transformer_args(**model_dict)

    # prepare a dummy batch directly on the GPU
    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    device = torch.device("cuda")
    batch = prepare_inputs(
        idim, odim, ilens, olens, transformer_args["spk_embed_dim"], device=device
    )

    # wrap the model for multi-GPU data-parallel training
    ngpu = 2
    device_ids = list(range(ngpu))
    model = Transformer(idim, odim, Namespace(**transformer_args))
    model = torch.nn.DataParallel(model, device_ids)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # a single forward/backward/step must run without error
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ScaledPositionalEncoding's alpha must have received a gradient
    if model.module.use_scaled_pos_enc:
        assert model.module.encoder.embed[1].alpha.grad is not None
        assert model.module.decoder.embed[1].alpha.grad is not None
def test_transformer_gpu_trainable(model_dict):
    """Check that Transformer can take one optimizer step on a single GPU."""
    # build the training configuration from the test-case dict
    transformer_args = make_transformer_args(**model_dict)

    # prepare a dummy batch directly on the GPU
    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    device = torch.device("cuda")
    batch = prepare_inputs(idim, odim, ilens, olens, device=device)

    # build model and optimizer on the GPU
    model = Transformer(idim, odim, Namespace(**transformer_args))
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # a single forward/backward/step must run without error
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ScaledPositionalEncoding's alpha must have received a gradient
    if model.use_scaled_pos_enc:
        assert model.encoder.embed[1].alpha.grad is not None
        assert model.decoder.embed[1].alpha.grad is not None
def test_transformer_trainable_and_decodable(model_dict):
    """Check that Transformer trains on CPU and then supports inference/attention plotting."""
    # build training and inference configurations
    transformer_args = make_transformer_args(**model_dict)
    inference_args = make_inference_args()

    # prepare a dummy CPU batch
    idim = 5
    odim = 10
    ilens = [10, 5]
    olens = [20, 15]
    batch = prepare_inputs(idim, odim, ilens, olens)

    # build model and optimizer
    model = Transformer(idim, odim, Namespace(**transformer_args))
    optimizer = torch.optim.Adam(model.parameters())

    # a single forward/backward/step must run without error
    loss = model(**batch).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ScaledPositionalEncoding's alpha must have received a gradient
    if model.use_scaled_pos_enc:
        assert model.encoder.embed[1].alpha.grad is not None
        assert model.decoder.embed[1].alpha.grad is not None

    # inference and attention calculation must run in eval mode
    model.eval()
    with torch.no_grad():
        model.inference(
            batch["xs"][0][: batch["ilens"][0]], Namespace(**inference_args)
        )
        model.calculate_all_attentions(**batch)
def _load_teacher_model(self, model_path):
    """Load a frozen teacher Transformer from a checkpoint path.

    Args:
        model_path: Path to the teacher model checkpoint.

    Returns:
        Teacher Transformer whose parameters do not require gradients.
    """
    # read idim/odim and training args stored alongside the checkpoint
    idim, odim, args = get_model_conf(model_path)

    # teacher and student must agree on dimensions and reduction factor
    assert idim == self.idim
    assert odim == self.odim
    assert args.reduction_factor == self.reduction_factor

    # instantiate the teacher and restore its weights
    model = Transformer(idim, odim, args)
    torch_load(model_path, model)

    # freeze every teacher parameter so it is never updated
    for param in model.parameters():
        param.requires_grad = False
    return model