def gv_margin(x, margin):
    # Signature inferred from the call in gv_loss below: hinge the value so it never drops below the margin.
    return torch.max(margin, x)

def gv_loss(z_no_lesion, z_with_lesion):
    mse_loss = nn.MSELoss(reduction='mean')
    nl = vae.decode(z_no_lesion)
    wl = vae.decode(z_with_lesion)
    # Margin-clamped distance between the two latent codes; keep the margin on the same device as the latents.
    gv = gv_margin(mse_loss(z_no_lesion, z_with_lesion),
                   torch.tensor([1.0], device=z_no_lesion.device))
    # Classifier penalty on the decoded "no lesion" image.
    cnn_loss = 0.1 * cnn(nl)
    print(f"GV: {gv.data} CNN: {cnn_loss.data}")
    loss = gv + cnn_loss
    return loss, nl, wl

transform = TRANSFORMER().to(device)
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(transform.parameters(), lr=LEARNING_RATE, amsgrad=True)

for (positive, _) in positive_loader:
    # Take a positive sample and extract its reconstruction and latent z.
    reconstruction, z_with_lesion = vae(positive)
    for x in range(100):
        # Transform latent z (lesion) => latent z (no lesion).
        z_no_lesion = transform(z_with_lesion)
        loss, decoded_nl, decoded_wl = gv_loss(z_no_lesion, z_with_lesion)
        optimizer.zero_grad()
        loss.sum().backward(retain_graph=True)
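The loss above hinges the latent-space MSE at a fixed margin: once the transformed code is within the margin of the original, the distance term stops contributing gradient and only the CNN penalty keeps pulling. The following is a minimal, self-contained sketch of that clamping behaviour, using only torch and a stand-in hinge written the same way as gv_margin; the toy tensors are illustrative, not part of the original code.

import torch

def gv_margin(x, margin):
    # Same hinge as above: the result never drops below the margin value.
    return torch.max(margin, x)

margin = torch.tensor([1.0])

# Distance already inside the margin: the hinge returns the margin itself,
# so no gradient flows back into the latent codes through this term.
small = torch.tensor([0.3], requires_grad=True)
gv_margin(small, margin).backward()
print(small.grad)   # tensor([0.])

# Distance outside the margin: the hinge acts as the identity, so minimising
# it pulls the transformed code back towards the original one.
large = torch.tensor([2.5], requires_grad=True)
gv_margin(large, margin).backward()
print(large.grad)   # tensor([1.])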
# This is where your model code will be called. You may modify this code
# if required for your implementation, but it should not typically be necessary,
# and you must let the TAs know if you do so.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters (such as the number
        # of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
                            n_blocks=args.num_layers, dropout=1. - args.dp_keep_prob)
    # These 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch.
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")
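The branch above reads its hyperparameters from an argparse Namespace. As a point of reference, here is a hypothetical parser covering only the flags actually used in that branch; the real training script defines more options, and the defaults shown are illustrative assumptions, not the assignment's values.

import argparse

# Hypothetical parser: only the flags read by the model-selection code above.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='GRU',
                    help="RNN, GRU, or TRANSFORMER")
parser.add_argument('--emb_size', type=int, default=200)
parser.add_argument('--hidden_size', type=int, default=512)
parser.add_argument('--seq_len', type=int, default=35)
parser.add_argument('--batch_size', type=int, default=20)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--dp_keep_prob', type=float, default=0.35)
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()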
if args["model"] == 'RNN':
    model = RNN(emb_size=args["emb_size"], hidden_size=args["hidden_size"],
                seq_len=args["seq_len"], batch_size=args["batch_size"],
                vocab_size=vocab_size, num_layers=args["num_layers"],
                dp_keep_prob=args["dp_keep_prob"])
elif args["model"] == 'GRU':
    model = GRU(emb_size=args["emb_size"], hidden_size=args["hidden_size"],
                seq_len=args["seq_len"], batch_size=args["batch_size"],
                vocab_size=vocab_size, num_layers=args["num_layers"],
                dp_keep_prob=args["dp_keep_prob"])
elif args["model"] == 'TRANSFORMER':
    if args["debug"]:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters (such as the number
        # of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args["hidden_size"],
                            n_blocks=args["num_layers"], dropout=1. - args["dp_keep_prob"])
    # These 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch.
    model.batch_size = args["batch_size"]
    model.seq_len = args["seq_len"]
    model.vocab_size = vocab_size
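The only difference from the previous block is that args is a plain dict rather than an argparse Namespace, so hyperparameters are read with args["..."] indexing instead of attribute access. A hedged sketch of how such a dict might be set up, for example in a notebook; the values are illustrative, not the assignment's defaults.

# Hypothetical notebook-style configuration standing in for the argparse
# Namespace; this is why the variant above indexes args["..."].
args = {
    'model': 'TRANSFORMER',
    'emb_size': 200,
    'hidden_size': 512,
    'seq_len': 35,
    'batch_size': 128,
    'num_layers': 6,
    'dp_keep_prob': 0.9,
    'debug': False,
}

# Equivalently, an existing argparse Namespace can be converted to a dict:
# args = vars(parser.parse_args())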