def model_training(t_alpha, t_beta, t_gamma):
    """Run the CVAE training loop over the SQuAD training set.

    Logs running positive/negative losses every `p_epochs` steps,
    checkpoints the model, and evaluates on dev data every `e_epochs`
    steps.  `t_alpha` annealing is intentionally disabled (per the
    original author); `t_beta` is doubled (capped at 1) every
    `anneal_beta` epochs.
    """
    # model_evaluation()
    dev_data = load_dev_data()
    dataloader = get_dataloader(squad_train_path)
    cvae.train()
    print("training ..", flush=True)
    for epoch in range(n_epochs):
        tot_p_loss = 0
        tot_n_loss = 0
        # as mentioned by the author, turning it off.
        # if epoch % anneal_alpha == 0:
        #     t_alpha = min(t_alpha*2, 1)
        if epoch % anneal_beta == 0:
            t_beta = min(t_beta * 2, 1)
        for i, (q, p_ans, n_ans) in enumerate(dataloader):
            # NOTE(review): p_loss / n_loss are referenced but never assigned
            # in this view -- the forward/backward pass appears to be elided
            # upstream; as written this raises NameError. Verify.
            tot_p_loss += p_loss.item()
            tot_n_loss += n_loss.item()
            if i % p_epochs == 0:
                print(
                    f'epoch: {epoch}/{n_epochs}, steps: {i}/{len(dataloader)}, c_p_loss: {p_loss.item():.2f}, p_loss: {(tot_p_loss/(i+1)):.2f}, c_n_loss: {n_loss.item():.2f}, n_loss {(tot_n_loss/(i+1)):.2f}',
                    flush=True)
                torch.save({'cvae': cvae.state_dict()}, model_path)
            if i % e_epochs == 0:
                model_evaluation(dev_data, 'train')
                cvae.train()
def main(args):
    """Train the graph Network model (Sydney demand data) and validate per epoch.

    Builds a Chebyshev-polynomial graph from the data, trains with Adam +
    sum-reduced MSE, logs per-epoch loss and average gradient norm, saves a
    checkpoint each epoch, and reports MAE/RMSE/MAPE on the validation set.
    """
    if args.dataset == 'SYDNEY':
        from load_data import Load_Sydney_Demand_Data
        data = Load_Sydney_Demand_Data(
            os.path.join(base_dir, '1h_data_new3.csv'))
        data = np.expand_dims(data, axis=-1)  # add trailing feature dim
        args.dim = 1
        print(data.shape)
    # NOTE(review): `data` is only assigned in the SYDNEY branch; any other
    # dataset reaches here with `data` undefined -- confirm intended.
    adj = generate_graph_with_data(data, len(data), threshold=args.threshold)
    adj = torch.from_numpy(Cheb_Poly(Scaled_Laplacian(adj), 2)).type(torch.float32)
    model = Network(adj, args, dropout=0.15)
    print_model_parameters(model)
    model.apply(init_weights)
    model = model.to(args.device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr_init,
                                 betas=(0.8, 0.999), eps=1e-7)
    lr_scheduler = init_lr_scheduler(optimizer, args)
    criterion = nn.MSELoss(reduction='sum')
    criterion.to(args.device)
    train_dataloader, val_dataloader, test_dataloader, scaler = get_dataloader(
        args.dataset, args.batch_size, args.window, args.horizon,
        args.valdays, args.testdays, normalizer='max')
    print('************START TRAINING************')
    # NOTE(review): len(dataloader) is normally already the batch count;
    # dividing by batch_size again looks suspicious -- confirm.
    n_batch = len(train_dataloader) / args.batch_size  # 1920/
    path = '/home/canli/upload_file/save_model/'  # TODO: make configurable
    for epoch in range(1, args.epochs + 1):
        train_epoch_loss = 0
        epoch_norm = 0
        model.train()
        for index, (x, y) in enumerate(train_dataloader):
            optimizer.zero_grad()
            train_pred = model(x)
            train_loss = criterion(train_pred, y)
            train_loss.backward()
            grad_norm = check_gradients(model)
            epoch_norm = epoch_norm + grad_norm
            optimizer.step()
            # FIX: use .item() instead of deprecated .data so the running
            # total is a plain float and no GPU tensors are kept alive.
            train_epoch_loss = train_epoch_loss + train_loss.item()
        print('Epoch {}/{}: train loss: {:.4f}, grad norm: {:.6f}'.format(
            epoch, args.epochs, train_epoch_loss, (epoch_norm / n_batch)))
        lr_scheduler.step()
        torch.save(model.state_dict(), path + str(epoch) + 'para_model.pkl')
        val_mae, val_rmse, val_mape = eval(model, val_dataloader, scaler)
        print('Val---MAE: {:.4f}, RMSE: {:.4f}, MAPE: {:.4f}'.format(
            val_mae, val_rmse, val_mape))
def main(args):
    """Evaluate a saved Network checkpoint with repeated stochastic inference.

    Runs the test set 10 times (dropout active via repeated eval calls),
    stacks predictions, builds a 95% normal confidence interval from the
    per-point mean/std, and prints the empirical coverage and mean interval
    width.
    """
    if args.dataset == 'SYDNEY':
        from load_data import Load_Sydney_Demand_Data
        data = Load_Sydney_Demand_Data(
            os.path.join(base_dir, '1h_data_new3.csv'))
        data = np.expand_dims(data, axis=-1)
        args.dim = 1
        print(data.shape)
    adj = generate_graph_with_data(data, len(data), threshold=args.threshold)
    adj = torch.from_numpy(Cheb_Poly(Scaled_Laplacian(adj), 2)).type(torch.float32)
    model = Network(adj, args, dropout=0.15)
    model_path = '/home/canli/upload_file/save_model/15para_model.pkl'  # TODO: make configurable
    model.load_state_dict(torch.load(model_path))
    print_model_parameters(model)
    model = model.to(args.device)
    train_dataloader, val_dataloader, test_dataloader, scaler = get_dataloader(
        args.dataset, args.batch_size, args.window, normalizer='max')
    # FIX: removed unused `pred_matrix = []` local.
    pred_tensor = torch.Tensor().cuda()
    for i in range(10):
        test_mae, test_rmse, test_mape, pred, true = eval(
            model, test_dataloader, scaler)
        print('Test---MAE: {:.4f}, RMSE: {:.4f}, MAPE: {:.4f}'.format(
            test_mae, test_rmse, test_mape))
        pred = pred.squeeze()
        true = true.squeeze()  # `true` is identical each pass; last one is used below
        pred = pred.unsqueeze(0)
        pred_tensor = torch.cat((pred_tensor, pred), 0)
    ave = torch.mean(pred_tensor, dim=0)
    std = torch.std(pred_tensor, dim=0)
    ave = ave.cpu().detach().numpy()
    std = std.cpu().detach().numpy()
    true = true.cpu().detach().numpy()
    # 95% CI assuming per-point normality of the 10 runs.
    interval = stats.norm.interval(0.95, ave, std)
    span = interval[1] - interval[0]
    compare = (true < interval[1]) & (true > interval[0])
    per = np.sum(compare) / (ave.shape[0] * ave.shape[1])
    print(per)
    print(np.mean(span))
elif opt.model == "dip_vae": parameter = opt.lambda_diag else: parameter = 0 out_path = ( f"../results/{opt.dataset}/{opt.model}/parameter_{parameter}/seed_{opt.seed}" ) os.makedirs(out_path, exist_ok=True) # check for GPU device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load data print("Loading data") train_dataloader = get_dataloader(opt) test_dataloader = None n = len(train_dataloader.dataset) iter_per_epoch = math.ceil(n / opt.batch_size) # run start = time.time() print("Training") vae = ConvVAE(opt).to(device) optimizer = optim.Adam(vae.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2)) discriminator = Discriminator(opt.latent_dim).to(device) optimizer_d = optim.Adam(discriminator.parameters(), lr=opt.lrd, betas=(opt.b1d, opt.b2d))
def ids2words(lang, ids):
    """Translate a sequence of token ids into words via *lang*'s vocabulary."""
    return [lang.index2word[i] for i in ids]


def greedy_decode(model, dataloader, input_lang, output_lang):
    """Greedy-decode one batch from *dataloader* and print each triple."""
    with torch.no_grad():
        batch = next(iter(dataloader))
        input_tensor, input_mask, target_tensor = batch[0], batch[1], batch[2]
        decoder_outputs, decoder_hidden = model(input_tensor, input_mask)
        # Greedy choice: argmax token at every position.
        topv, topi = decoder_outputs.topk(1)
        decoded_ids = topi.squeeze()
        for row in range(input_tensor.size(0)):
            input_sent = ids2words(input_lang, input_tensor[row].cpu().numpy())
            output_sent = ids2words(output_lang, decoded_ids[row].cpu().numpy())
            target_sent = ids2words(output_lang, target_tensor[row].cpu().numpy())
            print('Input: {}'.format(input_sent))
            print('Target: {}'.format(target_sent))
            print('Output: {}'.format(output_sent))


if __name__ == '__main__':
    input_lang, output_lang, train_dataloader = load_data.get_dataloader(batch_size)
    # NOTE: `model` the module is shadowed by `model` the instance here.
    model = model.EncoderDecoder(hidden_size, input_lang.n_words,
                                 output_lang.n_words).to(device)
    train(train_dataloader, model, n_epochs=20)
    greedy_decode(model, train_dataloader, input_lang, output_lang)
# Domain-adaptation (JDDA) training setup: MNIST (source) -> MNIST-M (target).
from itertools import cycle

import numpy as np
# FIX: torch / nn / optim are used below but were not imported in this block.
import torch
import torch.nn.functional as f
from torch import nn, optim

from load_data import get_dataloader
from model import JDDA

# Hyper-parameters.
class_num = 10
batch_size = 128
total_iters = 200200
lr = 0.0001
discriminative_loss_param = 0.03  ##0.03 for InstanceBased method, 0.01 for CenterBased method
domain_loss_param = 8
device = torch.device('cuda:2')

# Source / target train loaders and the target-domain test loader.
source_dataloader = get_dataloader('mnist', batch_size=batch_size, split='train')
target_dataloader = get_dataloader('mnistm', batch_size=batch_size, split='train')
test_dataloader = get_dataloader('mnistm', batch_size=batch_size, split='test')

model = JDDA()
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# since length of svhn and mnist is different
# origin author train steps 200200
# and drop the data that batch size is not 128
total_loss += loss.item() if (i+1) % print_every == 0: avg_loss = total_loss / float(print_every) print_progress((time.time() - start)//60, epoch+1, i+1, avg_loss) total_loss = 0 #if (i+1) % SAVE_ITERS == 0: # save(epoch, i+1, NAME, model, optimizer) avg_loss = total_loss / max(1, (i+1) % print_every) print_progress((time.time() - start)//60, epoch+1, i+1, avg_loss) save(epoch, model, optimizer) dataloader, dataset = get_dataloader(args.batch_size, args.data_path, max_len=args.max_length) print("Loaded {0} samples from {1}".format(len(dataset), args.data_path)) print("Initializing Transformer...") model = Transformer(ALPHABET_SIZE, args.embedding_size, args.num_layers) if torch.cuda.is_available() and not args.cpu: model = torch.nn.DataParallel(model) model = model.to(DEVICE) print("Transformer Initialized on device(s):", DEVICE) optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.98), eps=1e-9) sched = CosineWithRestarts(optimizer, T_max=len(dataloader)) epoch = 0 if args.checkpoint_path is not None:
def train(style_list, content_list, batch_size, num_epochs, style_weight,
          content_weight, ngf, log_interval, save_model_dir):
    """Train the neural style-transfer network (TF/Keras port).

    Optimizes `basic_block.Net` against VGG content/style losses, logging
    every `log_interval` batches and saving a SavedModel checkpoint every
    `4 * log_interval` batches.

    Args:
        style_list / content_list: inputs for the two data loaders.
        batch_size: loader batch size (also used for gram broadcasting).
        num_epochs: upper bound of the epoch range.
        style_weight / content_weight: loss term coefficients.
        ngf: generator feature width passed to `basic_block.Net`.
        log_interval: batches between progress logs.
        save_model_dir: directory for SavedModel checkpoints.
    """
    ########################
    # Data loader
    ########################
    content_loader = load_data.get_dataloader(content_list, batch_size)
    style_loader = load_data.get_dataloader(style_list, batch_size)
    ########################
    # Init model
    ########################
    vgg = basic_block.Vgg()
    style_model = basic_block.Net(ngf)
    ########################
    # optimizer and loss
    ########################
    # FIX: tf.keras.losses.mean_squared_error is a plain function and calling
    # it with no arguments raises TypeError; instantiate the loss class.
    mse_loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam()
    ########################
    # Start training loop
    ########################
    # NOTE(review): range(1, num_epochs) runs num_epochs-1 epochs -- confirm.
    for epoch in range(1, num_epochs):
        agg_content_loss = 0.0
        agg_style_loss = 0.0
        count = 0
        for batch_id, content_img in enumerate(content_loader):
            with tf.GradientTape() as tape:
                n_batch = len(content_img)
                count += n_batch
                # data preparation. TODO: figure out these helper functions
                # NOTE(review): assumes style_loader is already an iterator.
                style_image = next(style_loader)
                #style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
                feature_style = vgg(style_image)
                gram_style = [
                    basic_block.gram_matrix(y) for y in feature_style
                ]
                f_xc_c = vgg(content_img)[1]
                style_model.set_target(style_image)
                y = style_model(content_img)
                features_y = vgg(y)
                # TODO: why the coefficient 2?
                content_loss = 2 * content_weight * mse_loss(
                    features_y[1], f_xc_c)
                style_loss = 0.0
                for m in range(len(features_y)):
                    gram_y = basic_block.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    # FIX: broadcast_to is a free tf function taking a shape
                    # tuple, not a tensor method with unpacked dims.
                    gram_s = tf.broadcast_to(
                        tf.expand_dims(gram_style[m], 0),
                        (batch_size, 1, C, C))
                    style_loss += 2 * style_weight * mse_loss(
                        gram_y, gram_s[:n_batch, :, :])
                total_loss = content_loss + style_loss
                # FIX: losses are scalars now; indexing [0] would fail.
                agg_content_loss += float(content_loss)
                agg_style_loss += float(style_loss)
            # FIX: compute gradients w.r.t. the same variable list the
            # optimizer applies them to (was style_model.variables).
            gradients = tape.gradient(total_loss,
                                      style_model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, style_model.trainable_variables))
            if (batch_id + 1) % log_interval == 0:
                mesg = "{}\tEpoch {}:\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), epoch + 1, agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(mesg)
            if (batch_id + 1) % (4 * log_interval) == 0:
                # save model
                save_model_filename = "Epoch_" + str(epoch) + "iters_" + \
                    str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                        content_weight) + "_" + str(style_weight) + ".params"
                save_model_path = os.path.join(save_model_dir,
                                               save_model_filename)
                tf.saved_model.save(style_model, save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)