def optimize(lr, clip): print("Optimizing with " + str(lr) + "lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip") num_chans = [args.nhid] * (args.levels - 1) + [args.emsize] model = TCN(args, n_words, num_chans) if args.cuda: model.cuda() print("Parameters: " + str(sum(p.numel() for p in model.parameters()))) torch.backends.cudnn.benchmark = True # This makes dilated conv much faster for CuDNN 7.5 optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr) # Start training loop best_model_name = "model_" + args.experiment_name + ".pt" best_vloss = 1e8 all_vloss = [] for epoch in range(1, args.epochs+1): epoch_start_time = time.time() try: train(model, optimizer, lr, epoch, clip) except OverflowError: return {'status': 'fail'} print("Validating...") val_loss = evaluate(model, val_data) if np.isnan(val_loss) or val_loss > 100: return {'status' : 'fail'} print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss))) print('-' * 89) # Save the model if the validation loss is the best we've seen so far. if val_loss < best_vloss: with open(best_model_name, 'wb') as f: print('Save model!\n') torch.save(model, f) best_vloss = val_loss # Anneal the learning rate if the validation loss plateaus if epoch > 10 and val_loss >= max(all_vloss[-5:]): lr = lr / 2. for param_group in optimizer.param_groups: param_group['lr'] = lr all_vloss.append(val_loss) return {"status" : "ok", "loss" : best_vloss, "model_name" : best_model_name}
def optimize(lr, clip): print("Optimizing with " + str(lr) + "lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip") # Set the random seed manually for reproducibility. torch.manual_seed(args.seed) if torch.cuda.is_available(): if not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) print(args) n_channels = [args.nhid] * args.levels model = TCN(args.model, input_size, input_size, n_channels, args.ksize, dropout=args.dropout) print('Parameter count: ', str(sum(p.numel() for p in model.parameters()))) if args.cuda: model.cuda() #summary(model, (193, 88)) optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr) best_vloss = 1e8 vloss_list = [] model_name = "model_" + str(args.data) + "_" + str( args.experiment_name) + ".pt" for ep in range(1, args.epochs + 1): train(model, ep, lr, optimizer, clip) vloss = evaluate(model, X_valid, name='Validation') if np.isnan(vloss) or vloss > 1000: return {'status': 'fail'} if vloss < best_vloss: with open(model_name, "wb") as f: torch.save(model, f) print("Saved model!\n") best_vloss = vloss if ep > 10 and vloss > max(vloss_list[-10:]): lr /= 2 for param_group in optimizer.param_groups: param_group['lr'] = lr vloss_list.append(vloss) return {'status': 'ok', 'loss': best_vloss, 'model_name': model_name}
def optimize(lr, clip): print("Optimizing with " + str(lr) + "lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip") num_chans = [args.nhid] * (args.levels - 1) + [args.emsize] model = TCN(args, n_characters, num_chans) if args.cuda: model.cuda() print("Parameters: " + str(sum(p.numel() for p in model.parameters()))) torch.backends.cudnn.benchmark = True # This makes dilated conv much faster for CuDNN 7.5 optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr) # Start training loop all_losses = [] best_vloss = 1e7 for epoch in range(1, args.epochs + 1): try: train(model, optimizer, clip, lr, epoch) except OverflowError: return {'status': 'fail'} vloss = evaluate(model, val_data) if np.isnan(vloss) or vloss > 1000: return {'status': 'fail'} print('-' * 89) print('| End of epoch {:3d} | valid loss {:5.3f} | valid bpc {:8.3f}'. format(epoch, vloss, vloss / math.log(2))) if epoch > 10 and vloss > max(all_losses[-5:]): lr = lr / 2. for param_group in optimizer.param_groups: param_group['lr'] = lr all_losses.append(vloss) if vloss < best_vloss: print("Saving...") with open("model_" + args.experiment_name + ".pt", "wb") as f: torch.save(model, f) print("Saved model!\n") best_vloss = vloss return { "status": "ok", "loss": best_vloss, "model_name": "model_" + args.experiment_name + ".pt" }
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=num_threds,
                                          drop_last=False)

channel_sizes = [args.nhid] * args.levels
kernel_size = args.ksize

# One TCN per input modality (T, E, G), all sharing the same architecture.
model_T = TCN(input_channels_T, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
model_E = TCN(input_channels_E, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
model_G = TCN(input_channels_G, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
if args.cuda:
    model_T.cuda()
    model_E.cuda()
    model_G.cuda()

# A single optimizer with a separate learning rate per model.
optimizer = getattr(optim, args.optim)([
    {'params': model_T.parameters(), 'lr': args.lr_T},
    {'params': model_E.parameters(), 'lr': args.lr_E},
    {'params': model_G.parameters(), 'lr': args.lr_G},
])  # , momentum=0.9)


def save_network(network, network_label, epoch_label):
    save_filename = 'net_epoch_%d_id_%s.pth' % (epoch_label, network_label)
    save_path = os.path.join(args.savedir, save_filename)
    torch.save(network.state_dict(), save_path)
    print('saved net: %s' % save_path)


def train(ep):
dropout = modelContext['model_parameters']['dropout']

# Generate the model
model = TCN(input_channels, n_classes, channel_sizes,
            kernel_size=kernel_size, dropout=dropout)
# Keep a backup of the model for early stopping. A plain assignment
# (modelBEST = model) would only alias the same object, so take a deep copy
# instead (requires `import copy` at the top of the file).
modelBEST = copy.deepcopy(model)

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
### ~~~~~~~~~~~~~~~~~ LOAD DATA INTO CUDA ~~~~~~~~~~~~~~~~~~~ ###
if args.cuda:
    torch.cuda.set_device(cuda_device)
    model.cuda()
    modelBEST.cuda()

    # If we are not just testing, load every split into CUDA as well.
    if not testSession:
        # Train set
        trueStateTRAIN = trueStateTRAIN.cuda()
        measuredStateTRAIN = measuredStateTRAIN.cuda()
        # Evaluation set
        trueStateEVAL = trueStateEVAL.cuda()
        measuredStateEVAL = measuredStateEVAL.cuda()
        # Test set
        trueStateTEST = trueStateTEST.cuda()
        measuredStateTEST = measuredStateTEST.cuda()
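
# A sketch of how modelBEST could drive early stopping: snapshot the weights
# whenever validation improves, and roll back once patience runs out. The
# names patience, train_epoch, and the evaluate signature are illustrative
# assumptions, not taken from the original script.
import copy

patience, stale, best_vloss = 10, 0, float('inf')
for epoch in range(1, epochs + 1):
    train_epoch(model)  # hypothetical helper that runs one training epoch
    vloss = evaluate(model, measuredStateEVAL, trueStateEVAL)
    if vloss < best_vloss:
        best_vloss, stale = vloss, 0
        modelBEST = copy.deepcopy(model)  # snapshot the best-so-far weights
    else:
        stale += 1
        if stale >= patience:
            model.load_state_dict(modelBEST.state_dict())  # restore best weights
            break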