def run_bc_single_env(env, num_rollouts=10, epochs=200):
    print(colored("ENV: %s" % env, 'green'))
    data = helper.run_expert(env, num_rollouts=num_rollouts)
    input_dim = len(data['observations'][0])
    output_dim = len(data['actions'][0])
    model = helper.build_model(input_dim, output_dim)
    rewards = []
    with tf.Session():
        with tf.variable_scope(env):
            sess = tf.get_default_session()
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochs):
                policy_fn = bc.train_bc(sess, data, model=model, epochs=1,
                                        curr_epoch=epoch)
                _data = bc.run_bc(sess, env, policy_fn,
                                  num_rollouts=num_rollouts)
                rewards.append(_data['returns'])
    return rewards
def main():
    input_args = get_predict_input_args()

    # Load checkpoint
    checkpoint, validation_accuracy = load_checkpoint(input_args.checkpoint_path)
    useGPU = input_args.gpu is not None

    # Build model
    model = build_model(checkpoint["arch"], checkpoint["hidden_units_01"],
                        checkpoint["hidden_units_02"], checkpoint)

    # Process image
    processed_image = process_image(input_args.image_path)

    # Predict topK
    topk = predict(processed_image, model, input_args.top_k, useGPU)

    # Show result
    with open(input_args.category_names_path, 'r') as f:
        cat_to_name = json.load(f)
    probs = topk[0][0].cpu().numpy()
    categories = [cat_to_name[str(category_index + 1)]
                  for category_index in topk[1][0].cpu().numpy()]
    for i in range(len(probs)):
        print("TopK {}, Probability: {}, Category: {}\n".format(
            i + 1, probs[i], categories[i]))
def main():
    # Get input arguments
    args = arg_parser()

    # Process and load the data/images
    image_datasets, dataloaders = helper.process_and_load_data(args.data_dir)
    print("The train, test & validation data has been loaded.")

    # Load the model
    model, optimizer, criterion = helper.build_model(args.arch, args.hidden_units,
                                                     args.learning_rate)
    print("Model, optimizer & criterion have been loaded.")

    # Check if GPU is available
    device = helper.check_gpu(args.gpu)
    print('Using {} for computation.'.format(device))

    # Train and validate the model
    helper.train_and_validate_model(model, optimizer, criterion, dataloaders,
                                    device, args.epochs, print_every=32)
    print("Training has been completed.")

    # Test the model
    helper.test_model(model, optimizer, criterion, dataloaders, device)
    print("Testing has been completed.")

    # Save the checkpoint
    helper.save_checkpoint(args.arch, model, args.epochs, args.hidden_units,
                           args.learning_rate, image_datasets, args.save_dir)
    print("Model's checkpoint has been saved.")
def dagger(env, num_rollouts=1, epochs=1):
    data = helper.run_expert(env, num_rollouts=num_rollouts)
    input_dim = len(data['observations'][0])
    output_dim = len(data['actions'][0])
    model = helper.build_model(input_dim, output_dim)
    sess = tf.get_default_session()
    sess.run(tf.global_variables_initializer())
    rewards = []
    os.makedirs('checkpoints', exist_ok=True)
    for epoch in range(epochs):
        checkpoint_path = None
        if epoch == epochs - 1:
            checkpoint_path = helper.checkpoint_path(env, 'dagger-')
        policy_fn = bc.train_bc(sess, data, model=model, curr_epoch=epoch,
                                epochs=1, checkpoint_path=checkpoint_path)
        _data = bc.run_bc(sess, env, policy_fn, num_rollouts=num_rollouts,
                          stats=False)
        _data['actions'] = helper.ask_expert_actions(env, _data['observations'])
        rewards.append(_data['returns'])
        data = merge_data(data, _data)
    return policy_fn, rewards
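# merge_data is called above but not shown in these snippets. A minimal sketch
# of what it might look like, assuming the datasets are dicts of NumPy arrays
# keyed by 'observations' and 'actions' (a hypothetical helper, not the
# original implementation):
import numpy as np

def merge_data(data, new_data):
    # Concatenate the freshly expert-labeled rollouts onto the aggregated
    # dataset -- the dataset-aggregation step of DAgger.
    return {
        'observations': np.concatenate([data['observations'],
                                        new_data['observations']]),
        'actions': np.concatenate([data['actions'], new_data['actions']]),
    }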
def train_bc(sess, data, model=None, curr_epoch=None, epochs=1, batch_size=32,
             debug=False, checkpoint_path=None):
    mean, stdev = helper.mean_and_stdev(data['observations'])
    if model:
        m = model
    else:
        input_dim = len(data['observations'][0])
        output_dim = len(data['actions'][0])
        m = helper.build_model(input_dim, output_dim)
        sess.run(tf.global_variables_initializer())
    input_ph, output_ph = m['input_ph'], m['output_ph']
    mean_v, stdev_v = m['mean_v'], m['stdev_v']
    output_pred, mse, opt = m['output_pred'], m['mse'], m['opt']

    mean_v.load(mean, session=sess)
    stdev_v.load(stdev, session=sess)

    if checkpoint_path:
        saver = tf.train.Saver()

    # run training
    n_inputs = len(data['observations'])
    if debug:
        print(colored('n_inputs: %d' % n_inputs, 'red'))
    for epoch in range(epochs):
        for i in range(1_000):
            indices = np.random.randint(n_inputs, size=batch_size)
            input_batch = data['observations'][indices]
            output_batch = data['actions'][indices]
            _, mse_run = sess.run([opt, mse], feed_dict={
                input_ph: input_batch,
                output_ph: output_batch
            })
        if curr_epoch is not None:
            print('epoch: {0:03d} mse: {1:.4f}'.format(curr_epoch, mse_run))
        else:
            print('epoch: {0:03d} mse: {1:.4f}'.format(epoch, mse_run))
        if checkpoint_path and epoch == epochs - 1:
            saver.save(sess, checkpoint_path)

    # The callers assign the result to policy_fn, so return a callable mapping
    # observations to predicted actions (the return statement is missing from
    # the original snippet; this closure is an assumption).
    def policy_fn(obs):
        return sess.run(output_pred, feed_dict={input_ph: obs})
    return policy_fn
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('env', type=str)
    parser.add_argument('--model_checkpoint', type=str)
    parser.add_argument('--render', type=bool, default=True)
    parser.add_argument('--max_timesteps', type=int)
    parser.add_argument('--num_rollouts', type=int, default=10)
    args = parser.parse_args()

    with tf.Session() as sess:
        with tf.variable_scope(args.env):
            input_dim, output_dim = helper.input_output_shape(args.env)
            model = helper.build_model(input_dim, output_dim)
            input_ph, output_pred = model['input_ph'], model['output_pred']
            policy_fn = tf_util.function([input_ph], output_pred)

            if args.model_checkpoint:
                checkpoint_path = args.model_checkpoint
            else:
                checkpoint_path = helper.checkpoint_path(args.env)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)

            env = gym.make(helper.envname(args.env))
            max_steps = args.max_timesteps or env.spec.timestep_limit

            returns = []
            observations = []
            actions = []
            for i in range(args.num_rollouts):
                print('iter', i)
                obs = env.reset()
                done = False
                totalr = 0
                steps = 0
                while not done:
                    action = policy_fn(obs[None, :])
                    observations.append(obs)
                    actions.append(action)
                    obs, r, done, _ = env.step(action)
                    totalr += r
                    steps += 1
                    if args.render:
                        env.render()
                    if steps >= max_steps:
                        break
                returns.append(totalr)

            helper.print_returns_stats(returns)
def test(opts):
    source_vocab = vocabs.load_vocabs_from_file(opts.source_vocab)
    target_vocab = vocabs.load_vocabs_from_file(opts.target_vocab)

    test_dataset = Seq2SeqDataset(opts.testing_dir, source_vocab, target_vocab,
                                  opts.source_lang, opts.target_lang)
    test_dataloader = Seq2SeqDataLoader(
        test_dataset,
        test_dataset.source_pad_id,
        test_dataset.target_pad_id,
        batch_first=True,
        batch_size=opts.batch_size,
        shuffle=True,
        pin_memory=(opts.device.type == "cuda"),
        num_workers=4,
    )

    model = helper.build_model(
        opts,
        test_dataset.source_vocab_size,
        test_dataset.target_vocab_size,
        test_dataset.source_pad_id,
        test_dataset.target_sos,
        test_dataset.target_eos,
        test_dataset.target_pad_id,
        opts.device,
    )
    model.load_state_dict(torch.load(opts.model_path))
    model.eval()

    # The loss function
    loss_function = torch.nn.CrossEntropyLoss(
        ignore_index=test_dataset.target_pad_id)

    # Evaluate the model
    test_loss = evaluate_model_by_loss_function(model, loss_function,
                                                test_dataloader, opts.device)
    test_bleu = evaluate_model_by_bleu_score(
        model,
        test_dataloader,
        opts.device,
        test_dataset.target_sos,
        test_dataset.target_eos,
        test_dataset.target_pad_id,
        target_vocab.get_id2word(),
    )
    print(f"Test loss={test_loss}, Test Bleu={test_bleu}")
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("data_dir", help="directory containing training data")
    parser.add_argument("--save_dir", help="directory to save checkpoint",
                        default=os.path.dirname(os.path.abspath(__file__)))
    parser.add_argument("--arch", help="choose architecture",
                        choices=model_choices, default="vgg19")
    parser.add_argument("--learning_rate", help="set learning rate",
                        type=float, default=0.001)
    parser.add_argument("--hidden_units", help="set hidden units",
                        default=[25088, 4096, 4096, 102])
    parser.add_argument("--epochs", help="set number of epochs to train for",
                        type=int, default=5)
    parser.add_argument("--gpu", help="use GPU for training",
                        action="store_true")
    args = parser.parse_args()

    data_dir = args.data_dir
    save_dir = args.save_dir
    arch = args.arch
    lr = args.learning_rate
    hidden_units = args.hidden_units
    epochs = args.epochs
    cuda = args.gpu

    # load data
    trainloader, validloader, class_to_idx = load_data(data_dir)

    # Build the network from a pre-trained backbone
    model = build_model(arch, hidden_units)

    # Train the classifier
    model = train(model, epochs, lr, cuda, trainloader, validloader)

    # Save the checkpoint
    save_model(model, arch, hidden_units, save_dir, class_to_idx)
def test_build_model_input_norm():
    with tf.Session() as sess:
        with tf.variable_scope("test_build_model_input_norm"):
            m = helper.build_model(5, 3)
            input_ph, input_norm = m['input_ph'], m['input_norm']
            mean_v, stdev_v = m['mean_v'], m['stdev_v']
            sess.run(tf.global_variables_initializer())
            mean_v.load([0.5, 0.5, 0.5, 0.5, 0.5], session=sess)
            stdev_v.load([1, 1, 1, 1, 1], session=sess)

            values, = sess.run([input_norm], feed_dict={input_ph: [
                [1, 1, 1, 1, 1],
                [1, 2, 3, 4, 5],
            ]})
            # Expected normalization: (x - mean) / stdev with mean 0.5, stdev 1
            expected_values = [
                [0.5, 0.5, 0.5, 0.5, 0.5],
                [0.5, 1.5, 2.5, 3.5, 4.5],
            ]
            assert (np.abs(values - np.array(expected_values)) < 0.01).all()
def predict(opts):
    # Get our current version of spacy
    spacy_instance = utils.get_spacy_instance(opts.source_lang)

    # Lowercase the text and strip surrounding whitespace
    input_text = opts.input_text.lower().strip()

    # Parse input into tokens with spacy
    input_tokens = [
        token.text for token in spacy_instance.tokenizer(input_text)
    ]
    print("Input:", " ".join(input_tokens))

    # Get the vocabs
    # TODO: Handle the case of translating from fr to en
    source_vocab = vocabs.load_vocabs_from_file(opts.source_vocab)
    target_vocab = vocabs.load_vocabs_from_file(opts.target_vocab)

    # Get the mappings
    source_word2id = source_vocab.get_word2id()
    target_word2id = target_vocab.get_word2id()
    source_id2word = source_vocab.get_id2word()
    target_id2word = target_vocab.get_id2word()

    # Special tokens are appended after the regular vocabulary entries
    source_vocab_size = len(source_word2id) + 2
    target_vocab_size = len(target_word2id) + 4
    src_unk, src_pad = range(len(source_word2id), source_vocab_size)
    trg_unk, trg_sos, trg_eos, trg_pad = range(len(target_word2id),
                                               target_vocab_size)

    model = helper.build_model(
        opts,
        source_vocab_size,
        target_vocab_size,
        src_pad,
        trg_sos,
        trg_eos,
        trg_pad,
        opts.device,
    )
    model.load_state_dict(torch.load(opts.model_path))
    model.eval()

    src = [torch.tensor([source_word2id[word] for word in input_tokens])]
    src_lens = torch.tensor([len(input_tokens)])
    src = torch.nn.utils.rnn.pad_sequence(src, padding_value=src_pad)

    predicted_words = None
    with torch.no_grad():
        # Get the output
        logits = model(src, src_lens)
        predicted_trg = logits.argmax(2)[0, :]
        # Remove the SOS and EOS tokens
        predicted_trg = predicted_trg[1:-1]
        # Get the resultant sequence of words
        predicted_words = [
            target_id2word.get(word_id.item(), "NAN")
            for word_id in predicted_trg
        ]
    return predicted_words
def main():
    input_args = get_train_input_args()

    # Create & adjust data
    train_dir = input_args.data_dir + '/train'
    valid_dir = input_args.data_dir + '/valid'
    test_dir = input_args.data_dir + '/test'
    print("\n\n Training folder: {}".format(train_dir))
    print(" Validation folder: {}".format(valid_dir))
    print(" Test folder: {}\n".format(test_dir))

    train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    test_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_data = datasets.ImageFolder(valid_dir, transform=test_transforms)
    test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

    trainloader = torch.utils.data.DataLoader(train_data, batch_size=32,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_data, batch_size=32)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=32)

    # Load checkpoint
    checkpoint = None
    best_accuracy = 0
    if input_args.checkpoint_path is not None:
        checkpoint, best_accuracy = load_checkpoint(input_args.checkpoint_path)
    useGPU = input_args.gpu is not None
    arch = input_args.arch if checkpoint is None else checkpoint["arch"]
    hidden_units_01 = (input_args.hidden_units_01 if checkpoint is None
                       else checkpoint["hidden_units_01"])
    hidden_units_02 = (input_args.hidden_units_02 if checkpoint is None
                       else checkpoint["hidden_units_02"])

    # Build model
    model = build_model(arch, hidden_units_01, hidden_units_02, checkpoint)

    # Train model
    print("\n\nStart Training...\n")
    if best_accuracy > 0:
        print("Last validation accuracy: {}".format(best_accuracy))

    epochs = input_args.epochs
    learning_rate = input_args.learning_rate
    steps = 0
    running_loss = 0
    print_every = 10
    train_losses, validation_losses = [], []

    # Use GPU if it's available and --gpu was requested
    device = torch.device(
        "cuda" if torch.cuda.is_available() and useGPU else "cpu")
    print(f"Device: {device}")

    criterion = nn.NLLLoss()
    # Only train the classifier parameters, feature parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    model.to(device)

    for epoch in range(epochs):
        for inputs, labels in trainloader:
            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            logps = model.forward(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                validation_loss = 0
                accuracy = 0
                model.eval()
                with torch.no_grad():
                    for inputs, labels in validloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = model.forward(inputs)
                        batch_loss = criterion(logps, labels)
                        validation_loss += batch_loss.item()

                        # Calculate accuracy
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(
                            equals.type(torch.FloatTensor)).item()

                print(f"Epoch {epoch+1}/{epochs}, "
                      f"Steps {steps}\n"
                      f"Train loss: {running_loss/print_every:.3f}, "
                      f"Validation loss: {validation_loss/len(validloader):.3f}, "
                      f"Validation accuracy: {accuracy/len(validloader):.3f}\n")

                train_losses.append(running_loss / print_every)
                validation_losses.append(validation_loss / len(validloader))

                if (best_accuracy < accuracy / len(validloader)
                        and accuracy / len(validloader) > 0.6):
                    best_accuracy = accuracy / len(validloader)
                    path = input_args.save_dir + "/checkpoint_best_accuracy.pth"
                    save_checkpoint(model, train_data, path, best_accuracy,
                                    input_args.arch, hidden_units_01,
                                    hidden_units_02)

                running_loss = 0
                # Put the model back into training mode after validation
                model.train()

    print("\n\nEnd Training...\n")

    # Test trained model
    test_model(model, testloader)
def test_build_model():
    with tf.Session():
        with tf.variable_scope("test_build_model"):
            m = helper.build_model(11, 3)
            assert m is not None
parser.add_argument('--gpu', dest="gpu", action="store", default="gpu")
parser = parser.parse_args()

data_dir = parser.data_directory
save_dir = parser.save_dir
arch = parser.arch
learning_rate = parser.learning_rate
hidden_units = parser.hidden_units
epochs = parser.epochs
gpu = parser.gpu

# 1) Load Data
image_datasets, trainloader, testloader, validloader = helper.loadData(data_dir)

# 2) Build Model
model = helper.build_model(arch, hidden_units)

# 3) Train Model
model, optimizer, criterion = helper.train_model(model, trainloader, validloader,
                                                 learning_rate, epochs, gpu)

# 4) Save the checkpoint
model.to('cpu')
model.class_to_idx = image_datasets['train_data'].class_to_idx
checkpoint = {
    'model': model,
    'hidden_units': hidden_units,
    'optimizer_state_dict': optimizer.state_dict(),
    'criterion': criterion,
    'epochs': epochs,
    'state_dict': model.state_dict(),
    'class_to_idx': model.class_to_idx,
}
import argparse

import helper

parser = argparse.ArgumentParser()
parser.add_argument('data_dir', nargs='?', type=str, default='./flowers/')
parser.add_argument('--gpu', dest='gpu', action='store_true', default=False)
parser.add_argument('--save_dir', dest='save_dir', action='store',
                    default='./checkpoint.pth')
parser.add_argument('--arch', dest='arch', action='store', default='vgg16')
parser.add_argument('--learning_rate', dest='learning_rate', action='store',
                    default=0.001, type=float)
parser.add_argument('--hidden_units', dest='hidden_units', action='store',
                    default=1024, type=int)
parser.add_argument('--epochs', dest='epochs', action='store', default=20,
                    type=int)
args = parser.parse_args()

# load data
train_data, trainloader, testloader, validloader = helper.load_data()

# build model
print(args.gpu)
print(args.arch)
print(type(args.hidden_units))
print(type(args.learning_rate))
model, device, criterion, optimizer = helper.build_model(
    args.gpu, args.arch, args.hidden_units, args.learning_rate)

# train model
helper.train_model(args.epochs, trainloader, validloader, model, device,
                   criterion, optimizer)

# save the trained model
helper.save_checkpoint(model, args.epochs, args.arch, optimizer, train_data)
                param_grid)

# defining parameter range - using only odd numbers
param_grid = {'n_neighbors': np.arange(1, 42, 2)}
find_best_param(KNeighborsClassifier, 'Survived', FEATURES, titanic_df,
                param_grid)

# Need to run only once to get best params for respective classifier
find_best()

result_dict = {
    'Survived - kNearestNeighbors':
        build_model(knearest_neighbors_fn, 'Survived', FEATURES, titanic_df,
                    options={'k': 33}),
    'Survived - Linear SVM':
        build_model(linear_svm_fn, 'Survived', FEATURES, titanic_df,
                    options={'C': 0.1, 'loss': 'squared_hinge'}),
    'Survived - SVM Linear':
        build_model(svm_linear_fn, 'Survived', FEATURES,
""" Create sequences that will be used as the input to the network. Create next_chars array that will serve as the labels during the training. """ sequences, next_chars = helper.create_sequences(text, SEQUENCE_LENGTH, SEQUENCE_STEP) char_to_index, indices_char = helper.get_chars_index_dicts(chars) """ The network is not able to work with characters and strings, we need to vectorise. """ X, y = helper.vectorize(sequences, SEQUENCE_LENGTH, chars, char_to_index, next_chars) """ Define the structure of the model. """ model = helper.build_model(SEQUENCE_LENGTH, chars) """ Train the model """ # model.fit(X, y, batch_size=128, nb_epoch=EPOCHS) model = load_model( "final.h5") # you can skip training by loading the trained weights for diversity in [0.2, 0.5, 1.0, 1.2]: print() print('----- diversity:', diversity) generated = '' # insert your 40-chars long string. OBS it needs to be exactly 40 chars! sentence = "The grass is green and my car is red lik"
param_grid = {'loss': ['deviance', 'exponential'],
              'criterion': ['friedman_mse', 'mse', 'mae'],
              'n_estimators': [10, 20, 50, 100, 200],
              'learning_rate': [1, 0.1, 0.01, 0.001]}
find_best_param(GradientBoostingClassifier, 'income', FEATURES, adult_df,
                param_grid)

# defining parameter range
param_grid = {'criterion': ['gini', 'entropy'],
              'max_depth': np.arange(1, 10, 1),
              'min_samples_split': np.arange(2, 21, 1)}
find_best_param(DecisionTreeClassifier, 'income', FEATURES, adult_df,
                param_grid)

# defining parameter range - using only odd numbers
param_grid = {'n_neighbors': np.arange(1, 42, 2)}
find_best_param(KNeighborsClassifier, 'income', FEATURES, adult_df, param_grid)

# Need to run only once to get best params for respective classifier
find_best()

result_dict = {
    'income - kNearestNeighbors':
        build_model(knearest_neighbors_fn, 'income', FEATURES, adult_df,
                    options={'k': 35}),
    'income - Linear SVM':
        build_model(linear_svm_fn, 'income', FEATURES, adult_df,
                    options={'C': 0.1, 'loss': 'hinge'}),
    'income - SVM Linear':
        build_model(svm_linear_fn, 'income', FEATURES, adult_df,
                    options={'C': 1, 'gamma': 0.1}),
    'income - SVM RBF':
        build_model(svm_rbf_fn, 'income', FEATURES, adult_df,
                    options={'C': 1, 'gamma': 0.1}),
    'income - Ada Boosting':
        build_model(ada_boosting_fn, 'income', FEATURES, adult_df,
                    options={'algorithm': 'SAMME.R', 'learning_rate': 1,
                             'n_estimators': 500}),
    'income - Gradient Boosting':
        build_model(gradient_boosting_fn, 'income', FEATURES, adult_df,
                    options={'criterion': 'friedman_mse', 'learning_rate': 0.1,
                             'loss': 'exponential', 'n_estimators': 100}),
    'income - Neural networks':
        build_model(neural_network_fn, 'income', FEATURES, adult_df,
                    options={'activation': 'tanh',
                             'learning_rate': 'invscaling',
                             'solver': 'adam'}),
    'income - Decision_tree':
        build_model(decision_tree_fn, 'income', FEATURES, adult_df,
                    options={'criterion': 'gini', 'max_depth': 3,
                             'min_samples_split': 2}),
}

# Running code with default values
plt = print_results(result_dict)
# plt.show()
plt.savefig(fig_path + 'results.png')

title = "Learning Curves for Decision Tree"
                                                    random_state=42)

# Build the model
input_shape = features.shape[1]
n_epochs = 5000
width = 30
height = 10
bestRMSLEForm = [0.051880246254638995, 28, 0]
bestRMSLEBand = [0.03375373930387272, 25, 0]

for j in range(1, height + 1):
    for i in range(1, width + 1):
        modelForm = build_model(input_shape, i, j)
        modelBand = build_model(input_shape, i, j)

        # Train the model
        print("training for width {}, height {} has started".format(i, j))
        early_stopForm = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       patience=200)
        historyForm = modelForm.fit(
            X_trainF,
            y_trainF,
            epochs=n_epochs,
            validation_split=0.2,
            verbose=0,
            callbacks=[early_stopForm, PrintProgress()])
action="store", type=int, default=5, help="number of epochs for training, default 5") parser.add_argument("-d" "--device", dest='device', action="store", type=str, default="cuda", help="device for training,default cuda") args = parser.parse_args() train_datasets, trainloader, validloader, testloader = process_data( args.data_dir) model = build_model(args.arch, args.hidden_units, args.output_units) running_losses, running_valid_losses, trained_model = train_model( args.data_dir, model, args.learning_rate, args.epochs, args.device) test_data(trained_model, args.data_dir, args.device) trained_model.class_to_idx = train_datasets.class_to_idx #device = torch.to("cuda" if torch.cuda.is_available() and args.device == 'cuda' else "cpu") #trained_model.to(device) torch.save( { 'arch': args.arch, 'hidden_units': args.hidden_units, 'output_units': args.output_units, 'state_dict': trained_model.state_dict(), 'class_to_idx': trained_model.class_to_idx }, args.save_dir)
Create next_chars array that will serve as the labels during the training.
"""
word_sequences, next_words = helper.create_word_sequences(
    words, WORD_SEQUENCE_LENGTH, WORD_SEQUENCE_STEP)
word_to_index, indices_word = helper.get_chars_index_dicts(unique_words)
# """
# The network is not able to work with characters and strings, we need to vectorise.
# """
X, y = helper.vectorize(word_sequences, WORD_SEQUENCE_LENGTH, unique_words,
                        word_to_index, next_words)
# """
# Define the structure of the model.
# """
model = helper.build_model(WORD_SEQUENCE_LENGTH, unique_words)
# """
# Train the model
# """
model.fit(X, y, batch_size=128, nb_epoch=EPOCHS)
# model = load_model("final.h5")  # you can skip training by loading the trained weights

for diversity in [0.2, 0.5, 1.0, 1.2]:
    print()
    print('----- diversity:', diversity)

    generated = []
    sentence = ['amalia', 'kamalia', 'tansseja']
    generated += sentence
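# helper.vectorize is not shown in these snippets. A minimal sketch of the
# one-hot vectorisation it likely performs on the (word or character)
# sequences (an assumption, not the original helper):
import numpy as np

def vectorize(sequences, seq_length, vocab, token_to_index, next_tokens):
    # X[i, t, k] is 1 iff token k appears at position t of sequence i;
    # y[i, k] is 1 iff token k is the token that follows sequence i.
    X = np.zeros((len(sequences), seq_length, len(vocab)), dtype=np.bool_)
    y = np.zeros((len(sequences), len(vocab)), dtype=np.bool_)
    for i, sequence in enumerate(sequences):
        for t, token in enumerate(sequence):
            X[i, t, token_to_index[token]] = 1
        y[i, token_to_index[next_tokens[i]]] = 1
    return X, y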
def train(opts):
    """
    Trains the model
    """
    torch.manual_seed(opts.seed)

    source_vocab = vocabs.load_vocabs_from_file(opts.source_vocab)
    target_vocab = vocabs.load_vocabs_from_file(opts.target_vocab)

    dataset = Seq2SeqDataset(
        opts.training_dir,
        source_vocab,
        target_vocab,
        opts.source_lang,
        opts.target_lang,
    )
    num_training_data = int(len(dataset) * opts.train_val_ratio)
    num_val_data = len(dataset) - num_training_data
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [num_training_data, num_val_data])

    train_dataloader = Seq2SeqDataLoader(
        train_dataset,
        dataset.source_pad_id,
        dataset.target_pad_id,
        batch_first=True,
        batch_size=opts.batch_size,
        shuffle=True,
        pin_memory=(opts.device.type == "cuda"),
        num_workers=4,
    )
    val_dataloader = Seq2SeqDataLoader(
        val_dataset,
        dataset.source_pad_id,
        dataset.target_pad_id,
        batch_first=True,
        batch_size=opts.batch_size,
        shuffle=True,
        pin_memory=(opts.device.type == "cuda"),
        num_workers=4,
    )

    model = helper.build_model(
        opts,
        dataset.source_vocab_size,
        dataset.target_vocab_size,
        dataset.source_pad_id,
        dataset.target_sos,
        dataset.target_eos,
        dataset.target_pad_id,
        opts.device,
    )

    # Either train for a fixed number of epochs, or train until the validation
    # loss has not improved for `patience` epochs (early stopping).
    patience = opts.patience
    num_epochs = opts.epochs
    if opts.patience is None:
        patience = float("inf")
    else:
        num_epochs = float("inf")

    best_val_loss = float("inf")
    num_poor = 0
    epoch = 1

    optimizer = torch.optim.Adam(model.parameters(), lr=opts.learning_rate)

    if opts.resume_from_checkpoint and os.path.isfile(opts.resume_from_checkpoint):
        print("Loading from checkpoint")
        best_val_loss, num_poor, epoch = load_checkpoint(
            opts.resume_from_checkpoint, model, optimizer)
        print(f"Previous state > Epoch {epoch}: Val loss={best_val_loss}, "
              f"num_poor={num_poor}")

    while epoch <= num_epochs and num_poor < patience:
        # Train
        loss_function = nn.CrossEntropyLoss(ignore_index=dataset.target_pad_id)
        train_loss = train_for_one_epoch(model, loss_function, optimizer,
                                         train_dataloader, opts.device)

        # Evaluate the model
        val_loss = test.evaluate_model_by_loss_function(
            model, loss_function, val_dataloader, opts.device)
        print(f"Epoch {epoch}: Train loss={train_loss}, Val loss={val_loss}")

        model.cpu()
        if val_loss > best_val_loss:
            num_poor += 1
        else:
            num_poor = 0
            best_val_loss = val_loss
            print("Saved model")
            torch.save(model.state_dict(), opts.model_path)
        save_checkpoint(
            opts.save_checkpoint_to,
            model,
            optimizer,
            best_val_loss,
            num_poor,
            epoch,
        )
        print("Saved checkpoint")
        model.to(opts.device)

        epoch += 1

    if epoch > num_epochs:
        print(f"Finished {num_epochs} epochs")
    else:
        print(f"Loss did not improve after {patience} epochs")

    val_bleu_score = test.evaluate_model_by_bleu_score(
        model,
        val_dataloader,
        opts.device,
        dataset.target_sos,
        dataset.target_eos,
        dataset.target_pad_id,
        target_vocab.get_id2word(),
    )
    print(f"Final BLEU score: {val_bleu_score}. Done.")