def _train_gat_trans(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    # Load the eval src and tgt files for evaluation
    reference = open(args.eval_ref, 'r')
    eval_file = open(args.eval, 'r')

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab,
     max_length_targ, dataset_size) = GetGATDataset(args)

    model = TransGAT(args, src_vocab_size, src_vocab,
                     tgt_vocab_size, max_length_targ, tgt_vocab)
    loss_layer = LossLayer(tgt_vocab_size, 0.1)

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9, beta2=0.98, epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate,
                                           beta1=0.9, beta2=0.98, epsilon=1e-9)

    # Save model parameters for future use
    if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang, args.model)):
        with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model), 'rb') as fp:
            PARAMS = pickle.load(fp)
        print('Loaded Parameters..')
    else:
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)
        PARAMS = {
            "args": args,
            "src_vocab_size": src_vocab_size,
            "tgt_vocab_size": tgt_vocab_size,
            "max_tgt_length": max_length_targ,
            "dataset_size": dataset_size,
            "step": 0
        }

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    ckpt = tf.train.Checkpoint(
        model=model,
        optimizer=optimizer
    )
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
        print('Latest checkpoint restored!!')

    if args.epochs is not None:
        steps = args.epochs * steps_per_epoch
    else:
        steps = args.steps

    def train_step(nodes, labels, node1, node2, targ):
        with tf.GradientTape() as tape:
            predictions = model(nodes, labels, node1, node2, targ, None)
            predictions = model.metric_layer([predictions, targ])
            batch_loss = loss_layer([predictions, targ])

        gradients = tape.gradient(batch_loss, model.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        acc = model.metrics[0].result()
        ppl = model.metrics[-1].result()
        batch_loss = train_loss(batch_loss)

        return batch_loss, acc, ppl

    # Eval function
    def eval_step(steps=None):
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(EvalResultsFile, 'w+')
        if steps is None:
            dev_set = eval_set
        else:
            dev_set = eval_set.take(steps)

        for (batch, (nodes, labels, node1, node2, targets)) in tqdm(enumerate(dev_set)):
            predictions = model(nodes, labels, node1, node2, targ=None, mask=None)
            pred = [predictions['outputs'].numpy().tolist()]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = tgt_vocab.DecodeIds(list(pred[0][i]))
                    sentence = sentence.partition("<start>")[2].partition("<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = tgt_vocab.sequences_to_texts(i)
                    sentence = [j.partition("<start>")[2].partition("<end>")[0]
                                for j in sentences]
                    for w in sentence:
                        eval_results.write(w + '\n')
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = rouge_n(results, ref_target)
        eval_results.close()
        model.trainable = True

        return rouge

    # Test function
    def test_step():
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(TestResults, 'w+')
        for (batch, (nodes, labels, node1, node2)) in tqdm(enumerate(test_set)):
            predictions = model(nodes, labels, node1, node2, targ=None, mask=None)
            pred = [predictions['outputs'].numpy().tolist()]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = tgt_vocab.DecodeIds(list(pred[0][i]))
                    sentence = sentence.partition("<start>")[2].partition("<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = tgt_vocab.sequences_to_texts(i)
                    sentence = [j.partition("<start>")[2].partition("<end>")[0]
                                for j in sentences]
                    for w in sentence:
                        eval_results.write(w + '\n')
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = rouge_n(results, ref_target)
        score = 0
        eval_results.close()
        model.trainable = True
        process_results(TestResults)

        return rouge, score

    if args.mode == 'train':
        train_loss.reset_states()
        train_accuracy.reset_states()

        for (batch, (nodes, labels, node1, node2, targ)) in tqdm(enumerate(dataset.repeat(-1))):
            if PARAMS['step'] < steps:
                start = time.time()
                PARAMS['step'] += 1
                if args.decay is not None:
                    optimizer._lr = learning_rate(tf.cast(PARAMS['step'], dtype=tf.float32))

                batch_loss, acc, ppl = train_step(nodes, labels, node1, node2, targ)
                if batch % 100 == 0:
                    print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                          'Accuracy {:.4f} Perplex {:.4f}'.format(PARAMS['step'],
                                                                  optimizer._lr,
                                                                  train_loss.result(),
                                                                  acc.numpy(),
                                                                  ppl.numpy()))
                    print('Time {} \n'.format(time.time() - start))

                    # log the training results
                    tf.io.write_file(log_file,
                                     f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                                     f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

                if batch % args.eval_steps == 0:
                    rouge = eval_step(5)
                    print('\n' + '---------------------------------------------------------------------' + '\n')
                    print('ROUGE {:.4f}'.format(rouge))
                    print('\n' + '---------------------------------------------------------------------' + '\n')

                if batch % args.checkpoint == 0:
                    print("Saving checkpoint \n")
                    ckpt_save_path = ckpt_manager.save()
                    with open(log_dir + '/' + args.lang + '_' + args.model + '_params', 'wb+') as fp:
                        pickle.dump(PARAMS, fp)
            else:
                break

        rouge, score = test_step()
        print('\n' + '---------------------------------------------------------------------' + '\n')
        print('ROUGE {:.4f}'.format(rouge))
        print('\n' + '---------------------------------------------------------------------' + '\n')

    elif args.mode == 'test':
        rouge, score = test_step()
        print('\n' + '---------------------------------------------------------------------' + '\n')
        print('ROUGE {:.4f}'.format(rouge))
        print('\n' + '---------------------------------------------------------------------' + '\n')

    else:
        raise ValueError("Mode must be either 'train' or 'test'")
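# Illustrative aside, not part of the training script: a minimal sketch of the
# warmup learning-rate schedule that CustomSchedule (imported from
# src.utils.model_utils) is assumed to follow, i.e. the standard Transformer
# recipe lr = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5).
# The class name WarmupScheduleSketch is hypothetical; only the call pattern
# learning_rate(step) matches how the trainers above refresh optimizer._lr.
import tensorflow as tf


class WarmupScheduleSketch:
    def __init__(self, d_model, warmup_steps=4000):
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = float(warmup_steps)

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)                    # inverse-sqrt decay after warmup
        arg2 = step * (self.warmup_steps ** -1.5)     # linear warmup
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)


# Example use, mirroring the trainers above:
# learning_rate = WarmupScheduleSketch(args.emb_dim, warmup_steps=args.decay_steps)
# optimizer._lr = learning_rate(tf.cast(PARAMS['step'], dtype=tf.float32))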
from src.MultilingualDataLoader import ProcessMultilingualDataset
from src.arguments import get_args
from src.models.GraphAttentionModel import TransGAT
from src.utils.metrics import LossLayer
from src.utils.model_utils import CustomSchedule, _set_up_dirs
from src.utils.rogue import rouge_n

# model parameters
if __name__ == "__main__":
    args = get_args()
    global step

    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    if args.enc_type == 'gat' and args.dec_type == 'transformer':
        OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type

        (dataset, src_vocab, src_vocab_size, tgt_vocab, tgt_vocab_size,
         MULTI_BUFFER_SIZE, steps_per_epoch, MaxSeqSize) = ProcessMultilingualDataset(args)

        # Load the eval src and tgt files for evaluation
        reference = open(args.eval_ref, 'r')
        eval_file = open(args.eval, 'r')

        model = TransGAT(args, src_vocab_size, src_vocab,
                         tgt_vocab_size, MaxSeqSize, tgt_vocab)
        loss_layer = LossLayer(tgt_vocab_size, 0.1)

        if args.decay is not None:
def _train_transformer(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     src_vocab_size, vocab, dataset_size, max_seq_len) = GetDataset(args)

    reference = open(args.eval_ref, 'r')

    if args.epochs is not None:
        steps = args.epochs * steps_per_epoch
    else:
        steps = args.steps

    # Save model parameters for future use
    if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang, args.model)):
        with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model), 'rb') as fp:
            PARAMS = pickle.load(fp)
        print('Loaded Parameters..')
    else:
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)
        PARAMS = {
            "args": args,
            "vocab_size": src_vocab_size,
            "dataset_size": dataset_size,
            "max_tgt_length": max_seq_len,
            "step": 0
        }

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)
        optimizer = LazyAdam(learning_rate=learning_rate,
                             beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    else:
        optimizer = LazyAdam(learning_rate=args.learning_rate,
                             beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    model = Transformer(args, src_vocab_size)
    loss_layer = LossLayer(src_vocab_size, 0.1)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    if args.learning_rate is not None:
        optimizer._lr = args.learning_rate

    def train_step(inp, tar):
        with tf.GradientTape() as tape:
            predictions = model(inp, tar, training=model.trainable)
            predictions = model.metric_layer([predictions, tar])
            loss = loss_layer([predictions, tar])
            reg_loss = tf.losses.get_regularization_loss()
            loss += reg_loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        acc = model.metrics[0].result()
        ppl = model.metrics[-1].result()

        return loss, acc, ppl

    def eval_step(steps=None):
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(EvalResultsFile, 'w+')
        if steps is None:
            dev_set = eval_set
        else:
            dev_set = eval_set.take(steps)

        for (batch, (inp, tar)) in tqdm(enumerate(dev_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [predictions['outputs'].numpy().tolist()]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = vocab.DecodeIds(list(pred[0][i]))
                    sentence = sentence.partition("<start>")[2].partition("<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [j.partition("start")[2].partition("end")[0]
                                for j in sentences]
                    for w in sentence:
                        eval_results.write(w + '\n')
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = rouge_n(results, ref_target)
        score = 0
        eval_results.close()
        model.trainable = True

        return rouge, score

    def test_step():
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(TestResults, 'w+')
        for (batch, inp) in tqdm(enumerate(test_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [predictions['outputs'].numpy().tolist()]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = vocab.DecodeIds(list(pred[0][i]))
                    sentence = sentence.partition("<start>")[2].partition("<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [j.partition("start")[2].partition("end")[0]
                                for j in sentences]
                    for w in sentence:
                        eval_results.write(w + '\n')
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = rouge_n(results, ref_target)
        score = 0
        eval_results.close()
        model.trainable = True

        return rouge, score

    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (inp, tgt)) in tqdm(enumerate(dataset.repeat(-1))):
        if PARAMS['step'] < steps:
            start = time.time()
            PARAMS['step'] += 1
            if args.decay is not None:
                optimizer._lr = learning_rate(tf.cast(PARAMS['step'], dtype=tf.float32))

            batch_loss, acc, ppl = train_step(inp, tgt)
            if batch % 100 == 0:
                print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                      'Accuracy {:.4f} Perplex {:.4f}'.format(PARAMS['step'],
                                                              optimizer._lr,
                                                              train_loss.result(),
                                                              acc.numpy(),
                                                              ppl.numpy()))
                print('Time {} \n'.format(time.time() - start))

                # log the training results
                tf.io.write_file(log_file,
                                 f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                                 f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

            if batch % args.eval_steps == 0:
                rouge, score = eval_step(5)
                print('\n' + '---------------------------------------------------------------------' + '\n')
                print('ROUGE {:.4f} BLEU {:.4f}'.format(rouge, score))
                print('\n' + '---------------------------------------------------------------------' + '\n')

            if batch % args.checkpoint == 0:
                print("Saving checkpoint \n")
                ckpt_save_path = ckpt_manager.save()
                with open(log_dir + '/' + args.lang + '_' + args.model + '_params', 'wb+') as fp:
                    pickle.dump(PARAMS, fp)
        else:
            break

    rouge, score = test_step()
    print('\n' + '---------------------------------------------------------------------' + '\n')
    print('ROUGE {:.4f} BLEU {:.4f}'.format(rouge, score))
    print('\n' + '---------------------------------------------------------------------' + '\n')
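# Illustrative aside, not part of the training script: a minimal sketch of the
# label-smoothed cross-entropy that LossLayer(vocab_size, 0.1) is assumed to
# compute in the trainers above, i.e. one-hot targets mixed with a uniform
# distribution (smoothing = 0.1) and padding positions (token id 0) masked out.
# The function name and the exact masking are assumptions; the real layer lives
# in src.utils.metrics.
import tensorflow as tf


def label_smoothed_loss_sketch(logits, targets, vocab_size, smoothing=0.1):
    confidence = 1.0 - smoothing
    low_confidence = smoothing / (vocab_size - 1)
    # Soft targets: `confidence` on the gold token, the rest spread uniformly.
    soft_targets = tf.one_hot(targets, depth=vocab_size,
                              on_value=confidence, off_value=low_confidence)
    xent = tf.nn.softmax_cross_entropy_with_logits(labels=soft_targets, logits=logits)
    mask = tf.cast(tf.not_equal(targets, 0), xent.dtype)  # ignore padded positions
    return tf.reduce_sum(xent * mask) / tf.reduce_sum(mask)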
def _train_rnn(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     vocab_inp_size, vocab_tgt_size, target_lang) = GetDataset(args)

    step = 0

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9, beta2=0.98, epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(beta1=0.9, beta2=0.98, epsilon=1e-9)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    model = RNNModel.RNNModel(vocab_inp_size, vocab_tgt_size, target_lang, args)
    enc_hidden = model.encoder.initialize_hidden_state()

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    def loss_function(real, pred):
        # Mask out padding positions (token id 0) before averaging the loss
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask

        return tf.reduce_mean(loss_)

    def train_step(inp, targ, enc_hidden):
        with tf.GradientTape() as tape:
            predictions, dec_hidden, loss = model(inp, targ, enc_hidden)
            reg_loss = tf.losses.get_regularization_loss()
            loss += reg_loss

        batch_loss = (loss / int(targ.shape[1]))
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss

    def eval_step(inp, trg, enc_hidden):
        model.trainable = False
        predictions, dec_hidden, eval_loss = model(inp, trg, enc_hidden)
        eval_loss = (eval_loss / int(trg.shape[1]))
        model.trainable = True

        return eval_loss

    for epoch in range(args.epochs):
        print('Learning Rate' + str(optimizer._lr) + ' Step ' + str(step))
        # Note: the progress-bar total uses a hard-coded dataset size of 38668 examples
        with tqdm(total=(38668 // args.batch_size)) as pbar:
            for (batch, (inp, targ)) in tqdm(enumerate(dataset)):
                start = time.time()
                step += 1
                if args.decay is not None:
                    optimizer._lr = learning_rate(tf.cast(step, dtype=tf.float32))

                if batch % args.eval_steps == 0:
                    eval_loss = eval_step(inp, targ, enc_hidden)
                    print('\n' + '---------------------------------------------------------------------' + '\n')
                    print('Epoch {} Batch {} Eval Loss {:.4f} '.format(epoch, batch, eval_loss.numpy()))
                    print('\n' + '---------------------------------------------------------------------' + '\n')
                else:
                    batch_loss = train_step(inp, targ, enc_hidden)
                    print('Epoch {} Batch {} Batch Loss {:.4f} '.format(epoch, batch, batch_loss.numpy()))

                    # log the training results
                    tf.io.write_file(log_file, "Epoch {}".format(epoch))
                    tf.io.write_file(log_file, "Train Loss: {}".format(batch_loss))

                if batch % args.checkpoint == 0:
                    ckpt_save_path = ckpt_manager.save()
                    print("Saving checkpoint \n")

                print('Time {} '.format(time.time() - start))
                pbar.update(1)

        if args.decay is not None:
            optimizer._lr = optimizer._lr * args.decay_rate ** (batch // 1)
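# Illustrative aside (assumption, not the repo's RNNModel): a minimal sketch of
# the teacher-forced decoding loop that model(inp, targ, enc_hidden) is presumed
# to run internally, which would explain why _train_rnn divides the returned
# loss by the target length targ.shape[1]. The encoder/decoder call signatures
# and the helper name teacher_forced_loss_sketch are hypothetical.
import tensorflow as tf


def teacher_forced_loss_sketch(encoder, decoder, loss_function, inp, targ, enc_hidden):
    loss = 0.0
    enc_output, enc_hidden = encoder(inp, enc_hidden)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims(targ[:, 0], 1)             # <start> tokens
    for t in range(1, targ.shape[1]):
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
        loss += loss_function(targ[:, t], predictions)     # masked cross-entropy per step
        dec_input = tf.expand_dims(targ[:, t], 1)          # feed the gold token (teacher forcing)
    return loss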
from src.models.Transformer import Transformer
from src.utils.Optimizers import LazyAdam
from src.utils.metrics import LossLayer
from src.utils.model_utils import CustomSchedule, _set_up_dirs
from src.utils.rogue import rouge_n
from src.utils.model_utils import process_results

# model parameters
if __name__ == "__main__":
    args = get_args()
    global step

    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    if args.model == 'gat':
        OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type

        (dataset, src_vocab, src_vocab_size, tgt_vocab, tgt_vocab_size,
         MULTI_BUFFER_SIZE, steps_per_epoch, MaxSeqSize) = ProcessMultilingualDataset(args)

        # Load the eval src and tgt files for evaluation
        reference = open(args.eval_ref, 'r')
        eval_file = open(args.eval, 'r')

        model = TransGAT(args, src_vocab_size, src_vocab,
                         tgt_vocab_size, MaxSeqSize, tgt_vocab)
        loss_layer = LossLayer(tgt_vocab_size, 0.1)

        if args.decay is not None:
            learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)