# Imports inferred from usage below; project-specific pieces (hp, train_input_fn,
# UTransformer, Loss, nino_seq, score) are assumed to be defined elsewhere in this repo.
import re
import time

import numpy as np
import tensorflow as tf
from loguru import logger
from progress.spinner import MoonSpinner


def main():
    """Single-GPU training loop: train for hp.num_epochs, evaluate on the test set
    every hp.num_epoch_record epochs, and save a new checkpoint after each evaluation."""
    train_dataset, test_dataset = train_input_fn()
    optimizer = tf.keras.optimizers.Adam(learning_rate=hp.lr)
    model = UTransformer(hp)
    model_loss = Loss(model)
    best_score = float('-inf')
    not_improved_count = 0

    # Resume from an existing checkpoint if one is given, otherwise start from 'ckp_0'.
    checkpoint_file = hp.ckpt
    if checkpoint_file == '':
        checkpoint_file = 'ckp_0'
    else:
        model.load_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}')

    logger.add(f"{hp.logdir}/cmip_train.log", enqueue=True)

    for epoch in range(hp.num_epochs):
        for step, (x_batch_train, ys_batch_train) in enumerate(train_dataset):
            start = time.perf_counter()  # time.clock() was removed in Python 3.8
            with tf.GradientTape() as tape:
                y_predict = model([x_batch_train, ys_batch_train], training=True)
                loss_ssim, loss_l2, loss_l1, loss = model_loss([y_predict, ys_batch_train[1]])
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            elapsed = time.perf_counter() - start

            template = ("step {} loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f}. "
                        "({:1.2f}s/step)")
            logger.info(template.format(step, loss.numpy(), loss_ssim.numpy(),
                                        loss_l2.numpy(), loss_l1.numpy(), elapsed))

        if epoch % hp.num_epoch_record == 0:
            # Evaluate on the full test set, accumulating the losses per batch.
            loss_test = 0
            loss_ssim_test = 0
            loss_l2_test = 0
            loss_l1_test = 0
            count = 0
            y_true, y_pred = [], []
            spinner = MoonSpinner('Testing ')
            for step, (x_batch_test, ys_batch_test) in enumerate(test_dataset):
                y_predict = model([x_batch_test, ys_batch_test], training=False)
                loss_ssim, loss_l2, loss_l1, loss = model_loss([y_predict, ys_batch_test[1]])
                loss_ssim_test += loss_ssim.numpy()
                loss_l2_test += loss_l2.numpy()
                loss_l1_test += loss_l1.numpy()
                loss_test += loss.numpy()
                count += 1
                y_true.append(np.array(nino_seq(ys_batch_test[1][:, :, :, :, 0])))
                y_pred.append(np.array(nino_seq(y_predict[:, :, :, :, 0])))
                spinner.next()

            y_true = tf.concat(y_true, axis=0)
            y_pred = tf.concat(y_pred, axis=0)
            sco = score(y_true, y_pred)
            if sco > best_score:
                best_score = sco
                not_improved_count = 0
                best_state = True
            else:
                not_improved_count += 1
                best_state = False
            spinner.finish()
            logger.info("TEST COMPLETE!")

            template = ("TEST DATASET STATISTICS: "
                        "loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f}, "
                        "acc skill score is {:1.5f}.")
            logger.info(template.format(loss_test / count, loss_ssim_test / count,
                                        loss_l2_test / count, loss_l1_test / count, sco))

            # Bump the numeric suffix of the checkpoint name, e.g. 'ckp_0' -> 'ckp_1'.
            total_epoch = int(re.findall(r"\d+", checkpoint_file)[0])
            checkpoint_file = checkpoint_file.replace(f'_{total_epoch}', f'_{total_epoch + 1}')
            # if not_improved_count == hp.early_stop_patience:
            #     print("Validation performance didn't improve for {} epochs. "
            #           "Training stops.".format(hp.early_stop_patience))
            #     break
            # if best_state:
            model.save_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}', save_format='tf')
            # model.save("my_model")
            logger.info("Saved checkpoint_file {}".format(checkpoint_file))
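
# --- Hedged sketch (not part of the original file): `nino_seq` and `score` used in
# --- main() are assumed project helpers. The illustrative versions below only show
# --- the kind of spatial reduction and skill metric they appear to perform; the box
# --- indices and the exact score formula are placeholders, not the repo's definitions.
def nino_seq_sketch(sst_grids):
    """Average SST-anomaly grids (batch, time, lat, lon) over a fixed box to get an
    index sequence (batch, time); the box bounds here are hypothetical."""
    lat_box = slice(10, 13)   # hypothetical latitude rows of the index region
    lon_box = slice(38, 49)   # hypothetical longitude columns of the index region
    return tf.reduce_mean(sst_grids[:, :, lat_box, lon_box], axis=[2, 3])


def score_sketch(y_true, y_pred):
    """Mean Pearson correlation between observed and predicted index sequences
    across lead times -- one simple notion of an 'acc skill score'."""
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    t_c = y_true - tf.reduce_mean(y_true, axis=0, keepdims=True)
    p_c = y_pred - tf.reduce_mean(y_pred, axis=0, keepdims=True)
    cov = tf.reduce_mean(t_c * p_c, axis=0)
    denom = tf.math.reduce_std(y_true, axis=0) * tf.math.reduce_std(y_pred, axis=0)
    return float(tf.reduce_mean(cov / (denom + 1e-8)))
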
def evaluateTestingPairs(encoder, decoder, pairs, input_lang, output_lang, args):
    """Decode every test pair, split results into short (<= 15 reference tokens) and
    long (> 15) buckets, and report per-bucket and length-weighted overall scores."""
    score_short = 0
    score_long = 0
    list_cand_short = []
    list_ref_short = []
    list_cand_long = []
    list_ref_long = []
    print("Evaluating {} testing sentences...".format(len(pairs)))
    for pair in pairs:
        output_words = evaluate(encoder, decoder, pair[0], input_lang, output_lang, args)
        output_sentence = ' '.join(output_words)
        sent_length = len(pair[1].split(' '))
        if sent_length > 15:
            list_cand_long.append(output_sentence)
            list_ref_long.append(pair[1])
        else:
            list_cand_short.append(output_sentence)
            list_ref_short.append(pair[1])

    print("Num of short sentences (length <= 15):", len(list_cand_short))
    if len(list_cand_short) > 0:
        if args.metric == "MULTI":
            score_short_rouge1, score_short_rouge2, score_short_bleu, score_short_bleu_clip = \
                multi_score(list_cand_short, list_ref_short)
            print("score for short sentences (length <= 15):")
            print("ROUGE1:", score_short_rouge1)
            print("ROUGE2:", score_short_rouge2)
            print("BLEU:", score_short_bleu)
            print("BLEU_CLIP:", score_short_bleu_clip)
            print()
        else:
            score_short = score(list_cand_short, list_ref_short, args.metric)
            print("{} score for short sentences (length <= 15): {}".format(args.metric, score_short))

    print("Num of long sentences (length > 15):", len(list_cand_long))
    if len(list_cand_long) > 0:
        if args.metric == "MULTI":
            score_long_rouge1, score_long_rouge2, score_long_bleu, score_long_bleu_clip = \
                multi_score(list_cand_long, list_ref_long)
            print("score for long sentences (length > 15):")
            print("ROUGE1:", score_long_rouge1)
            print("ROUGE2:", score_long_rouge2)
            print("BLEU:", score_long_bleu)
            print("BLEU_CLIP:", score_long_bleu_clip)
            print()
        else:
            score_long = score(list_cand_long, list_ref_long, args.metric)
            print("{} score for long sentences (length > 15): {}".format(args.metric, score_long))

    # Length-weighted average of the short- and long-bucket scores.
    # Note: the MULTI branch assumes both buckets are non-empty; otherwise the
    # per-bucket variables referenced below would be undefined.
    get_score_overall = lambda score_short, score_long: \
        (score_short * len(list_cand_short) + score_long * len(list_cand_long)) \
        / (len(list_cand_short) + len(list_cand_long))

    if args.metric == "MULTI":
        score_overall_rouge1 = get_score_overall(score_short_rouge1, score_long_rouge1)
        score_overall_rouge2 = get_score_overall(score_short_rouge2, score_long_rouge2)
        score_overall_bleu = get_score_overall(score_short_bleu, score_long_bleu)
        score_overall_bleu_clip = get_score_overall(score_short_bleu_clip, score_long_bleu_clip)
        print("Overall:")
        print("ROUGE1:", score_overall_rouge1)
        print("ROUGE2:", score_overall_rouge2)
        print("BLEU:", score_overall_bleu)
        print("BLEU_CLIP:", score_overall_bleu_clip)
        print()
    else:
        score_overall = get_score_overall(score_short, score_long)
        print("Overall {} score: {}".format(args.metric, score_overall))
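
# --- Hedged sketch (not part of the original file): `score` and `multi_score` above
# --- are assumed project helpers. The pure-Python version below only illustrates the
# --- (rouge1, rouge2, bleu, bleu_clip) tuple shape that multi_score is expected to
# --- return; it is a crude corpus-level overlap metric, not the repo's actual scorer.
from collections import Counter


def _ngram_f1_sketch(cands, refs, n):
    """Corpus-level n-gram overlap F1 (ROUGE-n style) over whitespace tokens."""
    overlap = cand_total = ref_total = 0
    for cand, ref in zip(cands, refs):
        c = Counter(zip(*[cand.split()[i:] for i in range(n)]))
        r = Counter(zip(*[ref.split()[i:] for i in range(n)]))
        overlap += sum((c & r).values())
        cand_total += sum(c.values())
        ref_total += sum(r.values())
    prec = overlap / cand_total if cand_total else 0.0
    rec = overlap / ref_total if ref_total else 0.0
    return 2 * prec * rec / (prec + rec) if prec + rec else 0.0


def multi_score_sketch(cands, refs):
    """Return a (rouge1, rouge2, bleu, bleu_clip) tuple like the assumed multi_score."""
    rouge1 = _ngram_f1_sketch(cands, refs, 1)
    rouge2 = _ngram_f1_sketch(cands, refs, 2)
    # Crude unigram-precision stand-ins: 'bleu' counts every candidate token that
    # occurs in the reference, 'bleu_clip' clips each count by the reference count.
    match = match_clip = total = 0
    for cand, ref in zip(cands, refs):
        c, r = Counter(cand.split()), Counter(ref.split())
        total += sum(c.values())
        match += sum(cnt for tok, cnt in c.items() if tok in r)
        match_clip += sum(min(cnt, r[tok]) for tok, cnt in c.items())
    bleu = match / total if total else 0.0
    bleu_clip = match_clip / total if total else 0.0
    return rouge1, rouge2, bleu, bleu_clip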