def generate(train_dataset, src, trg, model_name, train_domain, tok_folder):
    """Preprocess each test-domain dataset with the training vocabulary and
    generate translations with the trained model for every beam width."""
    # Evaluation datasets (health / biological / merged) under `tok_folder`
    domain_dirnames = [
        f"health_fairseq_vhealth_{src}-{trg}",
        f"biological_fairseq_vbiological_{src}-{trg}",
        f"merged_fairseq_vmerged_{src}-{trg}",
    ]
    test_datasets = [os.path.join(DATASETS_PATH, tok_folder, d) for d in domain_dirnames]

    for test_dataset in test_datasets:
        test_domain, (test_src, test_trg) = utils.get_dataset_ids(test_dataset)

        # Strip tokenizer/vocab suffixes to get the bare domain id
        for suffix in ("_fairseq", "_vhealth", "_vbiological", "_vmerged"):
            test_domain = test_domain.replace(suffix, "")

        print("#############################################")
        print(f"=> TESTING MODEL FROM '{train_domain}' IN DOMAIN '{test_domain}'")

        # Create the evaluation folder for this (model, test-domain) pair
        eval_path = os.path.join(train_dataset, DATASET_EVAL_NAME, model_name, test_domain)
        Path(eval_path).mkdir(parents=True, exist_ok=True)

        # Preprocess the test-domain dataset with the *training* tokenizers/vocab
        source_dataset = test_dataset
        vocab_path = train_dataset
        output_path = eval_path
        print(f"\t- Preprocessing datasets for: {test_domain}...")
        subprocess.call(['sh', './scripts/3_preprocess.sh', source_dataset, vocab_path,
                         output_path, tok_folder, src, trg])

        # Generate translations, one output folder per beam width
        for beam in BEAMS:
            eval_path_bin = os.path.join(eval_path, "data-bin")
            model_path = os.path.join(train_dataset, "checkpoints", model_name)

            # Create output path
            output_path = os.path.join(eval_path, f"beam{beam}")
            Path(output_path).mkdir(parents=True, exist_ok=True)

            print(f"\t- Generating translations for: {test_domain}...")
            subprocess.call(['sh', './scripts/5_generate.sh', eval_path_bin, model_path,
                             output_path, src, trg, str(beam)])

        # Visual separator between domains
        print("")
        print("########################################################################")
        print("########################################################################")
        print("")
        print("")
        print("------------------------------------------------------------------------")
        print("------------------------------------------------------------------------")
        print("")
(f"health_fairseq_v{vocab}_es-en", [("checkpoint_best.pt", f"Health\n(small; VD={vocab[0].upper()})")]), (f"biological_fairseq_v{vocab}_es-en", [("checkpoint_best.pt", f"Biological\n(small; VD={vocab[0].upper()})")]), # (f"merged_fairseq_v{vocab}_es-en", [("checkpoint_best.pt", f"Merged\n(small; VD={vocab[0].upper()})")]), (f"health_biological_fairseq_v{vocab}_es-en", [("checkpoint_best.pt", f"H→B\n(small; VD={vocab[0].upper()})")]), ] ] for dataset, models in datasets: print(f"Setting vocab ({vocab})...") domain, (src, trg) = utils.get_dataset_ids(dataset) fname_base = f"{domain}_{src}-{trg}" # Train model for model_name, label in models: print(f"Getting model ({fname_base}; {model_name})...") metrics += get_metrics(dataset, src, trg, model_name=model_name, label=label, train_domain=domain) # Save data df = pd.DataFrame(metrics) df.to_csv(
from pathlib import Path import json from plot.data import experiments from mt import DATASETS_PATH, DATASET_EVAL_NAME, DATASET_SUMMARY_NAME from mt import helpers, utils import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns sns.set() DATASET = "europarl_fairseq_es-en" # basepath = f"/Users/salvacarrion/Desktop/euro es-en 32k" domain, (SRC, TRG) = utils.get_dataset_ids(DATASET) domain = domain.replace("_fairseq", "").strip() LIMIT = 100 VOCAB_LANG = TRG # Create folder summary_path = os.path.join(DATASETS_PATH, "custom_plots") Path(summary_path).mkdir(parents=True, exist_ok=True) def plot_tok_distribution(show_values=False, savepath="."): sns.set(font_scale=1.5) # crazy big title = f"{domain} {VOCAB_LANG} (32k vs 64)" filename = f"tok_distribution__{title.replace(' ', '_').lower()}__limit{LIMIT}".lower( )
def get_beam_scores(train_dataset, src, trg, tok_folder):
    """Parse generation/metric files for `train_dataset` and dump them to JSON.

    For every beam width in BEAMS, reads the last line of the fairseq-generate
    log plus the sacrebleu BLEU/CHRF summaries under `<eval_path>/beam{beam}/`,
    extracts the scores with regexes and writes everything to
    `<eval_path>/beam_metrics.json`.

    NOTE(review): `model_name` is read from module/global scope — confirm it is
    defined wherever this function is called. `tok_folder` is currently unused
    but kept for signature compatibility with the sibling helpers.
    """
    # Bug fix: the original called utils.get_dataset_ids(dataset), but `dataset`
    # is undefined here — the parameter is `train_dataset`.
    domain, (src, trg) = utils.get_dataset_ids(train_dataset)
    fname_base = f"{domain}_{src}-{trg}"
    print(f"=> TESTING MODEL FROM '{fname_base}'")

    # Create path
    test_domain = domain
    eval_path = os.path.join(train_dataset, DATASET_EVAL_NAME, model_name, test_domain)

    # Collect scores per beam width
    metrics = {"beams": {}}
    for beam in BEAMS:
        metrics["beams"][f"beam{beam}"] = {}

        # Set output path
        output_path = os.path.join(eval_path, f"beam{beam}")

        # Fairseq BLEU: summary is the last line of the fairseq-generate log
        with open(os.path.join(output_path, "generate-test.txt"), 'r') as f:
            score_summary = f.readlines()[-1]
        print(score_summary)
        # Bug fix: escaped the decimal point (was `\d+.\d+`, where `.` matches
        # any character)
        pattern = r"beam=(\d+): BLEU\d+ = (\d+\.\d+)"
        beam_width, score_bleu = re.search(pattern, score_summary).groups()
        beam_width, score_bleu = int(beam_width), float(score_bleu)
        metrics["beams"][f"beam{beam}"]['fairseq_bleu'] = score_bleu

        # Sacrebleu: BLEU
        with open(os.path.join(output_path, "metrics_bleu.txt"), 'r') as f2:
            score_summary = f2.readlines()[-1]
        print(score_summary)
        pattern = r"BLEU.* = (\d+\.\d+) \d+\.\d+\/"
        score_bleu = float(re.search(pattern, score_summary).groups()[0])
        metrics["beams"][f"beam{beam}"]['sacrebleu_bleu'] = score_bleu

        # Sacrebleu: CHRF
        with open(os.path.join(output_path, "metrics_chrf.txt"), 'r') as f3:
            score_summary = f3.readlines()[-1]
        print(score_summary)
        pattern = r"chrF2.* = (\d+\.\d+)\s*$"
        score_chrf = float(re.search(pattern, score_summary).groups()[0])
        metrics["beams"][f"beam{beam}"]['sacrebleu_chrf'] = score_chrf

        # Sacrebleu: TER — intentionally disabled in the original
        # (read metrics_ter.txt with pattern r"TER.* = (\d+\.\d+)\s*$" and store
        # under 'sacrebleu_ter' to re-enable).

    # Save metrics to file
    with open(os.path.join(eval_path, 'beam_metrics.json'), 'w') as f:
        json.dump(metrics, f)
    print("Metrics saved!")
    print("------------------------------------------------------------------------")
def evaluate_hbm(model, criterion, src_tok, trg_tok, train_domain, basepath, datapath_clean, start_time):
    """Evaluate `model` on the health/biological/merged test sets: compute the
    validation loss, generate beam-search translations, write src/hyp/ref files,
    score them, and save per-beam metrics to `beam_metrics.json`.

    NOTE(review): `model_name` and the constants TOK_FOLDER, SAMPLER_NAME,
    BATCH_SIZE, MAX_TOKENS, NUM_WORKERS, DEVICE1, MAX_LENGTH, BEAMS and
    PRINT_TRANSLATIONS come from module scope — confirm they are defined.
    """
    # Get all folders in the root path
    test_datasets = [
        os.path.join(DATASETS_PATH, TOK_FOLDER, x) for x in [
            f"health_{src_tok.lang}-{trg_tok.lang}",
            f"biological_{src_tok.lang}-{trg_tok.lang}",
            f"merged_{src_tok.lang}-{trg_tok.lang}",
        ]
    ]
    for test_dataset in test_datasets:
        test_domain, (test_src, test_trg) = utils.get_dataset_ids(test_dataset)
        print("#############################################")
        print(f"=> TESTING MODEL FROM '{train_domain}' IN DOMAIN '{test_domain}'")

        # Get datasets
        test_ds = TranslationDataset(
            os.path.join(test_dataset, datapath_clean), src_tok, trg_tok, "test")

        # Get dataloaders
        test_loader = base.get_data_loader(SAMPLER_NAME, test_ds, BATCH_SIZE,
                                           MAX_TOKENS, NUM_WORKERS, shuffle=False)

        # Evaluate (teacher-forced validation loss)
        start_time2 = time.time()
        val_loss, val_translations = base.evaluate(model, test_loader, criterion, device=DEVICE1)

        # Log progress.  NOTE(review): the returned dict is immediately
        # overwritten below; the call is kept for its logging side effects.
        metrics = base.log_progress(epoch_i=0, start_time=start_time2, tr_loss=None,
                                    val_loss=val_loss, tb_writer=None,
                                    translations=val_translations,
                                    print_translations=False, prefix=None)

        # Create path
        eval_name = test_domain
        eval_path = os.path.join(basepath, DATASET_EVAL_NAME, model_name, eval_name)
        Path(eval_path).mkdir(parents=True, exist_ok=True)

        # Generate translations for every beam width
        metrics = {"beams": {}}
        for beam in BEAMS:
            print(f"Computing beam width={beam}...")

            # Create output path
            output_path = os.path.join(eval_path, f"beam{beam}")
            Path(output_path).mkdir(parents=True, exist_ok=True)

            print(f"\t- Generating translations for: {test_domain}...")
            # Get translations (using beam search)
            src_dec_all, hyp_dec_all, ref_dec_all = base.get_translations(
                test_loader, model, device=DEVICE1, max_length=MAX_LENGTH, beam_width=beam)

            # Print translations
            if PRINT_TRANSLATIONS:
                helpers.print_translations(hyp_dec_all, ref_dec_all, src_dec_all,
                                           limit=50, randomized=False)

            # Compute scores
            metrics["beams"][f"beam{beam}"] = base.compute_metrics(
                hyp_dec_all, ref_dec_all, use_ter=False)
            print(f'Translation scores (beam_width={beam}; max_length={MAX_LENGTH})')
            # Bug fix: scores are stored under metrics["beams"][f"beam{beam}"];
            # the original read metrics[f"beam{beam}"] and raised KeyError.
            print(f'\t- Sacrebleu (bleu): {metrics["beams"][f"beam{beam}"]["sacrebleu_bleu"]:.2f}')
            # print(f'\t- Sacrebleu (ter): {metrics["beams"][f"beam{beam}"]["sacrebleu_ter"]:.2f}')
            print(f'\t- Sacrebleu (chrf): {metrics["beams"][f"beam{beam}"]["sacrebleu_chrf"]:.2f}')
            print(f'\t- Torchtext (bleu): {metrics["beams"][f"beam{beam}"]["torchtext_bleu"]:.2f}')

            # Save translations to file
            with open(os.path.join(output_path, 'src.txt'), 'w') as f:
                f.writelines("%s\n" % s for s in src_dec_all)
            with open(os.path.join(output_path, 'hyp.txt'), 'w') as f:
                f.writelines("%s\n" % s for s in hyp_dec_all)
            with open(os.path.join(output_path, 'ref.txt'), 'w') as f:
                f.writelines("%s\n" % s for s in ref_dec_all)
            print(f"Translations written! => Path: {output_path}")

            # Generate beam metrics (sacrebleu script over the written files)
            print(f"\t- Generating translations for: {test_domain}...")
            subprocess.call(['sh', './scripts/6_sacrebleu.sh', eval_path, output_path])
            # NOTE(review): this call passes 2 args, while the get_beam_scores
            # defined elsewhere in this file takes 4 — confirm which helper is
            # intended here.
            metrics["beams"].update(get_beam_scores(output_path, beam))

        # Save metrics to file
        with open(os.path.join(eval_path, 'beam_metrics.json'), 'w') as f:
            json.dump(metrics, f)
        print("Metrics saved!")
        print("\t- To get BLEU/CHRF/TER use: 'cat hyp.txt | sacrebleu ref.txt --metrics bleu'")
        print("\t- To get CHRF use: 'chrf -R ref.txt -H hyp.txt'")
        print("************************************************************")

    epoch_hours, epoch_mins, epoch_secs = helpers.epoch_time(start_time, end_time=time.time())
    print(f'Time experiment: {epoch_hours}h {epoch_mins}m {epoch_secs}s')
    print("************************************************************")
    print("Done!")