Exemplo n.º 1
0
def create_lemma_data(data=("metis", "presemt-dev", "wmt08", "wmt09", "wmt10", "wmt11"), lang_pairs=()):
    """
    Lemmatize the word-level reference file of each selected evaluation set.

    For every data set named in ``data`` and every language pair selected for
    it, the entry ``config["eval"][data_set][lang_pair]`` supplies two file
    names: ``word_ref_fname`` (passed to ``lemmatize`` as input) and
    ``lemma_ref_fname`` (passed to ``create_dirs`` and then to ``lemmatize``
    as ``outf``).

    data: iterable of data set names used as keys into ``config["eval"]``.
    lang_pairs: iterable of language pair keys such as "de-en"; when empty,
        all pairs configured for the data set are processed.
    """
    for data_set in data:
        # An empty lang_pairs falls back to every pair configured for this set.
        selected_pairs = lang_pairs if lang_pairs else config["eval"][data_set].keys()
        for lang_pair in selected_pairs:
            # The second "-"-separated field of the pair is the target language.
            target_lang = lang_pair.split("-")[1]
            settings = config["eval"][data_set][lang_pair]
            lemma_ref_fname = settings["lemma_ref_fname"]
            # NOTE(review): create_dirs presumably makes sure the output
            # location exists before lemmatize writes to it -- confirm.
            create_dirs(lemma_ref_fname)
            lemmatize(settings["word_ref_fname"], target_lang, outf=lemma_ref_fname)
Exemplo n.º 2
0
def lemmatize_reference():
    """
    Produce lemmatized copies of the reference translations used for evaluation.

    Each job below is handed to ``lemmatize`` as three positional arguments:
    the reference file name, a language code, and a second file name carrying
    a "lemma_" prefix (presumably the output file -- confirm against
    ``lemmatize``). Jobs run in the order listed.
    """
    jobs = (
        ("sample_out_de-en.ref", "en", "lemma_sample_out_de-en.ref"),
        ("sample_newstest2011-ref.de.sgm", "de", "lemma_sample_newstest2011-ref.de.sgm"),
    )
    for ref_fname, lang, lemma_fname in jobs:
        lemmatize(ref_fname, lang, lemma_fname)