def test_parse_segment_scores(self):
    """Parse the reference scores file and spot-check the result.

    Verifies the number of parsed rows, then the NIST and BLEU values
    stored in the first row.
    """
    parsed = parse_segment_scores(score_fname)
    row_count = parsed.shape[0]
    assert row_count == 2489

    # Expected values for the first row, checked in the same order as before.
    first_row = parsed[0]
    for measure, expected in (("NIST", 1.1990), ("BLEU", 0.0375)):
        assert_almost_equal(first_row[measure], expected)
Scores were generated by running scratch/centroid.py
"""

from pylab import *

from tg.mteval import parse_segment_scores

# Running subplot index. It is incremented before every subplot() call,
# so the first panel is requested with index 1.
n = 0

# A single figure holds every panel: 2 data sets x 2 language pairs x
# 2 measures = 8 panels, matching the 4x2 grid passed to subplot() below.
figure(figsize=(9, 14))

for data_set in "metis", "presemt-dev":
    for lang_pair in "de-en", "en-de":
        # The path uses the data set and language pair twice: once in the
        # directory name and once in the file name.
        scores_fname = "centroid/_centroid_{}_{}/centroid_{}_{}.scores".format(
            data_set, lang_pair, data_set, lang_pair)
        scores = parse_segment_scores(scores_fname)

        for measure in "BLEU", "NIST":
            n += 1
            ax = subplot(4, 2, n)

            # Off-diagonal entry of the 2x2 matrix returned by corrcoef:
            # the correlation between the "words" field and this measure.
            # NOTE(review): `np` is presumably supplied by the pylab star
            # import -- confirm no explicit numpy import is expected.
            r = np.corrcoef(scores["words"], scores[measure])[1,0]

            # Title shows the data set / language pair, the number of rows
            # in the scores array, and the correlation to 4 decimals.
            title('{}_{}\n(n={}, r={:.4f})\n'.format(data_set, lang_pair, scores.shape[0], r))
            xlabel("#words")
            ylabel(measure)

            # The x axis is capped at 20 for "metis" and at 50 for any
            # other data set.
            # presumably this reflects differing segment lengths -- verify.
            xmax = 20 if data_set == "metis" else 50
            xlim(0, xmax)