def test_rcs_tcc_ceaf():
    """Check LUO_MEASURES scores for each response in RCS14_TCM_RESPS.

    Every fixture entry is unpacked as (system, response, expected). The
    response is scored against RCS14_TCM_GOLD via _get_coref_fscore, the
    resulting pairs are collected into a dict, and that dict is passed to
    check_correct alongside the expected value.

    Examples attributed to Luo (2005).
    NOTE(review): the test name says "tcc" while the fixtures are named
    "TCM", and the fixtures carry an RCS14 prefix -- confirm the naming
    and the attribution.
    """
    for _system, response, expected in RCS14_TCM_RESPS:
        scored = _get_coref_fscore(RCS14_TCM_GOLD, response, LUO_MEASURES)
        check_correct(expected, dict(scored))
def test_luo_ceaf():
    """Check LUO_MEASURES scores for each response in LUO05_RESPS.

    Every fixture entry is unpacked as (system, response, expected). The
    response is scored against LUO05_GOLD via _get_coref_fscore, the
    resulting pairs are collected into a dict, and that dict is passed to
    check_correct alongside the expected value.

    Examples from Luo (2005).
    """
    for _system, response, expected in LUO05_RESPS:
        scored = _get_coref_fscore(LUO05_GOLD, response, LUO_MEASURES)
        check_correct(expected, dict(scored))
def test_rcs_tca_ceaf():
    """Check LUO_MEASURES scores for each response in RCS14_TCA_RESPS.

    Every fixture entry is unpacked as (system, response, expected). The
    response is scored against RCS14_TCA_GOLD via _get_coref_fscore, the
    resulting pairs are collected into a dict, and that dict is passed to
    check_correct alongside the expected value.

    Examples attributed to Luo (2005).
    NOTE(review): the fixtures carry an RCS14 prefix -- confirm that the
    Luo (2005) attribution is the intended source.
    """
    for _system, response, expected in RCS14_TCA_RESPS:
        scored = _get_coref_fscore(RCS14_TCA_GOLD, response, LUO_MEASURES)
        check_correct(expected, dict(scored))
def test_conll_multi_sysa():
    """Check _get_stats for CONLL_MULTI_SYSA_UNSTITCHED against gold.

    Computes stats for CONLL_MULTI_GOLD_UNSTITCHED versus
    CONLL_MULTI_SYSA_UNSTITCHED and asserts that check_correct returns a
    truthy value when comparing them with EXPECTED_CONLL_MULTI_SYSA.

    NOTE(review): this test asserts on check_correct's return value,
    which presumes check_correct returns something truthy on success --
    confirm against its definition.
    """
    stats = _get_stats(
        CONLL_MULTI_GOLD_UNSTITCHED,
        CONLL_MULTI_SYSA_UNSTITCHED,
    )
    assert check_correct(EXPECTED_CONLL_MULTI_SYSA, stats)
def test_conll_multi_selfeval():
    """Check _get_stats when the gold data is evaluated against itself.

    CONLL_MULTI_GOLD_UNSTITCHED is passed as both arguments to _get_stats,
    and the test asserts that check_correct returns a truthy value when
    comparing the result with EXPECTED_CONLL_MULTI_SELFEVAL.

    NOTE(review): this test asserts on check_correct's return value,
    which presumes check_correct returns something truthy on success --
    confirm against its definition.
    """
    stats = _get_stats(
        CONLL_MULTI_GOLD_UNSTITCHED,
        CONLL_MULTI_GOLD_UNSTITCHED,
    )
    assert check_correct(EXPECTED_CONLL_MULTI_SELFEVAL, stats)
def test_tac_eval():
    """Check _get_stats for TAC_SYS_COMB against TAC_GOLD_COMB.

    The computed stats are handed to check_correct together with
    EXPECTED_TAC_SYS. The return value of check_correct is not inspected
    here.

    NOTE(review): presumably check_correct raises on mismatch; if it only
    returns a boolean, this test cannot fail -- confirm.
    """
    stats = _get_stats(TAC_GOLD_COMB, TAC_SYS_COMB)
    check_correct(EXPECTED_TAC_SYS, stats)