Example #1
def evaluate_unit_overlap(evaluated_sentences, reference_sentences):
    # Flatten each list of sentence objects into one sequence of words,
    # wrap both in term-frequency models and score their term overlap.
    evaluated_words = tuple(chain(*(s.words for s in evaluated_sentences)))
    reference_words = tuple(chain(*(s.words for s in reference_sentences)))
    evaluated_model = TfDocumentModel(evaluated_words)
    reference_model = TfDocumentModel(reference_words)

    return unit_overlap(evaluated_model, reference_model)
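The snippet above references several names without importing them. Assuming it is built on the sumy library (which the identifiers TfDocumentModel and unit_overlap suggest), the missing pieces would look roughly like the sketch below; the import paths and the stand-in sentence type are assumptions for illustration, not part of the original excerpt.

from collections import namedtuple
from itertools import chain  # used by evaluate_unit_overlap above

# Assumed import locations in sumy; not shown in the original excerpt.
from sumy.evaluation import unit_overlap
from sumy.models import TfDocumentModel

# Hypothetical stand-in for sumy sentence objects: anything exposing a
# `.words` tuple satisfies the helper above.
FakeSentence = namedtuple("FakeSentence", "words")

summary_sentences = [FakeSentence(("the", "cat", "sat"))]
reference_sentences = [FakeSentence(("the", "cat", "ran"))]
# Assumes evaluate_unit_overlap from the snippet above is defined in this module.
print(evaluate_unit_overlap(summary_sentences, reference_sentences))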
Example #2
    def test_unit_overlap_half_match(self):
        tokenizer = Tokenizer("czech")
        model1 = TfDocumentModel("Veta aká sa tu len veľmi ťažko hľadá",
                                 tokenizer)
        model2 = TfDocumentModel("Teta ktorá sa tu iba veľmi zle hľadá",
                                 tokenizer)

        self.assertAlmostEqual(unit_overlap(model1, model2), 1 / 3)
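The expected value 1 / 3 is consistent with unit_overlap being a Jaccard-style overlap of the two term sets (shared terms divided by all distinct terms); that reading is inferred from the test expectations here, not stated in the excerpt. A plain-Python check of the arithmetic:

a = set("veta aká sa tu len veľmi ťažko hľadá".split())
b = set("teta ktorá sa tu iba veľmi zle hľadá".split())

# Shared terms: sa, tu, veľmi, hľadá -> 4 of the 12 distinct terms overall.
print(len(a & b) / len(a | b))  # 0.333..., matching the expected 1 / 3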
Example #3
    def test_unit_overlap_no_match(self):
        tokenizer = Tokenizer("czech")
        model1 = TfDocumentModel("Toto je moja veta. To sa nedá poprieť!",
                                 tokenizer)
        model2 = TfDocumentModel("Hento bolo jeho slovo, ale možno klame.",
                                 tokenizer)

        self.assertAlmostEqual(unit_overlap(model1, model2), 0.0)
Example #4
    def test_unit_overlap_half_match(self):
        tokenizer = Tokenizer("czech")
        model1 = TfDocumentModel("Veta aká sa tu len veľmi ťažko hľadá",
            tokenizer)
        model2 = TfDocumentModel("Teta ktorá sa tu iba veľmi zle hľadá",
            tokenizer)

        self.assertAlmostEqual(unit_overlap(model1, model2), 1/3)
Example #5
    def test_unit_overlap_no_match(self):
        tokenizer = Tokenizer("czech")
        model1 = TfDocumentModel("Toto je moja veta. To sa nedá poprieť!",
            tokenizer)
        model2 = TfDocumentModel("Hento bolo jeho slovo, ale možno klame.",
            tokenizer)

        self.assertAlmostEqual(unit_overlap(model1, model2), 0.0)
Example #6
def test_unit_overlap_no_match():
    tokenizer = Tokenizer("czech")
    model1 = TfDocumentModel("Toto je moja veta. To sa nedá poprieť!",
                             tokenizer)
    model2 = TfDocumentModel("Hento bolo jeho slovo, ale možno klame.",
                             tokenizer)

    assert unit_overlap(model1, model2) == approx(0.0)
Example #7
def test_unit_overlap_wrong_arguments():
    tokenizer = Tokenizer("english")
    model = TfDocumentModel("", tokenizer)

    with pytest.raises(ValueError):
        unit_overlap("model", "model")
    with pytest.raises(ValueError):
        unit_overlap("model", model)
    with pytest.raises(ValueError):
        unit_overlap(model, "model")
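The wrong-arguments test implies that unit_overlap validates its inputs and raises ValueError (rather than, say, TypeError) when either argument is not a term-frequency model. An illustrative guard with that behaviour, not sumy's actual code:

def _require_tf_model(value):
    # Hypothetical check mirroring the test above: plain strings such as
    # "model" must be rejected with ValueError. Assumes TfDocumentModel is
    # imported as in the sketch after Example #1.
    if not isinstance(value, TfDocumentModel):
        raise ValueError("Both arguments have to be instances of TfDocumentModel.")
    return value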
Example #8
def evaluate(summary, sumref, debug=False):
    # Build a string and a tuple representation of both the generated summary
    # and the reference summary (sumtostr/sumtotup are project helpers that
    # are not shown in this excerpt).
    sumstring = sumtostr(summary)
    sumtuple = sumtotup(summary)
    refstring = sumtostr(sumref)
    reftuple = sumtotup(sumref)
    summodel = TfDocumentModel(sumstring, Tokenizer("english"))
    refmodel = TfDocumentModel(refstring, Tokenizer("english"))

    if debug:
        print(reftuple)
        print(sumtuple)

    # Content-based metrics are computed on the term-frequency models.
    cos_val = cosine_similarity(summodel, refmodel)
    unit_val = unit_overlap(summodel, refmodel)

    # Co-selection metrics are computed on the sentence tuples.
    precision_val = precision(sumtuple, reftuple)
    recall_val = recall(sumtuple, reftuple)
    f_val = f_score(sumtuple, reftuple)

    return cos_val, unit_val, precision_val, recall_val, f_val
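sumtostr and sumtotup are helpers the excerpt does not show. A minimal sketch of what they plausibly do, assuming a summary is a sequence of sentence objects or strings: sumtostr joins the sentences into one string for the term-frequency models, and sumtotup freezes them into a tuple for the co-selection metrics. Both bodies below are guesses for illustration only.

def sumtostr(summary):
    # Hypothetical: one whitespace-joined string, suitable for TfDocumentModel.
    return " ".join(str(sentence) for sentence in summary)


def sumtotup(summary):
    # Hypothetical: an immutable tuple of sentences for precision/recall/f_score.
    return tuple(str(sentence) for sentence in summary)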
Example #9
    def test_unit_overlap_exact_match(self):
        tokenizer = Tokenizer("czech")
        model = TfDocumentModel("Veta aká sa len veľmi ťažko hľadá.",
                                tokenizer)

        self.assertAlmostEqual(unit_overlap(model, model), 1.0)
Example #10
def test_unit_overlap_half_match():
    tokenizer = Tokenizer("czech")
    model1 = TfDocumentModel("Veta aká sa tu len veľmi ťažko hľadá", tokenizer)
    model2 = TfDocumentModel("Teta ktorá sa tu iba veľmi zle hľadá", tokenizer)

    assert unit_overlap(model1, model2) == approx(1/3)
Example #11
def test_unit_overlap_no_match():
    tokenizer = Tokenizer("czech")
    model1 = TfDocumentModel("Toto je moja veta. To sa nedá poprieť!", tokenizer)
    model2 = TfDocumentModel("Hento bolo jeho slovo, ale možno klame.", tokenizer)

    assert unit_overlap(model1, model2) == approx(0.0)
Example #12
def test_unit_overlap_exact_match():
    tokenizer = Tokenizer("czech")
    model = TfDocumentModel("Veta aká sa len veľmi ťažko hľadá.", tokenizer)

    assert unit_overlap(model, model) == approx(1.0)
Example #13
def test_unit_overlap_empty():
    tokenizer = Tokenizer("english")
    model = TfDocumentModel("", tokenizer)

    with pytest.raises(ValueError):
        unit_overlap(model, model)
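Taken together, the tests pin down the observable behaviour: identical models score 1.0, disjoint models score 0.0, half-overlapping models score 1 / 3, and an empty model raises ValueError. A tiny reference implementation that reproduces the numeric expectations and the empty-model error is sketched below; it is an illustration of that contract, not sumy's implementation.

def unit_overlap_reference(terms_a, terms_b):
    # Jaccard-style overlap of two term collections, with the empty-input
    # error the test above demands. Plain term sequences stand in for the
    # TfDocumentModel objects used in the real API.
    a, b = set(terms_a), set(terms_b)
    if not a or not b:
        raise ValueError("Both models have to contain at least one term.")
    return len(a & b) / len(a | b)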
Example #14
    def test_unit_overlap_exact_match(self):
        tokenizer = Tokenizer("czech")
        model = TfDocumentModel("Veta aká sa len veľmi ťažko hľadá.", tokenizer)

        self.assertAlmostEqual(unit_overlap(model, model), 1.0)