예제 #1
0
def test_eq_2():
    """Texts with the same annotations added in a different order are equal."""
    source = "hello word"
    first = AnnotatedText(source)
    second = AnnotatedText(source)

    # Same two annotations, applied in opposite orders.
    first.annotate(0, 5, "Hello")
    first.annotate(6, 10, "world")
    second.annotate(6, 10, "world")
    second.annotate(0, 5, "Hello")

    assert first == second
예제 #2
0
    def test_join_by(self):
        """Joining texts equals parsing the separator-joined annotated strings."""
        first_markup = "I {likes=>like:::error_type=SVA} turtles."
        second_markup = "I{,=>:::error_type=Punct} like turtles."
        parts = [AnnotatedText(first_markup), AnnotatedText(second_markup)]

        joined = AnnotatedText.join("\n", parts)

        # Reference value: the two annotated strings glued with a newline.
        reference = AnnotatedText(first_markup + "\n" + second_markup)
        assert reference == joined
예제 #3
0
def test_wrong_constructor_params():
    """Constructing AnnotatedText from a non-string raises ValueError."""
    # An integer is rejected.
    with pytest.raises(ValueError):
        AnnotatedText(42)

    # An existing AnnotatedText instance is rejected as well.
    existing = AnnotatedText("hello")
    with pytest.raises(ValueError):
        AnnotatedText(existing)
예제 #4
0
    def test_parse_colon(self):
        """A colon used as the suggestion parses next to ``:::`` metadata."""
        parsed = AnnotatedText("text {.=>::::key=R:PUNCT}")

        # Build the equivalent annotation programmatically for comparison.
        reference = AnnotatedText("text .")
        reference.annotate(5, 6, ":", meta={"key": "R:PUNCT"})

        actual = parsed.get_annotations()[0]
        wanted = reference.get_annotations()[0]
        for field in ("meta", "suggestions", "source_text"):
            assert getattr(actual, field) == getattr(wanted, field)
예제 #5
0
파일: corpus.py 프로젝트: MrCry0/ua-gec
 def iter_documents(self):
     """Yield a ``Document`` for every record returned by ``_get_metadata``.

     For each metadata record the file
     ``<data_dir>/<partition>/annotated/<doc_id>.a<annotator_id>.ann``
     is read and wrapped in an ``AnnotatedText``.

     Yields:
         Document: the annotated text paired with its metadata record.
     """
     for meta in self._get_metadata():
         filename = f"{meta.doc_id}.a{self.annotator_id}.ann"
         path = self._data_dir / meta.partition / "annotated" / filename
         # Decode explicitly: the default encoding of read_text() depends on
         # the platform locale and can mis-decode non-ASCII text.
         # NOTE(review): assumes the corpus files are UTF-8 -- confirm.
         text = AnnotatedText(path.read_text(encoding="utf-8"))
         yield Document(text, meta=meta)
예제 #6
0
 def test_to_str(self):
     """Render an annotation that has several suggestions and metadata."""
     annotated = AnnotatedText("helo world")
     suggestions = ["hello", "hola"]
     metadata = {"key": "value with spaces"}
     annotated.annotate(0, 4, suggestions, meta=metadata)

     rendered = annotated.get_annotated_text()
     assert rendered == "{helo=>hello|hola:::key=value with spaces} world"
예제 #7
0
    def test_parse_no_suggestions(self):
        """The special "NO_SUGGESTIONS" value parses to an empty list."""
        parsed = AnnotatedText("{asdasd=>NO_SUGGESTIONS}")

        # The only annotation must carry no suggestions at all.
        suggestions = parsed.get_annotations()[0].suggestions
        assert suggestions == []
예제 #8
0
def test_apply_correction_insert_word():
    """Applying an insertion annotation puts its suggestion into the text."""
    annotated = AnnotatedText("{=>Hello} {word=>World}!")

    # The first annotation is the pure insertion {=>Hello}.
    insertion = annotated.get_annotations()[0]
    annotated.apply_correction(insertion)

    rendered = annotated.get_annotated_text()
    assert rendered == "Hello {word=>World}!"
예제 #9
0
def test_apply_correction_with_no_suggestions():
    """Applying a NO_SUGGESTIONS annotation falls back to the source text."""
    annotated = AnnotatedText("Hi {sdf=>NO_SUGGESTIONS}!")

    # Apply the only annotation, which has no suggestions.
    no_suggestion = annotated.get_annotations()[0]
    annotated.apply_correction(no_suggestion)

    # The annotation markup is gone and the original "sdf" is kept.
    rendered = annotated.get_annotated_text()
    assert rendered == "Hi sdf!"
예제 #10
0
def test_apply_correction():
    """Applying one annotation replaces it with its suggested text."""
    annotated = AnnotatedText("{helo=>Hello}{...=>,} {word=>World}!")

    # Apply only the middle annotation, {...=>,}.
    middle = annotated.get_annotations()[1]
    annotated.apply_correction(middle)

    # The applied annotation becomes plain text; the other two remain.
    rendered = annotated.get_annotated_text()
    assert rendered == "{helo=>Hello}, {word=>World}!"
예제 #11
0
def test_remove_non_existing_annotation():
    """Removing an annotation that was already removed raises ValueError."""
    annotated = AnnotatedText("{helo=>Hello} {word=>World}!")
    target = annotated.get_annotations()[0]

    # The first removal is expected to go through without an error.
    annotated.remove(target)

    # The annotation no longer exists in the text, so removing it again
    # must be rejected.
    with pytest.raises(ValueError):
        annotated.remove(target)
예제 #12
0
def test_get_annoation_at():
    """Look up annotations by a single position and by a (start, end) span.

    The function name keeps its original spelling so that existing test
    selections continue to match.
    """
    text = AnnotatedText("Hello {word=>world}")

    # Position 0 and the span (0, 100) do not match the annotation.
    assert text.get_annotation_at(0) is None
    assert text.get_annotation_at(0, 100) is None

    # The annotated source "word" occupies offsets 6 up to (excluding) 10.
    assert text.get_annotation_at(6, 10).source_text == "word"
    assert text.get_annotation_at(6).source_text == "word"
    assert text.get_annotation_at(7).source_text == "word"

    # Offsets at or past the end of the annotation match nothing.
    assert text.get_annotation_at(10) is None
    assert text.get_annotation_at(11) is None
예제 #13
0
    def test_annotate_with_none(self):
        """Annotating with ``None`` renders as a NO_SUGGESTIONS annotation."""
        # Some checks return None instead of suggestions, meaning there are
        # no good suggestions. In the light-annotated format the convention
        # for that case is a {bad=>NO_SUGGESTIONS} annotation.
        source = "asdasd"
        annotated = AnnotatedText(source)
        annotated.annotate(0, len(source), correct_value=None)
        assert annotated.get_annotated_text() == "{asdasd=>NO_SUGGESTIONS}"

        # The special "NO_SUGGESTIONS" value is exposed as an empty
        # suggestions list on the annotation itself.
        first = annotated.get_annotations()[0]
        assert first.suggestions == []
예제 #14
0
    def test_parse(self):
        """Parse source text, alternative suggestions and metadata pairs."""
        parsed = AnnotatedText(
            "Hello {wold=>World|world:::type=OOV Spell:::status=ok}")
        annotation = parsed.get_annotations()[0]

        assert annotation.meta == {"type": "OOV Spell", "status": "ok"}
        assert annotation.suggestions == ["World", "world"]
        assert annotation.source_text == "wold"
예제 #15
0
def test_remove():
    """Removing one annotation leaves the remaining annotation in place."""
    annotated = AnnotatedText("{helo=>Hello} {word=>World}!")

    # Pick the second annotation and confirm it is the expected one.
    second = annotated.get_annotations()[1]
    assert second.source_text == "word"
    annotated.remove(second)

    # The removed annotation shows its source text; the first one stays.
    assert annotated.get_annotated_text() == "{helo=>Hello} word!"
    remaining = annotated.get_annotations()
    assert len(remaining) == 1
예제 #16
0
def test_iter_annotations():
    """The text may be modified while its annotations are being iterated."""
    annotated = AnnotatedText("{helo=>Hello}{...=>,} {word=>World}!")

    # Mutate the text on every step of the iteration.
    for index, annotation in enumerate(annotated.iter_annotations()):
        if index != 1:
            annotated.apply_correction(annotation)
            continue
        # The middle annotation, {...=>,}, is removed instead of applied.
        annotated.remove(annotation)

    # Every change made during the loop is reflected in the result.
    rendered = annotated.get_annotated_text()
    assert rendered == "Hello... World!"
예제 #17
0
def test_get_annotations():
    """Annotations expose source text, suggestions and source offsets."""
    annotated = AnnotatedText(
        "this {was=>is|will be} an {anotated=>annotated} text")
    annotations = annotated.get_annotations()
    assert len(annotations) == 2

    # First annotation: "was" with two alternative suggestions.
    first = annotations[0]
    assert first.source_text == "was"
    assert first.suggestions == ["is", "will be"]
    assert first.start == len("this ")
    assert first.end == len("this was")

    # Second annotation: "anotated" with a single suggestion.
    second = annotations[1]
    assert second.source_text == "anotated"
    assert second.suggestions == ["annotated"]
    assert second.start == len("this was an ")
    assert second.end == len("this was an anotated")
예제 #18
0
def test_curly_braces_in_original_text():
    """Braces that do not form an annotation stay in the original text."""
    annotated = AnnotatedText(r"(e.g. \emph{the, {dox=>fox}, jumps})")
    original = annotated.get_original_text()
    assert original == r"(e.g. \emph{the, dox, jumps})"
예제 #19
0
 def test_error(self):
     """Annotating inside an already annotated span raises OverlapError."""
     annotated = AnnotatedText("{helllo=>Hello} {world=>World}")
     start, end = 2, 5
     with pytest.raises(OverlapError):
         annotated.annotate(start, end, "ll")
예제 #20
0
def test_repr():
    """``repr`` wraps the annotated string in ``<AnnotatedText(...)>``."""
    annotated = AnnotatedText("Hello {word=>world}!")
    representation = repr(annotated)
    assert representation == "<AnnotatedText('Hello {word=>world}!')>"
예제 #21
0
def test_string_representation():
    """``str`` of an AnnotatedText returns its annotated string form."""
    markup = "Hello {word=>world}!"
    annotated = AnnotatedText(markup)
    assert str(annotated) == markup
예제 #22
0
def test_not_eq_other_type():
    """An AnnotatedText is not equal to the plain string it was built from."""
    markup = "Text {=>tokens} tokens ."
    annotated = AnnotatedText(markup)
    assert annotated != markup
예제 #23
0
def test_eq():
    """Two texts parsed from the same annotated string compare equal."""
    markup = "Text {=>tokens} tokens ."
    left, right = AnnotatedText(markup), AnnotatedText(markup)
    assert left == right
예제 #24
0
def test_get_original_text():
    """The original text keeps the source side of every annotation."""
    annotated = AnnotatedText("{helo=>Hello} world!")
    original = annotated.get_original_text()
    assert original == "helo world!"
예제 #25
0
 def test_annotate_empty_lengthy_intersect(self):
     """A whole-text annotation over an empty one raises OverlapError."""
     sentence = "Did n't know it."
     annotated = AnnotatedText(sentence)

     # Zero-width annotation (start == end) inside the sentence.
     annotated.annotate(12, 12, [" about"])

     # Annotating the full sentence afterwards must be rejected.
     with pytest.raises(OverlapError):
         annotated.annotate(0, len(sentence), ["Did n't know it"])
예제 #26
0
def test_get_corrected_text_with_highlight():
    """A NO_SUGGESTIONS annotation keeps its source in the corrected text."""
    annotated = AnnotatedText("{helo=>NO_SUGGESTIONS} world!")
    corrected = annotated.get_corrected_text()
    assert corrected == "helo world!"
예제 #27
0
def test_get_corrected_text():
    """The corrected text uses the suggestion in place of the source."""
    annotated = AnnotatedText("{helo=>Hello} world!")
    corrected = annotated.get_corrected_text()
    assert corrected == "Hello world!"
예제 #28
0
def test_not_eq():
    """Texts whose suggestions differ only in letter case are not equal."""
    lowercase = AnnotatedText("Text {=>tokens} tokens .")
    uppercase = AnnotatedText("Text {=>TOKENS} tokens .")
    assert lowercase != uppercase
예제 #29
0
 def test_forbid_joining_annotation_with_multiple_suggestions(self):
     """Re-annotating a span that has several suggestions raises."""
     source = "helloworld"
     whole = len(source)
     annotated = AnnotatedText(source)

     # The first annotation covers the whole text with three suggestions.
     annotated.annotate(0, whole, ["hello", "hola", "hi"])

     # A second annotation over the same span must be rejected.
     with pytest.raises(OverlapError):
         annotated.annotate(0, whole, "world")
예제 #30
0
def test_get_annotated_text_same_span():
    """Two adjacent empty-source annotations are rendered in parse order."""
    markup = "{=>b}{=>a}"
    annotated = AnnotatedText(markup)
    assert annotated.get_annotated_text() == markup