Пример #1
0
def test_preannotate_passage(create, as_array, convert_and_back, partial, monkeypatch):
    """Check that attribute values stored on a passage beforehand are kept by textutil.annotate.

    Every terminal receives the same synthetic values (consecutive integers
    starting at 10, one per member of textutil.Attr), stored either as rows of
    the layer-0 "doc" array or as per-terminal ``extra`` entries.

    :param create: callable returning a fresh passage
    :param as_array: store/read the values through l0.extra["doc"] and terminal.tok
                     rather than through terminal.extra
    :param convert_and_back: pass the passage through convert.to_standard and
                             convert.from_standard before annotating
    :param partial: blank out the ENT_TYPE value so the pre-annotation is incomplete
    :param monkeypatch: object whose setattr replaces textutil.get_nlp (pytest's monkeypatch fixture)
    """
    if not partial:
        # Presumably makes the test fail if the NLP pipeline is requested for a
        # passage that already carries every attribute -- confirm against the helper.
        monkeypatch.setattr(textutil, "get_nlp", assert_spacy_not_loaded)
    passage = create()
    l0 = passage.layer(layer0.LAYER_ID)
    attr_values = list(range(10, 10 + len(textutil.Attr)))
    if partial:
        attr_values[textutil.Attr.ENT_TYPE.value] = ""
    if as_array:
        # Each paragraph p contributes len(p) rows, all referring to the same attr_values list.
        l0.extra["doc"] = [len(p) * [attr_values] for p in textutil.break2paragraphs(passage, return_terminals=True)]
    else:
        for terminal in l0.all:
            for attr, value in zip(textutil.Attr, attr_values):
                if value:  # the blanked-out attribute is left unset
                    terminal.extra[attr.key] = value
    # Convert only on request; indexing a two-element tuple with the flag would
    # run the round trip even when its result is thrown away.
    if convert_and_back:
        passage = convert.from_standard(convert.to_standard(passage))
    if not partial:
        assert textutil.is_annotated(passage, as_array=as_array), "Passage %s is not pre-annotated" % passage.ID
    textutil.annotate(passage, as_array=as_array)
    assert textutil.is_annotated(passage, as_array=as_array), "Passage %s is not annotated" % passage.ID
    # NOTE(review): l0 was obtained before the optional conversion, so with
    # convert_and_back set this loop still inspects the terminals of the passage
    # returned by create() -- confirm that this is intended.
    for terminal in l0.all:
        for i, (attr, value) in enumerate(zip(textutil.Attr, attr_values)):
            if value:
                assert (terminal.tok[i] if as_array else terminal.extra.get(attr.key)) == value, \
                    "Terminal %s has wrong %s" % (terminal, attr.name)
Пример #2
0
def test_preannotate_passage(create, as_array, convert_and_back, partial, monkeypatch):
    """Check that attribute values stored on a passage beforehand are kept by textutil.annotate.

    Every terminal receives the same synthetic values (consecutive integers
    starting at 10, one per member of textutil.Attr), stored either as rows of
    the layer-0 "doc" array or as per-terminal ``extra`` entries. The
    annotation calls are made with ``as_extra`` set to the opposite of
    ``as_array``.

    :param create: callable returning a fresh passage
    :param as_array: store/read the values through l0.extra["doc"] and terminal.tok
                     rather than through terminal.extra
    :param convert_and_back: pass the passage through convert.to_standard and
                             convert.from_standard before annotating
    :param partial: blank out the ENT_TYPE value so the pre-annotation is incomplete
    :param monkeypatch: object whose setattr replaces textutil.get_nlp (pytest's monkeypatch fixture)
    """
    if not partial:
        # Presumably makes the test fail if the NLP pipeline is requested for a
        # passage that already carries every attribute -- confirm against the helper.
        monkeypatch.setattr(textutil, "get_nlp", assert_spacy_not_loaded)
    passage = create()
    l0 = passage.layer(layer0.LAYER_ID)
    attr_values = list(range(10, 10 + len(textutil.Attr)))
    if partial:
        attr_values[textutil.Attr.ENT_TYPE.value] = ""
    if as_array:
        # Each paragraph p contributes len(p) rows, all referring to the same attr_values list.
        l0.extra["doc"] = [len(p) * [attr_values] for p in textutil.break2paragraphs(passage, return_terminals=True)]
    else:
        for terminal in l0.all:
            for attr, value in zip(textutil.Attr, attr_values):
                if value:  # the blanked-out attribute is left unset
                    terminal.extra[attr.key] = value
    # Convert only on request; indexing a two-element tuple with the flag would
    # run the round trip even when its result is thrown away.
    if convert_and_back:
        passage = convert.from_standard(convert.to_standard(passage))
    if not partial:
        assert textutil.is_annotated(passage, as_array=as_array, as_extra=not as_array), \
            "Passage %s is not pre-annotated" % passage.ID
    textutil.annotate(passage, as_array=as_array, as_extra=not as_array)
    assert textutil.is_annotated(passage, as_array=as_array, as_extra=not as_array), \
        "Passage %s is not annotated" % passage.ID
    # NOTE(review): l0 was obtained before the optional conversion, so with
    # convert_and_back set this loop still inspects the terminals of the passage
    # returned by create() -- confirm that this is intended.
    for terminal in l0.all:
        for i, (attr, value) in enumerate(zip(textutil.Attr, attr_values)):
            if value:
                assert (terminal.tok[i] if as_array else terminal.extra.get(attr.key)) == value, \
                    "Terminal %s has wrong %s" % (terminal, attr.name)
Пример #3
0
 def test_annotate_all(self):
     """Annotate several passages with textutil.annotate_all and verify every terminal.

     The passages are first annotated with default options, then again as
     (passage, passage) tuples with as_array=True. Each result, and a copy of
     it sent through convert.to_standard / convert.from_standard, must be
     reported as annotated in both modes and carry every textutil.Attr key
     in terminal.extra as well as a terminal.tok of matching length.
     """
     passages = [
         convert.from_standard(
             TestUtil.load_xml("test_files/standard3.xml")),
         TestUtil.create_passage(),
         TestUtil.create_crossing_passage(),
         TestUtil.create_discontiguous(),
         TestUtil.create_multi_passage()
     ]
     # Consume the result of a first pass made with default options.
     list(textutil.annotate_all(passages))
     for passage, compare in textutil.annotate_all(
         ((p, p) for p in passages), as_array=True, as_tuples=True):
         # assertIs matches the other unittest assertions in this method and,
         # unlike a bare assert statement, is not removed when running with -O.
         self.assertIs(passage, compare)
         for p in passage, convert.from_standard(
                 convert.to_standard(passage)):
             self.assertTrue(is_annotated(p, as_array=True),
                             "Passage %s is not annotated" % passage.ID)
             self.assertTrue(is_annotated(p, as_array=False),
                             "Passage %s is not annotated" % passage.ID)
             for terminal in p.layer(layer0.LAYER_ID).all:
                 for attr in textutil.Attr:
                     self.assertIn(
                         attr.key, terminal.extra,
                         "Terminal %s in passage %s has no %s" %
                         (terminal, passage.ID, attr.name))
                 self.assertIsNotNone(
                     terminal.tok,
                     "Terminal %s in passage %s has no annotation" %
                     (terminal, passage.ID))
                 self.assertEqual(len(terminal.tok), len(textutil.Attr))
Пример #4
0
def main(args):
    """Annotate the passages obtained from args.filenames and write each one out.

    :param args: object providing filenames, as_array, verbose and out_dir
    """
    source = get_passages_with_progress_bar(args.filenames, desc="Annotating")
    annotated = annotate_all(source, replace=True, as_array=args.as_array, verbose=args.verbose)
    for passage in annotated:
        # Sanity check on the annotation result before anything is written.
        assert is_annotated(passage, args.as_array), "Passage %s is not annotated" % passage.ID
        write_passage(passage, outdir=args.out_dir, verbose=args.verbose)
Пример #5
0
 def test_annotate_passage(self):
     """Annotate one passage in both modes and verify it before and after a conversion round trip.

     The passage loaded from test_files/standard3.xml is annotated with the
     default options and then with as_array=True. Both it and a copy sent
     through convert.to_standard / convert.from_standard must be reported as
     annotated in both modes, with every textutil.Attr key present in
     terminal.extra and a terminal.tok of matching length.
     """
     original = convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))
     textutil.annotate(original)
     textutil.annotate(original, as_array=True)
     restored = convert.from_standard(convert.to_standard(original))
     for candidate in (original, restored):
         for array_mode in (True, False):
             self.assertTrue(is_annotated(candidate, as_array=array_mode),
                             "Passage %s is not annotated" % original.ID)
         terminals = candidate.layer(layer0.LAYER_ID).all
         for terminal in terminals:
             for attr in textutil.Attr:
                 self.assertIn(attr.key, terminal.extra,
                               "Terminal %s has no %s" % (terminal, attr.name))
             self.assertIsNotNone(terminal.tok,
                                  "Terminal %s has no annotation" % terminal)
             self.assertEqual(len(terminal.tok), len(textutil.Attr))
Пример #6
0
def test_annotate_passage(create, as_array):
    """Annotate a freshly created passage and verify it before and after a conversion round trip.

    :param create: callable returning the passage to annotate
    :param as_array: forwarded to textutil.annotate and textutil.is_annotated; also selects
                     whether terminal.tok or terminal.extra is inspected
    """
    original = create()
    textutil.annotate(original, as_array=as_array)
    restored = convert.from_standard(convert.to_standard(original))
    for candidate in (original, restored):
        assert textutil.is_annotated(candidate, as_array=as_array), "Passage %s is not annotated" % original.ID
        for terminal in candidate.layer(layer0.LAYER_ID).all:
            if not as_array:
                # Per-terminal storage: every attribute needs its own key in extra.
                for attr in textutil.Attr:
                    assert attr.key in terminal.extra, "Terminal %s has no %s" % (terminal, attr.name)
                continue
            # Array storage: tok must exist and hold one value per attribute.
            assert terminal.tok is not None, "Terminal %s has no annotation" % terminal
            assert len(terminal.tok) == len(textutil.Attr)
Пример #7
0
def test_annotate_passage(create, as_array):
    """Annotate a freshly created passage and verify it before and after a conversion round trip.

    :param create: callable returning the passage to annotate
    :param as_array: forwarded to textutil.annotate and textutil.is_annotated; also selects
                     whether terminal.tok or terminal.extra is inspected
    """
    source_passage = create()
    textutil.annotate(source_passage, as_array=as_array)
    round_tripped = convert.from_standard(convert.to_standard(source_passage))
    for checked in (source_passage, round_tripped):
        assert textutil.is_annotated(checked, as_array=as_array), "Passage %s is not annotated" % source_passage.ID
        for terminal in checked.layer(layer0.LAYER_ID).all:
            if not as_array:
                # Per-terminal storage: every attribute needs its own key in extra.
                for attr in textutil.Attr:
                    assert attr.key in terminal.extra, "Terminal %s has no %s" % (terminal, attr.name)
                continue
            # Array storage: tok must exist and hold one value per attribute.
            assert terminal.tok is not None, "Terminal %s has no annotation" % terminal
            assert len(terminal.tok) == len(textutil.Attr)
Пример #8
0
def test_annotate_all(as_array, convert_and_back):
    """Annotate every passage built from PASSAGES with textutil.annotate_all and verify the terminals.

    The passages are first annotated with default options, then again as
    (passage, passage) tuples with the requested as_array mode.

    :param as_array: forwarded to textutil.annotate_all and textutil.is_annotated; also selects
                     whether terminal.tok or terminal.extra is inspected
    :param convert_and_back: verify a copy sent through convert.to_standard and
                             convert.from_standard instead of the annotated passage itself
    """
    passages = [create() for create in PASSAGES]
    # Consume the result of a first pass made with default options.
    list(textutil.annotate_all(passages))
    for passage, compare in textutil.annotate_all(((p, p) for p in passages), as_array=as_array, as_tuples=True):
        assert passage is compare
        # Convert only on request; indexing a two-element tuple with the flag
        # would run the round trip even when its result is thrown away.
        p = convert.from_standard(convert.to_standard(passage)) if convert_and_back else passage
        assert textutil.is_annotated(p, as_array=as_array), "Passage %s is not annotated" % passage.ID
        for terminal in p.layer(layer0.LAYER_ID).all:
            if as_array:
                assert terminal.tok is not None, "Terminal %s in passage %s has no annotation" % (terminal, passage.ID)
                assert len(terminal.tok) == len(textutil.Attr)
            else:
                for attr in textutil.Attr:
                    assert attr.key in terminal.extra, "Terminal %s in passage %s has no %s" % (
                        terminal, passage.ID, attr.name)
Пример #9
0
def test_annotate_all(as_array, convert_and_back):
    """Annotate every passage built from PASSAGES with textutil.annotate_all and verify the terminals.

    The passages are first annotated with default options, then again as
    (passage, passage) tuples with the requested as_array mode.

    :param as_array: forwarded to textutil.annotate_all and textutil.is_annotated; also selects
                     whether terminal.tok or terminal.extra is inspected
    :param convert_and_back: verify a copy sent through convert.to_standard and
                             convert.from_standard instead of the annotated passage itself
    """
    passages = [create() for create in PASSAGES]
    # Consume the result of a first pass made with default options.
    list(textutil.annotate_all(passages))
    for passage, compare in textutil.annotate_all(((p, p) for p in passages), as_array=as_array, as_tuples=True):
        assert passage is compare
        # Convert only on request; indexing a two-element tuple with the flag
        # would run the round trip even when its result is thrown away.
        p = convert.from_standard(convert.to_standard(passage)) if convert_and_back else passage
        assert textutil.is_annotated(p, as_array=as_array), "Passage %s is not annotated" % passage.ID
        for terminal in p.layer(layer0.LAYER_ID).all:
            if as_array:
                assert terminal.tok is not None, "Terminal %s in passage %s has no annotation" % (terminal, passage.ID)
                assert len(terminal.tok) == len(textutil.Attr)
            else:
                for attr in textutil.Attr:
                    assert attr.key in terminal.extra, "Terminal %s in passage %s has no %s" % (
                        terminal, passage.ID, attr.name)
Пример #10
0
def main(args):
    """Annotate the passages obtained from args.filenames and write each one out.

    :param args: object providing filenames, as_array, verbose and out_dir
    """
    loaded = get_passages_with_progress_bar(args.filenames, desc="Annotating")
    results = annotate_all(loaded, replace=True, as_array=args.as_array, verbose=args.verbose)
    for passage in results:
        # Sanity check on the annotation result before anything is written.
        assert is_annotated(passage, args.as_array), "Passage %s is not annotated" % passage.ID
        write_passage(passage, outdir=args.out_dir, verbose=args.verbose)