def test_preannotate_passage(create, as_array, convert_and_back, partial, monkeypatch):
    """Check that annotation values planted on a passage survive `textutil.annotate`.

    One dummy integer per `textutil.Attr` member is stored either in the
    layer-0 `extra["doc"]` array (`as_array`) or in each terminal's `extra`
    dict. The passage is then optionally round-tripped through the standard
    format, annotated, and every truthy planted value is expected to be read
    back unchanged.

    :param create: callable returning the passage under test
    :param as_array: plant and read values as per-paragraph arrays instead of terminal extras
    :param convert_and_back: used as a tuple index; 0/False keeps the original passage,
                             1/True takes the round-tripped copy
    :param partial: blank out the ENT_TYPE value so the pre-annotation is incomplete
    :param monkeypatch: pytest fixture used to replace `textutil.get_nlp`
    """
    if not partial:
        # With complete pre-annotation get_nlp is swapped for assert_spacy_not_loaded
        # (presumably so that any attempt to load spaCy fails the test -- confirm).
        monkeypatch.setattr(textutil, "get_nlp", assert_spacy_not_loaded)
    passage = create()
    l0 = passage.layer(layer0.LAYER_ID)
    attr_values = [10 + offset for offset in range(len(textutil.Attr))]
    if partial:
        attr_values[textutil.Attr.ENT_TYPE.value] = ""
    if not as_array:
        for terminal in l0.all:
            for attr, planted in zip(textutil.Attr, attr_values):
                if not planted:
                    continue  # the blanked-out attribute is left unset
                terminal.extra[attr.key] = planted
    else:
        # One row of values per terminal, grouped by paragraph.
        l0.extra["doc"] = [[attr_values] * len(paragraph)
                           for paragraph in textutil.break2paragraphs(passage, return_terminals=True)]
    # The round trip is always performed; convert_and_back only selects which passage is used.
    round_tripped = convert.from_standard(convert.to_standard(passage))
    passage = (passage, round_tripped)[convert_and_back]
    if not partial:
        assert textutil.is_annotated(passage, as_array=as_array), "Passage %s is not pre-annotated" % passage.ID
    textutil.annotate(passage, as_array=as_array)
    assert textutil.is_annotated(passage, as_array=as_array), "Passage %s is not annotated" % passage.ID
    # NOTE(review): l0 still belongs to the passage returned by create(), even when
    # `passage` now names the round-tripped copy -- confirm this is intended.
    for terminal in l0.all:
        for index, (attr, expected) in enumerate(zip(textutil.Attr, attr_values)):
            if not expected:
                continue
            actual = terminal.tok[index] if as_array else terminal.extra.get(attr.key)
            assert actual == expected, "Terminal %s has wrong %s" % (terminal, attr.name)
def test_preannotate_passage(create, as_array, convert_and_back, partial, monkeypatch):
    """Verify that pre-set annotation values are kept by `textutil.annotate`.

    Dummy integer values (one per `textutil.Attr` member) are written to the
    passage before annotation, either as the layer-0 `extra["doc"]` array or
    as per-terminal `extra` entries. `is_annotated` and `annotate` are called
    with `as_extra` set to the opposite of `as_array`. Afterwards every truthy
    planted value must still be readable.

    :param create: callable returning the passage under test
    :param as_array: store/read values as arrays; also determines `as_extra=not as_array`
    :param convert_and_back: used as a tuple index; 0/False keeps the original passage,
                             1/True takes the copy round-tripped through the standard format
    :param partial: replace the ENT_TYPE value with "" so the pre-annotation is incomplete
    :param monkeypatch: pytest fixture used to replace `textutil.get_nlp`
    """
    if not partial:
        # Fully pre-annotated case: get_nlp is replaced by assert_spacy_not_loaded
        # (presumably to make the test fail if spaCy gets loaded -- confirm).
        monkeypatch.setattr(textutil, "get_nlp", assert_spacy_not_loaded)

    passage = create()
    l0 = passage.layer(layer0.LAYER_ID)

    first_value = 10
    attr_values = list(range(first_value, first_value + len(textutil.Attr)))
    if partial:
        attr_values[textutil.Attr.ENT_TYPE.value] = ""

    if as_array:
        paragraphs = textutil.break2paragraphs(passage, return_terminals=True)
        l0.extra["doc"] = [[attr_values] * len(terminals) for terminals in paragraphs]
    else:
        for terminal in l0.all:
            for attr, value in zip(textutil.Attr, attr_values):
                if value:
                    terminal.extra[attr.key] = value

    # Both candidates are built up front; convert_and_back picks one of them.
    candidates = (passage, convert.from_standard(convert.to_standard(passage)))
    passage = candidates[convert_and_back]

    # The same keyword arguments are used for every is_annotated/annotate call below.
    mode = dict(as_array=as_array, as_extra=not as_array)
    if not partial:
        assert textutil.is_annotated(passage, **mode), \
            "Passage %s is not pre-annotated" % passage.ID
    textutil.annotate(passage, **mode)
    assert textutil.is_annotated(passage, **mode), \
        "Passage %s is not annotated" % passage.ID

    # NOTE(review): l0 was taken from the passage returned by create(); when the
    # round-tripped copy was selected above, these checks still read the original
    # layer -- confirm this is intended.
    for terminal in l0.all:
        for position, (attr, expected) in enumerate(zip(textutil.Attr, attr_values)):
            if not expected:
                continue
            if as_array:
                found = terminal.tok[position]
            else:
                found = terminal.extra.get(attr.key)
            assert found == expected, "Terminal %s has wrong %s" % (terminal, attr.name)
def test_annotate_all(self):
    """Annotate several passages with `textutil.annotate_all` and check the result.

    The passages are first annotated with default arguments, then again as
    `(passage, passage)` tuples with `as_array=True, as_tuples=True`. Each
    yielded passage, and a copy round-tripped through the standard format,
    must be annotated in both array and non-array form on every terminal.
    """
    passages = [convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))]
    passages += [
        TestUtil.create_passage(),
        TestUtil.create_crossing_passage(),
        TestUtil.create_discontiguous(),
        TestUtil.create_multi_passage(),
    ]
    # Consume the result of the first pass; its items are not needed.
    list(textutil.annotate_all(passages))
    paired = ((item, item) for item in passages)
    for passage, compare in textutil.annotate_all(paired, as_array=True, as_tuples=True):
        # The second tuple element must come back as the very same object.
        assert passage is compare
        round_tripped = convert.from_standard(convert.to_standard(passage))
        for checked in (passage, round_tripped):
            for array_mode in (True, False):
                self.assertTrue(is_annotated(checked, as_array=array_mode),
                                "Passage %s is not annotated" % passage.ID)
            for terminal in checked.layer(layer0.LAYER_ID).all:
                for attr in textutil.Attr:
                    self.assertIn(attr.key, terminal.extra,
                                  "Terminal %s in passage %s has no %s" % (terminal, passage.ID, attr.name))
                self.assertIsNotNone(terminal.tok,
                                     "Terminal %s in passage %s has no annotation" % (terminal, passage.ID))
                self.assertEqual(len(terminal.tok), len(textutil.Attr))
def main(args):
    """Annotate the passages named in `args.filenames` and write each one out.

    :param args: parsed arguments providing `filenames`, `as_array`, `verbose` and `out_dir`
    """
    source = get_passages_with_progress_bar(args.filenames, desc="Annotating")
    annotated = annotate_all(source, replace=True, as_array=args.as_array, verbose=args.verbose)
    for passage in annotated:
        # Sanity check before writing: each yielded passage must be annotated.
        assert is_annotated(passage, args.as_array), "Passage %s is not annotated" % passage.ID
        write_passage(passage, outdir=args.out_dir, verbose=args.verbose)
def test_annotate_passage(self):
    """Annotate one passage in both modes and check every terminal.

    The passage loaded from "test_files/standard3.xml" is annotated once with
    default arguments and once with `as_array=True`. Both the passage and a
    copy round-tripped through the standard format must then be annotated in
    both forms on every terminal.
    """
    xml_root = TestUtil.load_xml("test_files/standard3.xml")
    passage = convert.from_standard(xml_root)
    textutil.annotate(passage)
    textutil.annotate(passage, as_array=True)
    round_tripped = convert.from_standard(convert.to_standard(passage))
    for checked in (passage, round_tripped):
        for array_mode in (True, False):
            self.assertTrue(is_annotated(checked, as_array=array_mode),
                            "Passage %s is not annotated" % passage.ID)
        for terminal in checked.layer(layer0.LAYER_ID).all:
            for attr in textutil.Attr:
                self.assertIn(attr.key, terminal.extra,
                              "Terminal %s has no %s" % (terminal, attr.name))
            self.assertIsNotNone(terminal.tok, "Terminal %s has no annotation" % terminal)
            self.assertEqual(len(terminal.tok), len(textutil.Attr))
def test_annotate_passage(create, as_array):
    """Annotate a freshly created passage and check the annotation on every terminal.

    The checks run on the annotated passage and on a copy round-tripped
    through the standard format.

    :param create: callable returning the passage under test
    :param as_array: annotate and check the array form (`terminal.tok`) instead of
                     the per-attribute entries in `terminal.extra`
    """
    passage = create()
    textutil.annotate(passage, as_array=as_array)
    round_tripped = convert.from_standard(convert.to_standard(passage))
    for checked in (passage, round_tripped):
        assert textutil.is_annotated(checked, as_array=as_array), "Passage %s is not annotated" % passage.ID
        for term in checked.layer(layer0.LAYER_ID).all:
            if not as_array:
                # Non-array form: every attribute key must be present in extra.
                for attribute in textutil.Attr:
                    assert attribute.key in term.extra, "Terminal %s has no %s" % (term, attribute.name)
                continue
            # Array form: tok must exist and hold one entry per attribute.
            assert term.tok is not None, "Terminal %s has no annotation" % term
            assert len(term.tok) == len(textutil.Attr)
def test_annotate_all(as_array, convert_and_back):
    """Annotate all `PASSAGES` with `textutil.annotate_all` and check each terminal.

    The passages are annotated once with default arguments and once as
    `(passage, passage)` tuples with `as_tuples=True`. Each yielded passage,
    or its round-tripped copy, must then be annotated in the requested form.

    :param as_array: annotate and check the array form (`terminal.tok`) instead of
                     the per-attribute entries in `terminal.extra`
    :param convert_and_back: used as a tuple index; 0/False checks the yielded passage,
                             1/True checks a copy round-tripped through the standard format
    """
    passages = [factory() for factory in PASSAGES]
    # Exhaust the first pass; only the second pass is inspected.
    for _ in textutil.annotate_all(passages):
        pass
    paired = ((item, item) for item in passages)
    for passage, compare in textutil.annotate_all(paired, as_array=as_array, as_tuples=True):
        # The second tuple element must come back as the very same object.
        assert passage is compare
        # The round trip is always performed; convert_and_back only selects the result.
        candidates = (passage, convert.from_standard(convert.to_standard(passage)))
        checked = candidates[convert_and_back]
        assert textutil.is_annotated(checked, as_array=as_array), "Passage %s is not annotated" % passage.ID
        for term in checked.layer(layer0.LAYER_ID).all:
            if not as_array:
                for attribute in textutil.Attr:
                    assert attribute.key in term.extra, "Terminal %s in passage %s has no %s" % (
                        term, passage.ID, attribute.name)
                continue
            assert term.tok is not None, "Terminal %s in passage %s has no annotation" % (term, passage.ID)
            assert len(term.tok) == len(textutil.Attr)
def main(args):
    """Read, annotate and write out every passage listed in `args.filenames`.

    :param args: parsed arguments providing `filenames`, `as_array`, `verbose` and `out_dir`
    """
    annotated_passages = annotate_all(
        get_passages_with_progress_bar(args.filenames, desc="Annotating"),
        replace=True,
        as_array=args.as_array,
        verbose=args.verbose,
    )
    for annotated in annotated_passages:
        # Each yielded passage is checked for annotation before it is written.
        assert is_annotated(annotated, args.as_array), "Passage %s is not annotated" % annotated.ID
        write_passage(annotated, outdir=args.out_dir, verbose=args.verbose)