def test_lazy_add(self):
    """Check that lazily added ``s`` elements show up in the rebuilt tree.

    The plain text of a view (``lb`` rendered as a newline, then
    ``exclude_outside("p")``) is split into sentences by the ``English``
    pipeline with a ``sentencizer`` component. Each sentence is added as
    an inline ``s`` element with ``lazy=True``, the ``./body`` subtree is
    recreated once afterwards, and the serialized tree is compared with
    the expected markup.
    """
    tree = etree.fromstring(input_xml4)
    so = standoffconverter.Standoff(tree)
    view = (
        standoffconverter.View(so.table)
        .insert_tag_text("lb", "\n")
        .exclude_outside("p")
    )
    plain, lookup = view.get_plain()

    nlp = English()
    nlp.add_pipe('sentencizer')

    for sent_index, sent in enumerate(nlp(plain).sents):
        begin = lookup.get_pos(sent.start_char)
        # Look up the sentence's last character and step one past it,
        # instead of looking up ``end_char`` directly (presumably because
        # ``end_char`` is exclusive and may not be a valid view position).
        end = lookup.get_pos(sent.end_char - 1) + 1
        so.add_inline(
            begin=begin,
            end=end,
            tag="s",
            depth=None,
            attrib={'id': f'{sent_index}'},
            lazy=True,
        )

    so.recreate_subtree(so.text_el.find('./body'))
    output_xml = etree.tostring(so.tree).decode("utf-8")

    # The comparison is exact, so whitespace inside this literal matters.
    expected_output = """<TEI> <teiHeader> </teiHeader> <text> <body> <p><s id="0">1 2 3 4.</s> <s id="1">5 6<lb/> 7 9 10.</s></p> <p> <s id="2">11 12 13 14</s></p> </body> </text></TEI>"""

    # assertEqual shows both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(output_xml, expected_output)
def test_view_shrink_whitespace_2(self):
    """Check the plain text of a ``shrink_whitespace`` view of ``input_xml3``."""
    tree = etree.fromstring(input_xml3)
    so = standoffconverter.Standoff(tree)
    view = standoffconverter.View(so.table).shrink_whitespace()

    # Only the plain text is checked here; the lookup object is not needed.
    plain, _ = view.get_plain()

    # assertEqual shows both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
def test_view_1(self):
    """Check ``standoff_char_pos(0)`` on a view built from a boolean mask.

    NOTE(review): this test uses ``Converter``, ``View(so, mask)`` and
    ``standoff_char_pos``, whereas the neighbouring tests use ``Standoff``
    and ``View(so.table)`` -- confirm it still matches the current API.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Converter(tree)

    # Boolean mask as long as the table, with only positions 10-19 set.
    mask = np.zeros(len(so.table), dtype=bool)
    mask[10:20] = True

    view = standoffconverter.View(so, mask)

    # assertEqual shows both values on failure, unlike assertTrue(a == b).
    self.assertEqual(view.standoff_char_pos(0), (10, 10))
def test_view_shrink_whitespace_1(self):
    """Check plain text and index lookup of a ``shrink_whitespace`` view.

    Verifies that the table row found via the lookup for the plain-text
    position of ``"7"`` holds the text ``"7"``, and that the plain text
    as a whole matches the expected string.
    """
    tree = etree.fromstring(input_xml2)
    so = standoffconverter.Standoff(tree)
    view = standoffconverter.View(so.table).shrink_whitespace()
    plain, lookup = view.get_plain()

    # Map the plain-text offset of "7" back to its row in the table.
    table_index = lookup.get_table_index(plain.index("7"))

    # assertEqual shows both values on failure, unlike assertTrue(a == b).
    self.assertEqual(so.table.df.iloc[table_index].text, "7")
    self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
def test_view_insert_tag_text(self):
    """Check plain text and index lookup after ``insert_tag_text("lb", "\\n")``.

    Verifies that the table row found via the lookup for the plain-text
    position of ``"12"`` holds the text ``"1"`` (the first character of
    that match), and that the plain text matches the expected string.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    view = standoffconverter.View(so.table)

    # The return value is deliberately left unbound, as in the original.
    # NOTE(review): this relies on insert_tag_text changing ``view`` in
    # place -- confirm against the View implementation.
    view.insert_tag_text("lb", "\n")
    plain, lookup = view.get_plain()

    # Map the plain-text offset where "12" starts back to its table row.
    table_index = lookup.get_table_index(plain.index("12"))

    # assertEqual shows both values on failure, unlike assertTrue(a == b).
    self.assertEqual(so.table.df.iloc[table_index].text, "1")
    self.assertEqual(plain, '1 2 3 4 5 6 7 9 10 11\n 12 13 14')
def test_view_exclude_2(self):
    """Check the plain text of a view after ``exclude_outside(["xx"])``.

    An inline ``xx`` element is added for ``begin=2, end=5`` first; the
    resulting plain text is expected to be ``'2 3'``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_inline(
        begin=2,
        end=5,
        tag="xx",
        depth=None,
        attrib={"resp": "machine"},
    )

    view = standoffconverter.View(so.table).exclude_outside(["xx"])

    # Only the plain text is checked here; the lookup object is not needed.
    plain, _ = view.get_plain()

    # assertEqual shows both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(plain, '2 3')
def test_view_exclude_1(self):
    """Check index lookup on a view after ``exclude_inside(["xx"])``.

    An inline ``xx`` element is added for ``begin=2, end=4`` first; the
    table row found via the lookup for the plain-text position of ``"5"``
    is expected to hold the text ``"5"``.
    """
    tree = etree.fromstring(input_xml1)
    so = standoffconverter.Standoff(tree)
    so.add_inline(
        begin=2,
        end=4,
        tag="xx",
        depth=None,
        attrib={"resp": "machine"},
    )

    view = standoffconverter.View(so.table).exclude_inside(["xx"])
    plain, lookup = view.get_plain()

    # Map the plain-text offset of "5" back to its row in the table.
    table_index = lookup.get_table_index(plain.index("5"))

    # assertEqual shows both values on failure, unlike assertTrue(a == b).
    self.assertEqual(so.table.df.iloc[table_index].text, "5")