示例#1
0
 def test_view_shrink_whitespace_2(self):
     """Check the plain text of a whitespace-shrunk view built from input_xml3."""
     tree = etree.fromstring(input_xml3)
     so = standoffconverter.Standoff(tree)
     view = standoffconverter.View(so.table).shrink_whitespace()
     # Only the plain text is inspected here; the lookup is not needed.
     plain, _ = view.get_plain()
     # assertEqual reports both strings on failure, unlike assertTrue(a == b).
     self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
示例#2
0
    def test_lazy_add(self):
        tree = etree.fromstring(input_xml4)
        so = standoffconverter.Standoff(tree)

        view = (standoffconverter.View(so.table).insert_tag_text(
            "lb", "\n").exclude_outside("p"))

        plain, lookup = view.get_plain()

        nlp = English()
        nlp.add_pipe('sentencizer')

        for isent, sent in enumerate(nlp(plain).sents):

            start_ind = lookup.get_pos(sent.start_char)
            end_ind = lookup.get_pos(sent.end_char - 1) + 1

            so.add_inline(begin=start_ind,
                          end=end_ind,
                          tag="s",
                          depth=None,
                          attrib={'id': f'{isent}'},
                          lazy=True)

        so.recreate_subtree(so.text_el.find('./body'))
        output_xml = etree.tostring(so.tree).decode("utf-8")
        expected_output = """<TEI>
<teiHeader> </teiHeader>
<text>
    <body>
        <p><s id="0">1 2 3 4.</s> <s id="1">5 6<lb/> 7 9 10.</s></p>
        <p> <s id="2">11 12 13 14</s></p>
    </body>
</text></TEI>"""
        self.assertTrue(output_xml == expected_output)
示例#3
0
    def test_collapsed_table_1(self):
        """Check the text of rows 0 and 3 of the collapsed table for input_xml1."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)
        collapsed_table = so.collapsed_table

        # assertEqual shows the actual row text when the comparison fails.
        self.assertEqual(collapsed_table.iloc[0].text, "1 2 3 4 5 6 7 9 10")
        self.assertEqual(collapsed_table.iloc[3].text, " 12 13 14")
示例#4
0
    def test_remove_annotation(self):
        """Remove the element at ``so.standoffs[2]`` and check the serialised text."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        to_remove = so.standoffs[2]
        so.remove_inline(to_remove["el"])

        output_xml = etree.tostring(so.text_el).decode("utf-8")
        expected_output = '<text><body>1 2 3 4 5 6 7 9 10<p> 11<lb/> 12 13 14</p></body></text>'
        # assertEqual reports both XML strings on failure.
        self.assertEqual(output_xml, expected_output)
示例#5
0
    def test_view_shrink_whitespace_1(self):
        """Check plain text and lookup of a whitespace-shrunk view of input_xml2."""
        tree = etree.fromstring(input_xml2)
        so = standoffconverter.Standoff(tree)
        view = standoffconverter.View(so.table).shrink_whitespace()
        plain, lookup = view.get_plain()

        # The lookup must map a plain-text offset back to the matching row of
        # the unshrunk table.
        table_index = lookup.get_table_index(plain.index("7"))
        self.assertEqual(so.table.df.iloc[table_index].text, "7")
        self.assertEqual(plain, '1 2\n3 4 5 6 7 9 10 11 12 13 14')
示例#6
0
    def test_remove_empty_element(self):
        """Remove the last standoff element and check the serialised text.

        The expected output no longer contains the ``<lb/>`` element.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        to_remove = so.standoffs[-1]
        so.remove_inline(to_remove['el'])

        output_xml = etree.tostring(so.text_el).decode("utf-8")
        expected_out = '''<text><body><p>1 2 3 4 5 6 7 9 10</p><p> 11 12 13 14</p></body></text>'''

        # assertEqual reports both XML strings on failure.
        self.assertEqual(output_xml, expected_out)
示例#7
0
    def test_add_empty_element(self):
        """Add a zero-length ``lb`` element (begin == end == 1) and check the XML."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        so.add_inline(begin=1, end=1, tag="lb", depth=None, attrib={})

        output_xml = etree.tostring(so.text_el).decode("utf-8")
        expected_out = '''<text><body><p>1<lb/> 2 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>'''

        # assertEqual reports both XML strings on failure.
        self.assertEqual(output_xml, expected_out)
示例#8
0
    def test_add_annotation_fail1(self):
        """Expect ``ValueError`` from ``add_inline`` for the range 17-19 at depth 3."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        # Keep only the call under test inside the context manager, so a
        # ValueError raised by anything else cannot satisfy the assertion.
        with self.assertRaises(ValueError):
            so.add_inline(begin=17,
                          end=19,
                          tag="xx",
                          depth=3,
                          attrib={"resp": "machine"})
示例#9
0
    def test_add_annotation_1(self):
        """Wrap the range 0-1 in an ``xx`` element and check the serialised text."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        so.add_inline(begin=0,
                      end=1,
                      tag="xx",
                      depth=None,
                      attrib={"resp": "machine"})

        output_xml = etree.tostring(so.text_el).decode("utf-8")
        expected_out = '''<text><body><p><xx resp="machine">1</xx> 2 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>'''

        # assertEqual reports both XML strings on failure.
        self.assertEqual(output_xml, expected_out)
示例#10
0
    def test_view_insert_tag_text(self):
        """Insert a newline for every ``lb`` tag and check plain text and lookup."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        # Use the returned view, as the other view tests do, instead of
        # relying on insert_tag_text modifying the view in place.
        view = standoffconverter.View(so.table).insert_tag_text("lb", "\n")

        plain, lookup = view.get_plain()

        # The offset of "12" resolves to a single table row whose text is the
        # first character, "1".
        table_index = lookup.get_table_index(plain.index("12"))
        self.assertEqual(so.table.df.iloc[table_index].text, "1")

        self.assertEqual(plain, '1 2 3 4 5 6 7 9 10 11\n 12 13 14')
示例#11
0
 def test_add_annotation_fail2(self):
     """Expect ``ValueError`` when adding range 3-5 after range 2-4 was annotated."""
     so = standoffconverter.Standoff(etree.fromstring(input_xml1))

     # Each call gets its own attrib dict, exactly as two literal calls would.
     first_call = dict(begin=2,
                       end=4,
                       tag="xx",
                       depth=None,
                       attrib={"resp": "machine"})
     second_call = dict(begin=3,
                        end=5,
                        tag="xx",
                        depth=None,
                        attrib={"resp": "machine"})

     so.add_inline(**first_call)
     with self.assertRaises(ValueError):
         so.add_inline(**second_call)
示例#12
0
    def test_view_exclude_2(self):
        """Check that ``exclude_outside(["xx"])`` leaves only the annotated text."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)
        so.add_inline(begin=2,
                      end=5,
                      tag="xx",
                      depth=None,
                      attrib={"resp": "machine"})

        view = standoffconverter.View(so.table).exclude_outside(["xx"])
        # Only the plain text is inspected here; the lookup is not needed.
        plain, _ = view.get_plain()

        # assertEqual reports the actual plain text on failure.
        self.assertEqual(plain, '2 3')
示例#13
0
    def test_span_2(self):
        """Add a span over 2-22 with id ``test2`` and check the serialised text.

        The expected output has a ``<span spanTo="test2"/>`` in the first
        paragraph and an ``<anchor id="test2"/>`` in the second.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        so.add_span(begin=2,
                    end=22,
                    tag="span",
                    depth=None,
                    attrib=None,
                    id_="test2")

        output_xml = etree.tostring(so.text_el).decode("utf-8")
        expected_output = "<text><body><p>1 <span spanTo=\"test2\"/>2 3 4 5 6 7 9 10</p><p> 11<lb/> <anchor id=\"test2\"/>12 13 14</p></body></text>"
        # assertEqual reports both XML strings on failure.
        self.assertEqual(output_xml, expected_output)
示例#14
0
    def test_view_exclude_1(self):
        """Check the lookup of a view built with ``exclude_inside(["xx"])``."""
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)
        so.add_inline(begin=2,
                      end=4,
                      tag="xx",
                      depth=None,
                      attrib={"resp": "machine"})

        view = standoffconverter.View(so.table).exclude_inside(["xx"])
        plain, lookup = view.get_plain()

        # The lookup must map an offset in the filtered plain text back to
        # the matching row of the full table.
        table_index = lookup.get_table_index(plain.index("5"))
        self.assertEqual(so.table.df.iloc[table_index].text, "5")
示例#15
0
    def test_add_annotation_3(self):
        """Add ``xx`` then ``vv`` over the same range 2-3 at depth 3.

        The expected output nests ``xx`` inside ``vv``.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        so.add_inline(begin=2,
                      end=3,
                      tag="xx",
                      depth=3,
                      attrib={"resp": "machine"})
        so.add_inline(begin=2,
                      end=3,
                      tag="vv",
                      depth=3,
                      attrib={"resp": "machine"})

        output_xml = etree.tostring(so.text_el).decode("utf-8")
        expected_out = '<text><body><p>1 <vv resp="machine"><xx resp="machine">2</xx></vv> 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>'

        # assertEqual prints both strings on failure, so no debug prints are
        # needed.
        self.assertEqual(output_xml, expected_out)
示例#16
0
    def test_add_remove_annotation2(self):
        """Add five identical ``vv`` annotations, remove them all, check the XML.

        The expected output contains no ``vv`` element.
        """
        tree = etree.fromstring(input_xml1)
        so = standoffconverter.Standoff(tree)

        for _ in range(5):
            so.add_inline(begin=2,
                          end=3,
                          tag="vv",
                          depth=3,
                          attrib={"resp": "machine"})

        for _ in range(5):
            # Take the first remaining 'vv' element without building a list.
            to_remove = next(
                it["el"] for it in so.standoffs if it["el"].tag == 'vv'
            )
            so.remove_inline(to_remove)

        output_xml = etree.tostring(so.text_el).decode("utf-8")

        expected_output = "<text><body><p>1 2 3 4 5 6 7 9 10</p><p> 11<lb/> 12 13 14</p></body></text>"

        # assertEqual reports both XML strings on failure.
        self.assertEqual(output_xml, expected_output)
示例#17
0
 def test_from_tree_plain(self):
     """Check ``Standoff.plain`` for a tree parsed from input_xml1."""
     tree = etree.fromstring(input_xml1)
     so = standoffconverter.Standoff(tree)
     # assertEqual reports the actual plain text on failure.
     self.assertEqual(so.plain, '1 2 3 4 5 6 7 9 10 11 12 13 14')
示例#18
0
 def test_collapsed_table_2(self):
     """Check the string form of the first collapsed row's context."""
     tree = etree.fromstring(input_xml1)
     so = standoffconverter.Standoff(tree)
     collapsed_table = so.collapsed_table
     # assertEqual reports the actual context string on failure.
     self.assertEqual(str(collapsed_table.iloc[0].context), "text>body>p")
示例#19
0
 def test_json(self):
     """Check the ``Standoff.json`` string produced for input_xml1."""
     tree = etree.fromstring(input_xml1)
     so = standoffconverter.Standoff(tree)
     output_json = so.json
     expected_out = '[{"tag": "text", "attrib": {}, "begin": 0, "end": 30, "depth": 0}, {"tag": "body", "attrib": {}, "begin": 0, "end": 30, "depth": 1}, {"tag": "p", "attrib": {}, "begin": 0, "end": 18, "depth": 2}, {"tag": "p", "attrib": {}, "begin": 18, "end": 30, "depth": 2}, {"tag": "lb", "attrib": {}, "begin": 21, "end": 21, "depth": 3}]'
     # The comparison is on the exact serialised string; assertEqual reports
     # both strings on failure.
     self.assertEqual(output_json, expected_out)