Exemplo n.º 1
0
    def test_addboxes(self):
        box1 = Textbox([Textelement("hey ", tag=None)],
                       fontid=None,
                       top=0,
                       left=0,
                       width=50,
                       height=10,
                       lines=1)
        box2 = Textbox([Textelement("ho", tag=None)],
                       fontid=None,
                       top=0,
                       left=50,
                       width=40,
                       height=10,
                       lines=1)

        combinedbox = box1 + box2
        want = """
<Textbox bottom="10" fontid="0" height="10" left="0" lineheight="0" lines="1" right="90" top="0" width="90">
  <Textelement>hey ho</Textelement>
</Textbox>
"""
        self.assertEqual(want[1:], serialize(combinedbox))
        # make sure __iadd__ performs like __add__
        box1 += box2
        self.assertEqual(want[1:], serialize(box1))
Exemplo n.º 2
0
    def test_leading_tag(self):
        body = Textbox([
            Textelement("bold", tag="b"),
            Textelement("normal", tag=None),
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px"><b>bold</b>normal</p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 3
0
    def test_superscripts(self):
        body = Textbox([
            Textelement("1", tag="sup"),
            Textelement("2", tag="is"),
            Textelement("3", tag="bis")
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px"><sup>1</sup><i><sup>2</sup></i><b><i><sup>3</sup></i></b></p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 4
0
    def test_add_different_types(self):
        box1 = Textbox([Textelement("hey", tag=None)],
                       fontid=None,
                       top=0,
                       left=0,
                       width=50,
                       height=10,
                       lines=1)
        box2 = Textbox([LinkedTextelement("1", tag="s", uri="foo.html")],
                       fontid=None,
                       top=0,
                       left=50,
                       width=5,
                       height=10,
                       lines=1)
        combinedbox = box1 + box2
        want = """
<Textbox bottom="10" fontid="0" height="10" left="0" lineheight="0" lines="1" right="55" top="0" width="55">
  <Textelement>hey</Textelement>
  <LinkedTextelement tag="s" uri="foo.html">1</LinkedTextelement>
</Textbox>
"""
        self.assertEqual(want[1:], serialize(combinedbox))
        # make sure __iadd__ performs like __add__
        box1 += box2
        self.assertEqual(want[1:], serialize(box1))
Exemplo n.º 5
0
    def test_tag_merge(self):
        body = Textbox([
            Textelement("identical ", tag=None),
            Textelement("tags ", tag=None),
            Textelement("should ", tag="b"),
            Textelement("merge", tag="b"),
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">identical tags <b>should merge</b></p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 6
0
    def test_elements_with_tags(self):
        body = Textbox([
            Textelement("normal", tag=None),
            Textelement("bold", tag="b"),
            Textelement("italic", tag="i"),
            Textelement("both", tag="bi")
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">normal<b>bold</b><i>italic</i><b><i>both</i></b></p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 7
0
    def test_linkelements(self):
        body = Textbox([
            Textelement("normal", tag=None),
            LinkedTextelement("link", uri="http://example.org/", tag=None),
            Textelement("footnote marker", tag="sup"),
            LinkedTextelement(
                "linked footnote marker", uri="http://example.org/", tag="s")
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)

        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">normal<a href="http://example.org/">link</a><sup>footnote marker</sup><a href="http://example.org/"><sup>linked footnote marker</sup></a></p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 8
0
    def test_basic(self):
        body = Textbox([Textelement("test", tag=None)],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">test</p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 9
0
    def test_empty_removal(self):
        body = Textbox([
            LinkedTextelement("  ", uri="index.html#24", tag=None),
            Textelement("23", tag=None)
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">23</p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 10
0
 def __call__(self, textbox, fontspecs):
     """Decode the text elements of *textbox* in place and return it.

     The box is returned untouched when the fontspec selected by
     ``textbox.fontid`` has no ``'encoding'`` entry, or when that entry
     is anything other than ``"Custom"``. Otherwise each ``Textelement``
     chosen by the rule below is replaced by a new ``Textelement`` built
     from ``self.decode_string(element, self.map)`` with the same tag.
     """
     spec = fontspecs[textbox.fontid]
     if 'encoding' not in spec:  # only for some testcases
         return textbox
     if spec['encoding'] != "Custom":
         return textbox
     # NOTE: If any part of the box is tagged 'i', only the 'i'-tagged
     # elements are decoded; if none is, every Textelement is decoded.
     # This odd rule is needed for functionalSources.
     # TestPropRegeringen.test_parse_1999_2000_17 to pass
     # (and matches encoding usage in practice)
     tags = [getattr(part, 'tag', None) for part in textbox]
     decode_all = 'i' not in tags
     for position, part in enumerate(textbox):
         if not isinstance(part, Textelement):
             continue
         if not (decode_all or part.tag == 'i'):
             continue
         decoded = self.decode_string(part, self.map)
         textbox[position] = Textelement(decoded, tag=part.tag)
     return textbox
Exemplo n.º 11
0
    def test_other_elements(self):
        body = Textbox([
            Textelement("plaintext ", tag=None),
            LinkSubject("link",
                        uri="http://example.org/",
                        predicate="dcterms:references"), " raw string"
        ],
                       top=0,
                       left=0,
                       width=100,
                       height=100,
                       fontid=0)
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">plaintext <a href="http://example.org/" rel="dcterms:references">link</a> raw string</p>
"""
        self._test_asxhtml(want, body)

        # remove the last str so that the linksubject becomes the last item
        body[:] = body[:-1]
        want = """
<p xmlns="http://www.w3.org/1999/xhtml" class="textbox fontspec0" style="top: 0px; left: 0px; height: 100px; width: 100px">plaintext <a href="http://example.org/" rel="dcterms:references">link</a></p>
"""
        self._test_asxhtml(want, body)
Exemplo n.º 12
0
 def __call__(self, textbox, fontspecs):
     """Decode a custom-encoded *textbox* in place and return it.

     Returns *textbox* unchanged unless the fontspec selected by
     ``textbox.fontid`` has ``'encoding'`` equal to ``"Custom"``.

     For the font family ``"Times.New.Roman.Fet0100"`` the first element
     gets special treatment: if a leading encoded part can be identified
     (see the inline comments), only that part is decoded and tagged
     ``"b"``, the rest is re-inserted undecoded as a second element, and
     the box is given the fontid that ``self.find_fontid`` returns for
     ``"Times-Roman"`` at the same size. Otherwise the whole first
     element is decoded. Any remaining elements are passed to the
     superclass ``__call__``.

     For every other family the superclass ``__call__`` does the
     decoding; afterwards a ``"Times.New.Roman.Kursiv0104"`` box that
     contains an ``"i"``-tagged element is likewise switched to the
     ``"Times-Roman"`` fontid.

     NOTE(review): ``fontspecs[textbox.fontid]['encoding']`` is read
     without checking that the key exists, so a fontspec without an
     ``'encoding'`` entry would raise KeyError here -- confirm that
     callers always provide it.
     """
     if fontspecs[textbox.fontid]['encoding'] != "Custom":
         return textbox
     if textbox.font.family == "Times.New.Roman.Fet0100":
         boundary = None
         # extra special hack for prop 1997/98:44 which has
         # textelements marked as having a font with custom
         # encoding, but where only the bolded part (which
         # isn't marked up...) is encoded, while the rest is
         # unencoded. The "g" is an encoded section sign, which
         # in these cases is the last encoded char.
         # The boundary is then the position of the second space,
         # i.e. just after the "g".
         if (len(textbox[0].split(" ", 2)) == 3
                 and textbox[0].split(" ", 2)[1] == "g"):
             boundary = textbox[0].index(" ", textbox[0].index(" ") + 1)
         # a similar situation with paragraphs with leading bold
         # type, where the bold text is any of 3-4 fixed strings
         # (Note: the xml data doesn't contain any information
         # about the text being bold, or rather that the following
         # text is non-bold)
         else:
             m = self.re_fixedleaders.match(textbox[0])
             if m:
                 boundary = m.end()
         if boundary:
             # split the first element: decoded bold leader, followed by
             # the undecoded remainder as a new untagged element
             orig = str(textbox[0])
             textbox[0] = Textelement(self.decode_string(
                 orig[:boundary], self.map),
                                      tag="b")
             textbox.insert(1, Textelement(orig[boundary:], tag=None))
             # Find the id for the "real" non-bold font. I think
             # that in every known case the fontid should simply be
             # the default font (id=0). Maybe we could hardcode
             # that right away, like we hardcode the font family
             # name right now.
             newfontid = self.find_fontid(fontspecs, "Times-Roman",
                                          textbox.font.size)
             expected_length = 2
         else:
             # no boundary found: decode the whole first element and
             # keep both its tag and the box's fontid
             textbox[0] = Textelement(self.decode_string(
                 textbox[0], self.map),
                                      tag=textbox[0].tag)
             expected_length = 1
             newfontid = textbox.fontid
         if len(
                 textbox
         ) > expected_length:  # the <text> element contained subelements
             # save and remove the 1-2 textelements we've processed
             decoded = textbox[:expected_length]
             textbox[:] = textbox[expected_length:]
             # do the default decoding
             textbox = super(OffsetDecoder20,
                             self).__call__(textbox, fontspecs)
             # then add the previously processed elements
             textbox[:] = decoded + textbox[:]
         if newfontid != textbox.fontid:
             # invalidate the cached property
             # NOTE(review): presumably `font` is a cached property that
             # stores its value in the instance __dict__; the entry
             # exists here because textbox.font was read above -- confirm.
             del textbox.__dict__['font']
             textbox.fontid = newfontid
     else:
         textbox = super(OffsetDecoder20, self).__call__(textbox, fontspecs)
         # again, if one or more textelements have an "i" tag, the
         # font for the entire textbox probably shouldn't be
         # specced as an italic ("Kursiv")
         if textbox.font.family == "Times.New.Roman.Kursiv0104" and "i" in [
                 x.tag for x in textbox
         ]:
             newfontid = self.find_fontid(fontspecs, "Times-Roman",
                                          textbox.font.size)
             # invalidate the cached property
             del textbox.__dict__['font']
             textbox.fontid = newfontid
     return textbox