示例#1
0
 def test_dict(self):
     """Dictionaries parse into dicts keyed by PDFName, including nesting."""
     cases = (
         ("<< /Foo /Bar >>", {PDFName("/Foo"): PDFName("/Bar")}),
         ("<< /Foo << /Bar /Koo>> >>",
          {PDFName("/Foo"): {PDFName("/Bar"): PDFName("/Koo")}}),
         ("<< /Foobar13478 123 >>", {PDFName("/Foobar13478"): 123}),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#2
0
    def test_comments(self):
        """Parse a dictionary containing a full-line % comment.

        There is no assertion on the result; the test only requires that
        parsing completes without raising.
        """
        data = (
            "<<\n"
            "/ID\n"
            "[<5b><5b>]\n"
            "% ReportLab generated PDF document -- digest (http://www.reportlab.com)\n"
            "\n"
            "/Info 13 0 R\n"
            "/Root 12 0 R\n"
            "/Size 17\n"
            ">>"
        )
        PDFParser.parse(data)
示例#3
0
    def test_pdf_comment(self):
        """A trailing % comment after a dictionary entry is not part of the result."""
        # Same text as a multi-line literal; tabs are spelled out as escapes.
        source = (
            "<<\n"
            "\t\t\t/Length 213 0 R\t\t% Foobar\n"
            "\t\t>>"
        )
        expected = {PDFName("/Length"): PDFXRef(213, 0)}
        self.assertEqual(PDFParser.parse(source), expected)
示例#4
0
    def test_realworld_pdf(self):
        """Parse two dictionaries shaped like ones found in real PDF files.

        The first mixes indirect references, integers, arrays of ints and
        floats, and two levels of nested dictionaries. The second mixes an
        integer, a literal string and an indirect reference.
        """
        # Tabs are written as escapes; the runtime text matches the
        # tab-indented multi-line layout of the dictionary.
        pattern_source = (
            "<<\n"
            "\t\t\t/Length 213 0 R\n"
            "\t\t\t/PatternType 1\n"
            "\t\t\t/BBox [0 0 2596 37]\n"
            "\t\t\t/XStep 8243\n"
            "\t\t\t/YStep 8243\n"
            "\t\t\t/TilingType 1\n"
            "\t\t\t/PaintType 1\n"
            "\t\t\t/Matrix [ 0.333679 0 0 0.333468 78.832642 172.074584 ]\n"
            "\t\t\t/Resources <<\n"
            "\t\t\t\t/XObject <<\n"
            "\t\t\t\t\t/x211 211 0 R\n"
            "\t\t\t\t\t>>\n"
            "\t\t\t\t>>\n"
            "\t\t\t>>"
        )
        parsed_pattern = PDFParser.parse(pattern_source)
        expected_pattern = {
            PDFName("/Length"): PDFXRef(213, 0),
            PDFName("/PatternType"): 1,
            PDFName("/BBox"): [0, 0, 2596, 37],
            PDFName("/XStep"): 8243,
            PDFName("/YStep"): 8243,
            PDFName("/TilingType"): 1,
            PDFName("/PaintType"): 1,
            PDFName("/Matrix"): [
                0.333679, 0, 0, 0.333468, 78.832642, 172.074584
            ],
            PDFName("/Resources"): {
                PDFName("/XObject"): {
                    PDFName("/x211"): PDFXRef(211, 0),
                },
            },
        }
        self.assertEqual(parsed_pattern, expected_pattern)

        font_source = "<< /XHeight 0 /CharSet (/F) /FontFile 2992 0 R >>"
        parsed_font = PDFParser.parse(font_source)
        expected_font = {
            PDFName("/XHeight"): 0,
            PDFName("/CharSet"): b"/F",
            PDFName("/FontFile"): PDFXRef(2992, 0),
        }
        self.assertEqual(parsed_font, expected_font)
示例#5
0
 def test_string_simple(self):
     """Literal strings parse to bytes with inner whitespace kept intact."""
     cases = (
         ("(Foo)", b"Foo"),
         ("(Foo Bar)", b"Foo Bar"),
         ("(Foo   Bar)", b"Foo   Bar"),
         ("(Foo   Bar   )", b"Foo   Bar   "),
         ("( Foo)", b" Foo"),
         ("(Foo )", b"Foo "),
         ("( Foo )", b" Foo "),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#6
0
 def test_string_escape_sequences(self):
     """Escaped control characters, parentheses and backslashes are decoded."""
     cases = (
         (r"(Foo\n)", b"Foo\n"),
         (r"(Foo\t\r\n)", b"Foo\t\r\n"),
         (r"(Foo\(\(\()", b"Foo((("),
         (r"(Foo\)\)\)\(\(\()", b"Foo)))((("),
         (r"(Foo\)\)\) \\\\\\ \(\(\()", b"Foo))) \\\\\\ ((("),
         (r"(Foo \\   Bar)", b"Foo \\   Bar"),
         (r"(Foo \\ \) Bar)", b"Foo \\ ) Bar"),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#7
0
 def test_string_nested(self):
     """Balanced, unescaped parentheses inside a string stay in the value."""
     cases = (
         ("(Foo (Bar))", b"Foo (Bar)"),
         ("(Foo( Bar))", b"Foo( Bar)"),
         ("(Foo (Bar)   )", b"Foo (Bar)   "),
         ("(Foo ( Bar )   )", b"Foo ( Bar )   "),
         ("(Foo (Klammer) Bar)", b"Foo (Klammer) Bar"),
         ("(Foo (Klammer (Klammer2) Yeah) Bar)",
          b"Foo (Klammer (Klammer2) Yeah) Bar"),
         ("(Foo (Space)                   Yes)",
          b"Foo (Space)                   Yes"),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#8
0
    def __init__(self, objid, gennum, rawdata):
        """Build an object from its identifiers and optional raw bytes.

        Args:
            objid: Object id; asserted to be an int.
            gennum: Generation number; asserted to be an int.
            rawdata: Raw bytes of the object, or None. With None, both the
                parsed content and the stream are set to None.

        When rawdata is given, it is split at the ``stream`` and
        ``endstream`` tokens. The part before ``stream`` is decoded as
        latin1, stripped of line continuations and handed to
        ``PDFParser.parse``. If either token is missing, the whole of
        rawdata is treated as content and no stream is stored.
        """
        # A None value is not an int, so these also reject None.
        assert isinstance(objid, int)
        assert isinstance(gennum, int)
        self._objid = objid
        self._gennum = gennum

        if rawdata is None:
            self._stream = None
            self._content = None
            return

        reader = StreamRepr(rawdata)
        header = reader.read_until_token(b"stream")
        payload = None
        if header is not None:
            payload = reader.read_until_token(b"endstream")
        if payload is None:
            # Either there is no stream at all, or "stream" appeared (maybe
            # inside the dictionary) without a closing "endstream". In both
            # cases the whole raw data is the content.
            header = rawdata
        self._stream = payload

        text = header.decode("latin1")

        # Remove line continuations: a backslash followed by CRLF, LF or CR.
        for continuation in ("\\\r\n", "\\\n", "\\\r"):
            text = text.replace(continuation, "")

        self._content = PDFParser.parse(text)

        if self._stream is not None:
            length_key = PDFName("/Length")
            if length_key in self._content:
                declared_length = self._content[length_key]
                if isinstance(declared_length, int):
                    # Only a direct integer length is used to truncate the
                    # stream; other /Length values leave it untouched.
                    self._stream = self._stream[:declared_length]
示例#9
0
 def test_array(self):
     """Arrays parse to lists of ints, floats, names, references and dicts."""
     cases = (
         ("[ 1 2 /Foo 3 4 ]", [1, 2, PDFName("/Foo"), 3, 4]),
         ("[ /Foobar13478 /Barfoo999 ]",
          [PDFName("/Foobar13478"), PDFName("/Barfoo999")]),
         ("[ 12345 9999 48 489 8473 << /foo 3939 >>]",
          [12345, 9999, 48, 489, 8473, {PDFName("/foo"): 3939}]),
         ("[ 12345 9999 48 489 R 8473 3.43984 << /foo 3939 >>]",
          [12345, 9999, PDFXRef(48, 489), 8473, 3.43984,
           {PDFName("/foo"): 3939}]),
         ("[ 0.333679 0 0 0.333468 78.832642 172.074584 ]",
          [0.333679, 0, 0, 0.333468, 78.832642, 172.074584]),
         ("[ 1.2345 1.2345 ]", [1.2345, 1.2345]),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#10
0
 def test_string_non_ascii(self):
     """Octal escapes and a raw non-ASCII character map to single bytes."""
     cases = (
         (r"(Foo \1 Bar)", b"Foo \x01 Bar"),
         (r"(Foo \12 Bar)", b"Foo \x0a Bar"),
         (r"(Foo \123 Bar)", b"Foo S Bar"),
         (r"(Foo \333 Bar)", b"Foo \xdb Bar"),
         # Not a raw literal: the input really contains character U+00C4.
         ("(Foo \xc4 Bar)", b"Foo \xc4 Bar"),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#11
0
 def test_name(self):
     """Names parse to PDFName; a #20 escape becomes a space."""
     cases = (
         ("/Foo", PDFName("/Foo")),
         ("/Foo#20Bar", PDFName("/Foo Bar")),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#12
0
 def test_float(self):
     """Reals parse to floats: signed, leading-dot, zero-padded forms."""
     cases = (
         ("-9.13478", -9.13478),
         ("13.478", 13.478),
         (".13478", 0.13478),
         ("000.33478", 0.33478),
         ("10.000", 10.0),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#13
0
 def test_int(self):
     """Integers parse to ints, with leading zeros and a minus sign accepted."""
     cases = (
         ("013478", 13478),
         ("13478", 13478),
         ("-913478", -913478),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#14
0
 def test_xref(self):
     """Indirect references parse to PDFXRef, alone and inside an array."""
     cases = (
         ("123 456 R", PDFXRef(123, 456)),
         ("[ 123 456 R 999 888 R ]",
          [PDFXRef(123, 456), PDFXRef(999, 888)]),
     )
     for source, expected in cases:
         self.assertEqual(PDFParser.parse(source), expected)
示例#15
0
 def _read_trailer(self):
     """Read and parse the trailer from the current position of self._f.

     Reads up to the ``startxref`` token, decodes the bytes as latin1,
     stores the parsed result in ``self._trailer`` and then seeks back by
     the length of the returned delimiter.
     """
     stream = self._f
     self._log.debug("Started reading trailer at 0x%x.", stream.tell())
     raw_trailer, token = stream.read_until_token(b"startxref")
     self._trailer = PDFParser.parse(raw_trailer.decode("latin1"))
     # Step back over the delimiter that was just consumed.
     stream.seek(stream.tell() - len(token))
示例#16
0
 def _read_trailer(self, f):
     """Read the trailer from ``f`` and return its parsed form.

     Reads up to the ``startxref`` token (passing ``rewind=True`` to the
     reader), decodes the bytes as latin1 and returns what
     ``PDFParser.parse`` produces for that text.
     """
     self._log.debug("Started reading trailer at 0x%x.", f.tell())
     raw_trailer = f.read_until_token(b"startxref", rewind=True)
     return PDFParser.parse(raw_trailer.decode("latin1"))