Пример #1
0
def get_outlines(doc, pageslist, pagesdict):
    result = []
    for (_, title, destname, actionref, _) in doc.get_outlines():
        if destname is None and actionref:
            action = pdftypes.resolve1(actionref)
            if isinstance(action, dict):
                subtype = action.get('S')
                if subtype is PSLiteralTable.intern('GoTo'):
                    destname = action.get('D')
        if destname is None:
            continue
        dest = resolve_dest(doc, destname)

        # consider targets of the form [page /XYZ left top zoom]
        if dest[1] is PSLiteralTable.intern('XYZ'):
            (pageref, _, targetx, targety) = dest[:4]

            if type(pageref) is int:
                page = pageslist[pageref]
            elif isinstance(pageref, pdftypes.PDFObjRef):
                page = pagesdict[pageref.objid]
            else:
                sys.stderr.write(
                    'Warning: unsupported pageref in outline: %s\n' % pageref)
                page = None

            if page:
                pos = Pos(page, targetx, targety)
                result.append(Outline(title, destname, pos))
    return result
Пример #2
0
def get_outlines(doc, pagesdict):
    result = []
    for (_, title, destname, actionref, _) in doc.get_outlines():
        if destname is None and actionref:
            action = actionref.resolve()
            if isinstance(action, dict):
                subtype = action.get('S')
                if subtype is PSLiteralTable.intern('GoTo'):
                    destname = action.get('D')
        if destname is None:
            continue
        dest = resolve_dest(doc, destname)
        # consider targets of the form [page /XYZ left top zoom]
        if dest[1] is PSLiteralTable.intern('XYZ'):
            (pageref, _, targetx, targety, _) = dest
            page = pagesdict[pageref.objid]
            pos = Pos(page, targetx, targety)
            result.append(Outline(title, destname, pos))
    return result
Пример #3
0
def get_outlines(doc, pagesdict):
    result = []
    for (_, title, destname, actionref, _) in doc.get_outlines():
        if destname is None and actionref:
            action = actionref.resolve()
            if isinstance(action, dict):
                subtype = action.get('S')
                if subtype is PSLiteralTable.intern('GoTo'):
                    destname = action.get('D')
        if destname is None:
            continue
        dest = resolve_dest(doc, destname)
        pageno = pagesdict[dest[0].objid]
        (_, _, targetx, targety, _) = dest
        result.append(Outline(title, destname, pageno, targetx, targety))
    return result
Пример #4
0
#!/usr/bin/env python
import sys, zlib
from pdfminer.lzw import LZWDecoder
from pdfminer.psparser import PSException, PSObject, \
     PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
     literal_name, keyword_name, STRICT

LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
LITERALS_ASCIIHEX_DECODE = (PSLiteralTable.intern('ASCIIHexDecode'), PSLiteralTable.intern('AHx'))


##  PDF Objects
##
class PDFObject(PSObject): pass

class PDFException(PSException): pass
class PDFTypeError(PDFException): pass
class PDFValueError(PDFException): pass
class PDFNotImplementedError(PSException): pass


##  PDFObjRef
##
class PDFObjRef(PDFObject):
  
  def __init__(self, doc, objid, _):
    if objid == 0:
      if STRICT:
Пример #5
0
from pdfminer.psparser import PSStackParser, PSSyntaxError, PSEOF, \
     PSLiteralTable, PSKeywordTable, literal_name, keyword_name, STRICT
from pdfminer.pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
     PDFStream, PDFObjRef, resolve1, decipher_all, \
     int_value, float_value, num_value, str_value, list_value, dict_value, stream_value


##  Exceptions
##
class PDFSyntaxError(PDFException): pass
class PDFNoValidXRef(PDFSyntaxError): pass
class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass

# some predefined literals and keywords.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')


##  XRefs
##
class XRefObjRange(object):
  def __init__(self, start, nobjs):
    self.start = start
    self.nobjs = nobjs
    return

  def __repr__(self):
Пример #6
0
from pdfminer.pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
from pdfminer.pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
from pdfminer.pdfcolor import PDFColorSpace, PREDEFINED_COLORSPACE, \
     LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
from pdfminer.cmap import CMapDB


##  Exceptions
##
class PDFResourceError(PDFException): pass
class PDFInterpreterError(PDFException): pass


##  Constants
##
LITERAL_PDF = PSLiteralTable.intern('PDF')
LITERAL_TEXT = PSLiteralTable.intern('Text')
LITERAL_FONT = PSLiteralTable.intern('Font')
LITERAL_FORM = PSLiteralTable.intern('Form')
LITERAL_IMAGE = PSLiteralTable.intern('Image')


##  PDFTextState
##
class PDFTextState(object):

  def __init__(self):
    self.font = None
    self.fontsize = 0
    self.charspace = 0
    self.wordspace = 0
Пример #7
0
#!/usr/bin/env python
import sys
from pdfminer.psparser import PSLiteralTable

##  PDFColorSpace
##
LITERAL_DEVICE_GRAY = PSLiteralTable.intern('DeviceGray')
LITERAL_DEVICE_RGB = PSLiteralTable.intern('DeviceRGB')
LITERAL_DEVICE_CMYK = PSLiteralTable.intern('DeviceCMYK')


class PDFColorSpace(object):
    def __init__(self, name, ncomponents):
        self.name = name
        self.ncomponents = ncomponents
        return

    def __repr__(self):
        return '<PDFColorSpace: %s, ncomponents=%d>' % (self.name,
                                                        self.ncomponents)


PREDEFINED_COLORSPACE = dict(
    (name, PDFColorSpace(name, n)) for (name, n) in {
        'CalRGB': 3,
        'CalGray': 1,
        'Lab': 3,
        'DeviceRGB': 3,
        'DeviceCMYK': 4,
        'DeviceGray': 1,
        'Separation': 1,
Пример #8
0
#!/usr/bin/env python
import sys
from pdfminer.psparser import PSLiteralTable


##  PDFColorSpace
##
LITERAL_DEVICE_GRAY = PSLiteralTable.intern('DeviceGray')
LITERAL_DEVICE_RGB = PSLiteralTable.intern('DeviceRGB')
LITERAL_DEVICE_CMYK = PSLiteralTable.intern('DeviceCMYK')

class PDFColorSpace(object):
  
  def __init__(self, name, ncomponents):
    self.name = name
    self.ncomponents = ncomponents
    return
  
  def __repr__(self):
    return '<PDFColorSpace: %s, ncomponents=%d>' % (self.name, self.ncomponents)


PREDEFINED_COLORSPACE = dict(
  (name, PDFColorSpace(name,n)) for (name,n) in {
  'CalRGB': 3,
  'CalGray': 1,
  'Lab': 3,
  'DeviceRGB': 3,
  'DeviceCMYK': 4,
  'DeviceGray': 1,
  'Separation': 1,
Пример #9
0
              char2gid[c] = (unpack('>H', fp.read(2))[0] + idd) & 0xffff
          else:
            for c in xrange(sc, ec+1):
              char2gid[c] = (c + idd) & 0xffff
    gid2char = dict( (gid, pack('>H', char))
                     for (char,gid) in char2gid.iteritems() )
    return CMap().update(char2gid, gid2char)


##  Fonts
##

class PDFFontError(PDFException): pass
class PDFUnicodeNotDefined(PDFFontError): pass

LITERAL_STANDARD_ENCODING = PSLiteralTable.intern('StandardEncoding')
LITERAL_TYPE1C = PSLiteralTable.intern('Type1C')


# PDFFont
class PDFFont(object):
  
  def __init__(self, descriptor, widths, default_width=None):
    self.descriptor = descriptor
    self.widths = widths
    self.fontname = descriptor.get('FontName', 'unknown')
    if isinstance(self.fontname, PSLiteral):
      self.fontname = literal_name(self.fontname)
    self.ascent = num_value(descriptor.get('Ascent', 0))
    self.descent = num_value(descriptor.get('Descent', 0))
    self.default_width = default_width or descriptor.get('MissingWidth', 0)
Пример #10
0

class PDFNoValidXRef(PDFSyntaxError):
    pass


class PDFEncryptionError(PDFException):
    pass


class PDFPasswordIncorrect(PDFEncryptionError):
    pass


# some predefined literals and keywords.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')


##  XRefs
##
class XRefObjRange(object):
    def __init__(self, start, nobjs):
        self.start = start
        self.nobjs = nobjs
        return

    def __repr__(self):
Пример #11
0
        return CMap().update(char2gid, gid2char)


##  Fonts
##


class PDFFontError(PDFException):
    pass


class PDFUnicodeNotDefined(PDFFontError):
    pass


LITERAL_STANDARD_ENCODING = PSLiteralTable.intern('StandardEncoding')
LITERAL_TYPE1C = PSLiteralTable.intern('Type1C')


# PDFFont
class PDFFont(object):
    def __init__(self, descriptor, widths, default_width=None):
        self.descriptor = descriptor
        self.widths = widths
        self.fontname = descriptor.get('FontName', 'unknown')
        if isinstance(self.fontname, PSLiteral):
            self.fontname = literal_name(self.fontname)
        self.ascent = num_value(descriptor.get('Ascent', 0))
        self.descent = num_value(descriptor.get('Descent', 0))
        self.default_width = default_width or descriptor.get('MissingWidth', 0)
        self.leading = num_value(descriptor.get('Leading', 0))