Python ContentStream примеры использования

Язык программирования: Python

Пространство имен/Пакет: pyPdf.pdf

Класс/Тип: ContentStream

Примеров на hotexamples.com: 7

Python ContentStream - 7 примеров найдено. Это лучшие примеры Python кода для pyPdf.pdf.ContentStream, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ContentStream(7)

Основные методы

ContentStream (7)

Пример #1

Показать файл

Файл: Driver_Utils.py Проект: rkoots/Document_Obligation

def extract_text(self):
    """Return the text drawn on this page, with spaces between snippets.

    The page's ``/Contents`` object is wrapped in a ``ContentStream`` (unless
    it already is one) and its operations are scanned in order:

    * ``Tj``  -- the operand is appended when it is a ``TextStringObject``.
    * ``T*``  -- a newline is appended.
    * ``'``   -- a newline is appended, then the operand when it is a
      ``TextStringObject``.
    * ``"``   -- a newline and the third operand are appended, but only when
      that operand is a ``TextStringObject``.
    * ``TJ``  -- every ``TextStringObject`` element of the operand array is
      appended.

    After every operation a single space is appended if the accumulated text
    is non-empty and does not already end with a space.

    Returns:
        The accumulated unicode text, or ``u""`` when the page has no
        ``/Contents`` entry.
    """
    try:
        contents_ref = self["/Contents"]
    except KeyError:
        # A page without a /Contents entry has nothing drawn on it, so report
        # empty text instead of letting the lookup fail.
        return u""
    content = contents_ref.getObject()
    if not isinstance(content, ContentStream):
        content = ContentStream(content, self.pdf)

    text = u""
    for operands, operator in content.operations:
        if operator == "Tj":
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                text += _text
        elif operator == "T*":
            text += "\n"
        elif operator == "'":
            # The newline is emitted even when the operand is not usable text.
            text += "\n"
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                text += _text
        elif operator == '"':
            _text = operands[2]
            if isinstance(_text, TextStringObject):
                text += "\n"
                text += _text
        elif operator == "TJ":
            for i in operands[0]:
                if isinstance(i, TextStringObject):
                    text += i
        if text and not text.endswith(" "):
            text += " "  # Don't let words concatenate
    return text

Пример #2

Показать файл

Файл: utils.py Проект: syslabcom/osha.policy

def extractPDFText(self):
    """Return the text drawn on this page, one text operand after another.

    The page's ``/Contents`` object is wrapped in a ``ContentStream`` (unless
    it already is one) and its operations are scanned in order.  String
    operands of ``Tj``, ``'``, ``"`` and ``TJ`` are appended; ``T*``, ``'``
    and ``k`` append a newline, and ``"`` appends a newline only when its
    third operand is a ``TextStringObject``.

    Returns:
        The accumulated unicode text, or ``u""`` when the page has no
        ``/Contents`` entry.
    """
    try:
        contents_ref = self["/Contents"]
    except KeyError:
        # A page without a /Contents entry has nothing drawn on it, so report
        # empty text instead of letting the lookup fail.
        return u""
    content = contents_ref.getObject()
    if not isinstance(content, ContentStream):
        content = ContentStream(content, self.pdf)

    text = u""
    # Note: we check all strings are TextStringObjects.  ByteStringObjects
    # are strings where the byte->string encoding was unknown, so adding
    # them to the text here would be gibberish.
    for operands, operator in content.operations:
        if operator == "Tj":
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                text += _text
        elif operator == "T*":
            text += "\n"
        elif operator == "'":
            # The newline is emitted even when the operand is not usable text.
            text += "\n"
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                text += _text
        elif operator == '"':
            _text = operands[2]
            if isinstance(_text, TextStringObject):
                text += "\n"
                text += _text
        elif operator == "TJ":
            for i in operands[0]:
                if isinstance(i, TextStringObject):
                    text += i
        elif operator == "k":
            # NOTE(review): "k" is not a text operator; presumably it is used
            # here as a line-break heuristic -- confirm with the author.
            text += "\n"
    return text

Пример #3

Показать файл

def extractOperators(self):
    """Return the operations of this page's content stream as a new list.

    The page's ``/Contents`` object is wrapped in a ``ContentStream`` (unless
    it already is one) and the items of its ``operations`` attribute are
    copied, in order, into a fresh list.

    Returns:
        A list of the content stream's operations, or ``[]`` when the page
        has no ``/Contents`` entry.
    """
    try:
        contents_ref = self["/Contents"]
    except KeyError:
        # A page without a /Contents entry has no operations to report.
        return []
    content = contents_ref.getObject()
    if not isinstance(content, ContentStream):
        content = ContentStream(content, self.pdf)
    # Copy so callers can modify the result without touching the stream.
    return list(content.operations)

Пример #4

Показать файл

Файл: tests.py Проект: pombreda/pdfserver

 def replace_text(cls, page, text, replace):
     """Return a copy of *page* in which *text* is replaced by *replace*.

     The page's content stream is parsed, the first operand of every ``Tj``
     operation is rewritten with ``str.replace``-style substitution, and the
     modified stream is installed as ``/Contents`` of a new blank page onto
     which the original page has been merged.
     """
     # HACK: only the operand of plain ``Tj`` operations is rewritten.
     from pyPdf.pdf import ContentStream, PageObject
     from pyPdf.generic import TextStringObject, NameObject
     stream = ContentStream(page["/Contents"].getObject(), page.pdf)
     for operands, operator in stream.operations:
         if operator != 'Tj':
             continue
         substituted = operands[0].replace(text, replace)
         # The operand list is updated in place, so the stream sees the change.
         operands[0] = TextStringObject(substituted)
     result_page = PageObject.createBlankPage(page.pdf)
     result_page.mergePage(page)
     result_page[NameObject('/Contents')] = stream
     return result_page

Пример #5

Показать файл

Файл: pdfcontent.py Проект: ffshr/prsannots

def pdf_add_content(content_string, page, scale=1, offsetx=0, offsety=0):
    """Append drawing commands to the content stream of a PDF page.

    Inputs: content_string  PDF drawing commands to append, as one string.

            page            The pyPdf.pdf.PageObject that receives the content.

            scale           Uniform scale factor applied to the coordinate
                            system before the new content is drawn.
            offsetx         Translation applied together with the scale
            offsety         (x and y components respectively).

    The scale and offsets are written with two decimal places.  The page's
    '/Contents' entry is replaced with the extended stream; nothing is
    returned.

    """
    transform = '%.2f 0 0 %.2f %.2f %.2f cm' % (scale, scale, offsetx,
                                                offsety)
    # 'Q' closes the 'q' inserted at the front of the stream below; the new
    # content then runs inside its own 'q' ... 'Q' pair.
    pieces = ['Q', 'q', transform, content_string, 'Q']
    appended = '\n'.join(pieces)

    try:
        existing = page['/Contents'].getObject()
    except KeyError:
        # No content yet: start from an empty array.
        existing = ArrayObject([])

    new_stream = ContentStream(existing, page.pdf)
    operations = new_stream.operations
    # Existing content may not restore graphics state at the end, so it is
    # bracketed by a leading 'q' and the 'Q' that starts the appended text.
    operations.insert(0, [[], 'q'])
    operations.append([[], appended])
    page[NameObject('/Contents')] = new_stream

Пример #6

Показать файл

def extract_text(self):
    """Patched extractText() from pyPdf that puts spaces between snippets.

    The page's ``/Contents`` object is wrapped in a ``ContentStream`` (unless
    it already is one) and its operations are scanned in order.  String
    operands of ``Tj``, ``'``, ``"`` and ``TJ`` are appended; ``T*`` and
    ``'`` append a newline, and ``"`` appends a newline only when its third
    operand is a ``TextStringObject``.  After every operation a single space
    is appended if the accumulated text is non-empty and does not already
    end with a space.

    Returns:
        The accumulated unicode text, or ``u""`` when the page has no
        ``/Contents`` entry.
    """
    try:
        contents_ref = self["/Contents"]
    except KeyError:
        # A page without a /Contents entry has nothing drawn on it, so report
        # empty text instead of letting the lookup fail.
        return u""
    content = contents_ref.getObject()
    if not isinstance(content, ContentStream):
        content = ContentStream(content, self.pdf)

    text = u""
    # Note: we check all strings are TextStringObjects.  ByteStringObjects
    # are strings where the byte->string encoding was unknown, so adding
    # them to the text here would be gibberish.
    for operands, operator in content.operations:
        if operator == "Tj":
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                text += _text
        elif operator == "T*":
            text += "\n"
        elif operator == "'":
            # The newline is emitted even when the operand is not usable text.
            text += "\n"
            _text = operands[0]
            if isinstance(_text, TextStringObject):
                text += _text
        elif operator == '"':
            _text = operands[2]
            if isinstance(_text, TextStringObject):
                text += "\n"
                text += _text
        elif operator == "TJ":
            for i in operands[0]:
                if isinstance(i, TextStringObject):
                    text += i

        if text and not text.endswith(" "):
            text += " "  # Don't let words concatenate

    return text

Пример #7

Показать файл

 def InsertXObject(self, name):
     """Return the drawing list for the XObject called *name*.

     An XObject can be an image or a 'form' (an arbitrary PDF sequence).
     The object is looked up in the '/XObject' dictionary of the current
     page's '/Resources'.

     * '/Form': the form's operations are wrapped in 'q' ... 'Q', preceded by
       a 'cm' with the form's '/Matrix' when one is present, and handed to
       ``self.ProcessOperators`` together with the fonts returned by
       ``self.FetchFonts``.  The result is stored in ``self.formdrawings``
       under *name* and reused on later calls.
     * '/Image': the result of ``self.AddBitmap`` for the stream's raw data,
       width, height and filters is returned as the single list item.
     * any other subtype: an empty list is returned.
     """
     dlist = []
     xobject = self.page["/Resources"].getObject()['/XObject']
     stream = xobject[name]
     subtype = stream.get('/Subtype')
     if subtype == '/Form':
         # insert contents into current page drawing
         if name not in self.formdrawings:       # extract if not already done
             pdf_fonts = self.FetchFonts(stream)
             matrix = stream.get('/Matrix')
             oplist = [([], 'q')]                    # push state
             if matrix is not None:
                 # '/Matrix' is optional; without it no transform is applied
                 # rather than emitting a 'cm' whose operands are None.
                 oplist.append((matrix, 'cm'))
             form_ops = ContentStream(stream, self.pdfdoc).operations
             oplist.extend(form_ops)                 # add form contents
             oplist.append(([], 'Q'))                # restore original state
             self.formdrawings[name] = self.ProcessOperators(oplist, pdf_fonts)
         dlist.extend(self.formdrawings[name])
     elif subtype == '/Image':
         width = stream.get('/Width')
         height = stream.get('/Height')
         filters = stream.get("/Filter", ())
         dlist.append(self.AddBitmap(stream._data, width, height, filters))
     return dlist