Exemplo n.º 1
0
 def __init__(
     self, userPassword, ownerPassword=None, canPrint=1, canModify=1, canCopy=1, canAnnotate=1, strength=40
 ):
     """
     This class defines the encryption properties to be used while creating a pdf document.
     Once initiated, a StandardEncryption object can be applied to a Canvas or a BaseDocTemplate.
     The userPassword parameter sets the user password on the encrypted pdf.
     The ownerPassword parameter sets the owner password on the encrypted pdf.
     The boolean flags canPrint, canModify, canCopy, canAnnotate determine whether a user can
     perform the corresponding actions on the pdf when only a user password has been supplied.
     If the user supplies the owner password while opening the pdf, all actions can be performed regardless
     of the flags.
     Note that the security provided by these encryption settings (and even more so for the flags) is very weak.

     strength must be 40 (RC4, spec revision 2) or 128 (RC4, spec revision 3);
     any other value raises ValueError.
     """
     if isUnicodeType(ownerPassword):
         ownerPassword = ownerPassword.encode("utf-8")
     if isUnicodeType(userPassword):
         userPassword = userPassword.encode("utf-8")
     self.ownerPassword = ownerPassword
     self.userPassword = userPassword
     if strength == 40:
         self.revision = 2
     elif strength == 128:
         self.revision = 3
     else:
         # Fail fast: previously an unsupported strength silently left
         # self.revision unset, producing an AttributeError much later.
         raise ValueError("Unsupported encryption strength %r; use 40 or 128" % strength)
     self.canPrint = canPrint
     self.canModify = canModify
     self.canCopy = canCopy
     self.canAnnotate = canAnnotate
     # O/U entries, permission flags and RC4 key are computed later.
     self.O = self.U = self.P = self.key = None
Exemplo n.º 2
0
def encodePDF(key, objectNumber, generationNumber, string, revision=2):
    """Encodes a string or stream with RC4 under the per-object key.

    Per PDF spec Algorithm 3.1 the file encryption key is extended with the
    3 low-order bytes of the object number and the 2 low-order bytes of the
    generation number (little-endian), MD5-hashed, and the digest (or its
    first 10 bytes for revision 2) used as the RC4 key.

    Bug fix: the low-order bytes were previously produced with
    chr(n & 0xff).encode('utf-8'), which on Python 3 yields TWO bytes for
    values >= 0x80 and corrupts the derived key; raw single bytes are now
    appended instead.
    """
    newkey = key
    if isUnicodeType(newkey):
        newkey = newkey.encode('utf-8')
    # bytes(bytearray(...)) yields str on Python 2 and bytes on Python 3,
    # so concatenation works in both.
    newkey += bytes(bytearray([(objectNumber >> (8 * i)) & 0xff for i in range(3)]))
    newkey += bytes(bytearray([(generationNumber >> (8 * i)) & 0xff for i in range(2)]))
    md5output = hashlib.md5(newkey).digest()
    if revision == 2:
        key = md5output[:10]  # 5 key bytes + 5 extension bytes for 40-bit keys
    elif revision == 3:
        key = md5output  # all 16 bytes
    from reportlab.lib.arciv import ArcIV
    encrypted = ArcIV(key).encode(string)
    if DEBUG:
        print('encodePDF(%s,%s,%s,%s,%s)==>%s' % tuple(
            map(lambda x: hexText(str(x)),
                (key, objectNumber, generationNumber, string, revision,
                 encrypted))))
    return encrypted
Exemplo n.º 3
0
def computeO(userPassword, ownerPassword, revision):
    """Compute the /O (owner password) entry of the PDF encryption dictionary.

    Implements Algorithm 3.3 of the PDF spec: the padded owner password is
    MD5-hashed (50 extra rounds for revision 3) and the resulting key used
    to RC4-encrypt the padded user password.  If no owner password is given,
    the user password is substituted.  Returns a byte string.
    """
    from reportlab.lib.arciv import ArcIV

    # print 'digest of hello is %s' % hashlib.md5('hello').digest()
    assert revision in (2, 3), "Unknown algorithm revision %s" % revision
    if isUnicodeType(userPassword):
        userPassword = userPassword.encode("utf-8")
    if isUnicodeType(ownerPassword):
        ownerPassword = ownerPassword.encode("utf-8")
    if ownerPassword in (None, ""):
        # No owner password supplied: fall back to the user password.
        ownerPassword = userPassword
    # Pad (or truncate) both passwords to exactly 32 bytes.
    ownerPad = ownerPassword + PadString
    ownerPad = ownerPad[0:32]

    password = userPassword + PadString
    userPad = password[:32]

    digest = hashlib.md5(ownerPad).digest()
    if revision == 2:
        # Revision 2: single RC4 pass with a 5-byte (40-bit) key.
        O = ArcIV(digest[:5]).encode(userPad)
    elif revision == 3:
        # Revision 3: re-hash the digest 50 times, then apply 20 RC4 passes
        # with XOR-derived keys (see xorKey).
        for i in range(50):
            digest = hashlib.md5(digest).digest()
        digest = digest[:16]
        O = userPad
        for i in range(20):
            thisKey = xorKey(i, digest)
            O = ArcIV(thisKey).encode(O)
    if DEBUG:
        print(
            "computeO(%s,%s,%s)==>%s"
            % tuple(map(lambda x: hexText(str(x)), (userPassword, ownerPassword, revision, O)))
        )
    return O
Exemplo n.º 4
0
 def __init__(self,
              userPassword,
              ownerPassword=None,
              canPrint=1,
              canModify=1,
              canCopy=1,
              canAnnotate=1,
              strength=40):
     '''
     This class defines the encryption properties to be used while creating a pdf document.
     Once initiated, a StandardEncryption object can be applied to a Canvas or a BaseDocTemplate.
     The userPassword parameter sets the user password on the encrypted pdf.
     The ownerPassword parameter sets the owner password on the encrypted pdf.
     The boolean flags canPrint, canModify, canCopy, canAnnotate determine whether a user can
     perform the corresponding actions on the pdf when only a user password has been supplied.
     If the user supplies the owner password while opening the pdf, all actions can be performed regardless
     of the flags.
     Note that the security provided by these encryption settings (and even more so for the flags) is very weak.

     strength must be 40 (RC4, spec revision 2) or 128 (RC4, spec revision 3);
     any other value raises ValueError.
     '''
     if isUnicodeType(ownerPassword):
         ownerPassword = ownerPassword.encode('utf-8')
     if isUnicodeType(userPassword):
         userPassword = userPassword.encode('utf-8')
     self.ownerPassword = ownerPassword
     self.userPassword = userPassword
     if strength == 40:
         self.revision = 2
     elif strength == 128:
         self.revision = 3
     else:
         # Fail fast: previously an unsupported strength silently left
         # self.revision unset, producing an AttributeError much later.
         raise ValueError('Unsupported encryption strength %r; use 40 or 128' % strength)
     self.canPrint = canPrint
     self.canModify = canModify
     self.canCopy = canCopy
     self.canAnnotate = canAnnotate
     # O/U entries, permission flags and RC4 key are computed later.
     self.O = self.U = self.P = self.key = None
Exemplo n.º 5
0
 def handle_data(self, data):
     """Append character data to whichever element is currently open.

     The data should be paragraph text, preformatted para text, 'string
     text' for a fixed string on the page, or table data.
     """
     # Normalize byte input to the native str type first.
     if type(data) is not str:
         data = data.decode('utf-8')

     def _as_unicode(chunk):
         # On Python 3, coerce any remaining byte string to unicode.
         if sys.version_info[0] == 3 and not isUnicodeType(chunk):
             chunk = chunk.decode('utf-8')
         return chunk

     if self._curPara:
         self._curPara.rawtext += data
     elif self._curPrefmt:
         self._curPrefmt.rawtext += data
     elif self._curPyCode:
         self._curPyCode.rawtext += data
     elif self._curString:
         self._curString.text += _as_unicode(data)
     elif self._curTable:
         self._curTable.rawBlocks.append(data)
     elif self._curTitle is not None:
         # explicit None test: an empty title string must still collect data
         self._curTitle += _as_unicode(data)
     elif self._curAuthor is not None:
         self._curAuthor += _as_unicode(data)
     elif self._curSubject is not None:
         self._curSubject += _as_unicode(data)
Exemplo n.º 6
0
def computeO(userPassword, ownerPassword, revision):
    """Compute the /O (owner password) entry of the PDF encryption dictionary.

    Implements Algorithm 3.3 of the PDF spec: the padded owner password is
    MD5-hashed (50 extra rounds for revision 3) and the resulting key used
    to RC4-encrypt the padded user password.  If no owner password is given,
    the user password is substituted.  Returns a byte string.
    """
    from reportlab.lib.arciv import ArcIV
    #print 'digest of hello is %s' % hashlib.md5('hello').digest()
    assert revision in (2, 3), 'Unknown algorithm revision %s' % revision
    if isUnicodeType(userPassword):
        userPassword = userPassword.encode('utf-8')
    if isUnicodeType(ownerPassword):
        ownerPassword = ownerPassword.encode('utf-8')
    if ownerPassword in (None, ''):
        # No owner password supplied: fall back to the user password.
        ownerPassword = userPassword
    # Pad (or truncate) both passwords to exactly 32 bytes.
    ownerPad = ownerPassword + PadString
    ownerPad = ownerPad[0:32]

    password = userPassword + PadString
    userPad = password[:32]

    digest = hashlib.md5(ownerPad).digest()
    if revision == 2:
        # Revision 2: single RC4 pass with a 5-byte (40-bit) key.
        O = ArcIV(digest[:5]).encode(userPad)
    elif revision == 3:
        # Revision 3: re-hash the digest 50 times, then apply 20 RC4 passes
        # with XOR-derived keys (see xorKey).
        for i in range(50):
            digest = hashlib.md5(digest).digest()
        digest = digest[:16]
        O = userPad
        for i in range(20):
            thisKey = xorKey(i, digest)
            O = ArcIV(thisKey).encode(O)
    if DEBUG:
        print('computeO(%s,%s,%s)==>%s' % tuple(
            map(lambda x: hexText(str(x)),
                (userPassword, ownerPassword, revision, O))))
    return O
Exemplo n.º 7
0
 def handle_data(self, data):
     """Route character data to the currently open element.

     Data is expected to be paragraph text, preformatted text, 'string
     text' for a fixed string on the page, or table data.
     """
     # Bytes become native str up front.
     if type(data) is not str:
         data = data.decode('utf-8')

     def _text(chunk):
         # Force remaining byte strings to unicode under Python 3.
         if sys.version_info[0] == 3 and not isUnicodeType(chunk):
             return chunk.decode('utf-8')
         return chunk

     if self._curPara:
         self._curPara.rawtext += data
     elif self._curPrefmt:
         self._curPrefmt.rawtext += data
     elif self._curPyCode:
         self._curPyCode.rawtext += data
     elif self._curString:
         self._curString.text += _text(data)
     elif self._curTable:
         self._curTable.rawBlocks.append(data)
     elif self._curTitle is not None:
         # test against None explicitly so empty titles still accumulate
         self._curTitle += _text(data)
     elif self._curAuthor is not None:
         self._curAuthor += _text(data)
     elif self._curSubject is not None:
         self._curSubject += _text(data)
Exemplo n.º 8
0
def encodePDF(key, objectNumber, generationNumber, string, revision=2):
    """Encodes a string or stream with RC4 under the per-object key.

    Per PDF spec Algorithm 3.1 the file encryption key is extended with the
    3 low-order bytes of the object number and the 2 low-order bytes of the
    generation number (little-endian), MD5-hashed, and the digest (or its
    first 10 bytes for revision 2) used as the RC4 key.

    Bug fix: the low-order bytes were previously produced with
    chr(n & 0xFF).encode("utf-8"), which on Python 3 yields TWO bytes for
    values >= 0x80 and corrupts the derived key; raw single bytes are now
    appended instead.
    """
    newkey = key
    if isUnicodeType(newkey):
        newkey = newkey.encode("utf-8")
    # bytes(bytearray(...)) yields str on Python 2 and bytes on Python 3,
    # so concatenation works in both.
    newkey += bytes(bytearray([(objectNumber >> (8 * i)) & 0xFF for i in range(3)]))
    newkey += bytes(bytearray([(generationNumber >> (8 * i)) & 0xFF for i in range(2)]))
    md5output = hashlib.md5(newkey).digest()
    if revision == 2:
        key = md5output[:10]  # 5 key bytes + 5 extension bytes for 40-bit keys
    elif revision == 3:
        key = md5output  # all 16 bytes
    from reportlab.lib.arciv import ArcIV

    encrypted = ArcIV(key).encode(string)
    if DEBUG:
        print(
            "encodePDF(%s,%s,%s,%s,%s)==>%s"
            % tuple(map(lambda x: hexText(str(x)), (key, objectNumber, generationNumber, string, revision, encrypted)))
        )
    return encrypted
Exemplo n.º 9
0
def encryptionkey(password, OwnerKey, Permissions, FileId1, revision=2):
    """Compute the file encryption key for the PDF standard security handler.

    Implements Algorithm 3.2 of the PDF spec: MD5 over the padded user
    password, the /O entry, the permission flags (low-order byte first)
    and FileId1 (the first string of the file ID array).  Revision 3
    re-hashes the digest 50 times and yields a 16-byte (128-bit) key;
    revision 2 yields a 5-byte (40-bit) key.
    """
    # AR force same as iText example
    # Permissions =  -1836   #int(Permissions - 2**31)
    if isUnicodeType(password):
        password = password.encode("utf-8")
    # Pad, then truncate to exactly 32 bytes.
    password = password + PadString
    password = password[:32]
    # Translate permissions to a 4-byte string, low-order byte first.
    p = Permissions
    permissionsString = []
    for i in range(4):
        permissionsString.append(p & 0xFF)  # already in 0..255; matches iText
        p = p >> 8
    if sys.version_info[0] == 3:
        permissionsString = bytes(permissionsString)
    else:
        permissionsString = b"".join([chr(i) for i in permissionsString])

    if isUnicodeType(OwnerKey):
        OwnerKey = OwnerKey.encode("utf-8")
    if isUnicodeType(permissionsString):
        permissionsString = permissionsString.encode("utf-8")
    if isUnicodeType(FileId1):
        FileId1 = FileId1.encode("utf-8")

    # Named md5obj (not 'hash') to avoid shadowing the builtin.
    md5obj = hashlib.md5(password)
    md5obj.update(OwnerKey)
    md5obj.update(permissionsString)
    md5obj.update(FileId1)

    md5output = md5obj.digest()

    if revision == 2:
        key = md5output[:5]
    elif revision == 3:  # revision 3 algorithm - loop 50 times
        for x in range(50):
            md5output = hashlib.md5(md5output).digest()
        key = md5output[:16]

    if DEBUG:
        print(
            "encryptionkey(%s,%s,%s,%s,%s)==>%s"
            % tuple(map(lambda x: hexText(str(x)), (password, OwnerKey, Permissions, FileId1, revision, key)))
        )
    return key
Exemplo n.º 10
0
def encryptionkey(password, OwnerKey, Permissions, FileId1, revision=2):
    """Compute the file encryption key for the PDF standard security handler.

    Implements Algorithm 3.2 of the PDF spec: MD5 over the padded user
    password, the /O entry, the permission flags (low-order byte first)
    and FileId1 (the first string of the file ID array).  Revision 3
    re-hashes the digest 50 times and yields a 16-byte (128-bit) key;
    revision 2 yields a 5-byte (40-bit) key.
    """
    #AR force same as iText example
    #Permissions =  -1836   #int(Permissions - 2**31)
    if isUnicodeType(password):
        password = password.encode('utf-8')
    # Pad, then truncate to exactly 32 bytes.
    password = password + PadString
    password = password[:32]
    # Translate permissions to a 4-byte string, low-order byte first.
    p = Permissions
    permissionsString = []
    for i in range(4):
        permissionsString.append(p & 0xff)  # already in 0..255; matches iText
        p = p >> 8
    if sys.version_info[0] == 3:
        permissionsString = bytes(permissionsString)
    else:
        permissionsString = b''.join([chr(i) for i in permissionsString])

    if isUnicodeType(OwnerKey):
        OwnerKey = OwnerKey.encode('utf-8')
    if isUnicodeType(permissionsString):
        permissionsString = permissionsString.encode('utf-8')
    if isUnicodeType(FileId1):
        FileId1 = FileId1.encode('utf-8')

    # Named md5obj (not 'hash') to avoid shadowing the builtin.
    md5obj = hashlib.md5(password)
    md5obj.update(OwnerKey)
    md5obj.update(permissionsString)
    md5obj.update(FileId1)

    md5output = md5obj.digest()

    if revision == 2:
        key = md5output[:5]
    elif revision == 3:  #revision 3 algorithm - loop 50 times
        for x in range(50):
            md5output = hashlib.md5(md5output).digest()
        key = md5output[:16]

    if DEBUG:
        print('encryptionkey(%s,%s,%s,%s,%s)==>%s' % tuple(
            map(lambda x: hexText(str(x)),
                (password, OwnerKey, Permissions, FileId1, revision, key))))
    return key
Exemplo n.º 11
0
def computeU(encryptionkey, encodestring=PadString, revision=2, documentId=None):
    """Compute the /U (user password) entry of the PDF encryption dictionary.

    Revision 2 (Algorithm 3.4): RC4-encrypt the pad string with the file
    encryption key.  Revision 3 (Algorithm 3.5): MD5 the pad string plus
    the document ID, apply 20 RC4 passes with XOR-derived keys, then pad
    the result to 32 bytes.  Returns a byte string.
    """
    from reportlab.lib.arciv import ArcIV

    if revision == 2:
        result = ArcIV(encryptionkey).encode(encodestring)
    elif revision == 3:
        assert documentId is not None, "Revision 3 algorithm needs the document ID!"
        h = hashlib.md5(PadString)
        if isUnicodeType(documentId):
            h.update(documentId.encode("utf-8"))
        else:
            h.update(documentId)
        tmp = h.digest()
        # First RC4 pass with the key itself, then 19 more with XOR variants.
        tmp = ArcIV(encryptionkey).encode(tmp)
        for n in range(1, 20):
            thisKey = xorKey(n, encryptionkey)
            tmp = ArcIV(thisKey).encode(tmp)
        # Pad the 16-byte result to the 32 bytes the /U entry requires.
        while len(tmp) < 32:
            tmp = tmp + b"\000"
        result = tmp
    if DEBUG:
        print(
            "computeU(%s,%s,%s,%s)==>%s"
            % tuple(map(lambda x: hexText(str(x)), (encryptionkey, encodestring, revision, documentId, result)))
        )
    return result
Exemplo n.º 12
0
 def _py_stringWidth(self, text, size, encoding='utf-8'):
     """Return the width of *text* at *size* points from the face's char widths."""
     if not isUnicodeType(text):
         # a falsy encoding argument falls back to utf-8
         text = text.decode(encoding or 'utf-8')
     widths = self.face.charWidths
     default = self.face.defaultWidth
     total = sum([widths.get(ord(ch), default) for ch in text])
     return size * total * 0.001
Exemplo n.º 13
0
def _py_unicode2T1(utext,fonts):
    '''return a list of (font,string) pairs representing the unicode text'''
    # Greedy strategy: try to encode the whole remaining text in the first
    # font's encoding; on UnicodeEncodeError, use the error's start/end
    # indices (e.args[2:4]) to split off the failing run, recurse into the
    # remaining substitution fonts for that run, and fall back to the notdef
    # glyph when no font can encode it.
    R = []
    font, fonts = fonts[0], fonts[1:]
    enc = font.encName
    if 'UCS-2' in enc:
        # NOTE(review): UCS-2 named encodings are treated as UTF-16 here —
        # presumably close enough for BMP text; confirm for non-BMP input.
        enc = 'UTF16'
    while utext:
        try:
            if isUnicodeType(utext):
                s = utext.encode(enc)
            else:
                # already a byte string; pass it through unchanged
                s = utext
            R.append((font,s))
            break
        except UnicodeEncodeError as e:
            # i0:il is the span the current font could not encode
            i0, il = e.args[2:4]
            if i0:
                # everything before the failure encodes cleanly in this font
                R.append((font,utext[:i0].encode(enc)))
            if fonts:
                # let the substitution fonts try the failing span
                R.extend(_py_unicode2T1(utext[i0:il],fonts))
            else:
                # no fonts left: substitute notdef glyphs, one per character
                R.append((_notdefFont,_notdefChar*(il-i0)))
            utext = utext[il:]
    return R
Exemplo n.º 14
0
    def stringWidth(self, text, size, encoding=None):
        """Measure width in characters, not bytes: byte input is decoded as UTF-8."""
        if not isUnicodeType(text):
            text = text.decode('utf8')
        lookup = self.unicodeWidths.get
        # unknown characters default to a width of 1000 units
        return 0.001 * size * sum([lookup(ch, 1000) for ch in text])
Exemplo n.º 15
0
    def stringWidth(self, text, size, encoding=None):
        """Do the width test on characters rather than bytes (decode UTF-8 input)."""
        if not isUnicodeType(text):
            text = text.decode('utf8')
        table = self.unicodeWidths
        # characters missing from the table count as 1000 units wide
        total = sum([table.get(ch, 1000) for ch in text])
        return 0.001 * size * total
Exemplo n.º 16
0
def _py_unicode2T1(utext, fonts):
    '''return a list of (font,string) pairs representing the unicode text'''
    # Greedy strategy: try to encode the whole remaining text in the first
    # font's encoding; on UnicodeEncodeError, use the error's start/end
    # indices (e.args[2:4]) to split off the failing run, recurse into the
    # remaining substitution fonts for that run, and fall back to the notdef
    # glyph when no font can encode it.
    R = []
    font, fonts = fonts[0], fonts[1:]
    enc = font.encName
    if 'UCS-2' in enc:
        # NOTE(review): UCS-2 named encodings are treated as UTF-16 here —
        # presumably close enough for BMP text; confirm for non-BMP input.
        enc = 'UTF16'
    while utext:
        try:
            if isUnicodeType(utext):
                s = utext.encode(enc)
            else:
                # already a byte string; pass it through unchanged
                s = utext
            R.append((font, s))
            break
        except UnicodeEncodeError as e:
            # i0:il is the span the current font could not encode
            i0, il = e.args[2:4]
            if i0:
                # everything before the failure encodes cleanly in this font
                R.append((font, utext[:i0].encode(enc)))
            if fonts:
                # let the substitution fonts try the failing span
                R.extend(_py_unicode2T1(utext[i0:il], fonts))
            else:
                # no fonts left: substitute notdef glyphs, one per character
                R.append((_notdefFont, _notdefChar * (il - i0)))
            utext = utext[il:]
    return R
Exemplo n.º 17
0
def computeU(encryptionkey,
             encodestring=PadString,
             revision=2,
             documentId=None):
    """Compute the /U (user password) entry of the PDF encryption dictionary.

    Revision 2 (Algorithm 3.4): RC4-encrypt the pad string with the file
    encryption key.  Revision 3 (Algorithm 3.5): MD5 the pad string plus
    the document ID, apply 20 RC4 passes with XOR-derived keys, then pad
    the result to 32 bytes.  Returns a byte string.
    """
    from reportlab.lib.arciv import ArcIV
    if revision == 2:
        result = ArcIV(encryptionkey).encode(encodestring)
    elif revision == 3:
        assert documentId is not None, "Revision 3 algorithm needs the document ID!"
        h = hashlib.md5(PadString)
        if isUnicodeType(documentId):
            h.update(documentId.encode('utf-8'))
        else:
            h.update(documentId)
        tmp = h.digest()
        # First RC4 pass with the key itself, then 19 more with XOR variants.
        tmp = ArcIV(encryptionkey).encode(tmp)
        for n in range(1, 20):
            thisKey = xorKey(n, encryptionkey)
            tmp = ArcIV(thisKey).encode(tmp)
        # Pad the 16-byte result to the 32 bytes the /U entry requires.
        while len(tmp) < 32:
            tmp = tmp + b'\000'
        result = tmp
    if DEBUG:
        print('computeU(%s,%s,%s,%s)==>%s' % tuple(
            map(lambda x: hexText(str(x)),
                (encryptionkey, encodestring, revision, documentId, result))))
    return result
Exemplo n.º 18
0
 def _py_stringWidth(self, text, size, encoding='utf-8'):
     """Calculate the width of *text* at *size* points."""
     if not isUnicodeType(text):
         # treat a falsy encoding as utf-8
         text = text.decode(encoding or 'utf-8')
     charWidths = self.face.charWidths
     defaultWidth = self.face.defaultWidth
     return 0.001 * size * sum([charWidths.get(ord(ch), defaultWidth) for ch in text])
Exemplo n.º 19
0
 def formatForPdf(self, text):
     """Return *text* encoded as big-endian UTF-16 without a BOM, PDF-escaped."""
     from codecs import utf_16_be_encode
     if not isUnicodeType(text):
         text = text.decode('utf8')
     # utf_16_be_encode returns (bytes, length); only the bytes are needed
     return _escape(utf_16_be_encode(text)[0])
Exemplo n.º 20
0
 def formatForPdf(self, text):
     """Encode *text* as UTF-16-BE (no BOM) and escape it for embedding in PDF."""
     from codecs import utf_16_be_encode
     if not isUnicodeType(text):
         text = text.decode('utf8')
     # take only the bytes from the (bytes, length) tuple the codec returns
     utf16 = utf_16_be_encode(text)[0]
     return _escape(utf16)
Exemplo n.º 21
0
def _AsciiHexEncode(input):
    """Encodes input using ASCII-Hex coding.

    This is a verbose encoding used for binary data within a PDF file:
    one byte of binary becomes two bytes of ASCII.  Helper function used
    by images.
    """
    if isUnicodeType(input):
        input = input.encode('utf-8')
    # b2a_hex produces the hex body directly; '>' is the ASCIIHexDecode
    # end-of-data marker, so no intermediate buffer is needed.
    return binascii.b2a_hex(input) + b'>'
Exemplo n.º 22
0
def _AsciiHexEncode(input):
    """ASCII-Hex encode *input* (each binary byte becomes two ASCII bytes).

    Verbose encoding used for binary data within a PDF file; helper
    function used by images.
    """
    if isUnicodeType(input):
        input = input.encode('utf-8')
    # hex body followed by the ASCIIHexDecode end-of-data marker
    return binascii.b2a_hex(input) + b'>'
Exemplo n.º 23
0
 def handle_cdata(self, data):
     """Append CDATA text to whichever element is currently open (lets XML be quoted)."""

     def _as_unicode(chunk):
         # On Python 3, coerce any byte string to unicode before concatenating.
         if sys.version_info[0] == 3 and not isUnicodeType(chunk):
             chunk = chunk.decode('utf-8')
         return chunk

     if self._curPara:
         self._curPara.rawtext += data
     elif self._curPrefmt:
         self._curPrefmt.rawtext += data
     elif self._curPyCode:
         self._curPyCode.rawtext += data
     elif self._curString:
         self._curString.text += _as_unicode(data)
     elif self._curTable:
         self._curTable.rawBlocks.append(data)
     elif self._curAuthor is not None:
         # explicit None test so empty author strings still accumulate
         self._curAuthor += _as_unicode(data)
     elif self._curSubject is not None:
         self._curSubject += _as_unicode(data)
Exemplo n.º 24
0
 def _py_stringWidth(self, text, size, encoding='utf8'):
     """This is the "purist" approach to width.  The practical approach
     is to use the stringWidth function, which may be swapped in for one
     written in C."""
     if not isUnicodeType(text):
         text = text.decode(encoding)
     total = 0
     # unicode2T1 yields chunks on Py3 as bytes (ints) and on Py2 as str (chars)
     py3 = sys.version_info[0] == 3
     for font, chunk in unicode2T1(text, [self] + self.substitutionFonts):
         if py3:
             total += sum([font.widths[byte] for byte in chunk])
         else:
             total += sum([font.widths[ord(ch)] for ch in chunk])
     return total * 0.001 * size
Exemplo n.º 25
0
 def _py_stringWidth(self, text, size, encoding='utf8'):
     """This is the "purist" approach to width.  The practical approach
     is to use the stringWidth function, which may be swapped in for one
     written in C."""
     if not isUnicodeType(text):
         text = text.decode(encoding)
     # iterating bytes yields ints on Py3 but single-char strs on Py2
     is_py3 = sys.version_info[0] == 3
     width_units = 0
     for font, chunk in unicode2T1(text, [self] + self.substitutionFonts):
         if is_py3:
             width_units += sum([font.widths[b] for b in chunk])
         else:
             width_units += sum([font.widths[ord(c)] for c in chunk])
     return width_units * 0.001 * size
Exemplo n.º 26
0
 def handle_cdata(self, data):
     """Route CDATA text to the currently open element, so XML can be quoted."""

     def _text(chunk):
         # Force remaining byte strings to unicode under Python 3.
         if sys.version_info[0] == 3 and not isUnicodeType(chunk):
             return chunk.decode('utf-8')
         return chunk

     if self._curPara:
         self._curPara.rawtext += data
     elif self._curPrefmt:
         self._curPrefmt.rawtext += data
     elif self._curPyCode:
         self._curPyCode.rawtext += data
     elif self._curString:
         self._curString.text += _text(data)
     elif self._curTable:
         self._curTable.rawBlocks.append(data)
     elif self._curAuthor is not None:
         # compare against None so an empty author string still collects data
         self._curAuthor += _text(data)
     elif self._curSubject is not None:
         self._curSubject += _text(data)
Exemplo n.º 27
0
 def splitString(self, text, doc, encoding='utf-8'):
     """Splits text into a number of chunks, each of which belongs to a
     single subset.  Returns a list of tuples (subset, string).  Use subset
     numbers with getSubsetInternalName.  Doc is needed for distinguishing
     subsets when building different documents at the same time."""
     asciiReadable = self._asciiReadable
     # Per-document subsetting state; created lazily on first use.
     try:
         state = self.state[doc]
     except KeyError:
         state = self.state[doc] = TTFont.State(asciiReadable)
     curSet = -1
     cur = []
     results = []
     if not isUnicodeType(text):
         text = text.decode(encoding
                            or 'utf-8')  # encoding defaults to utf-8
     assignments = state.assignments
     subsets = state.subsets
     for code in map(ord, text):
         if code in assignments:
             # character already has a subset slot assigned
             n = assignments[code]
         else:
             if state.frozen:
                 raise pdfdoc.PDFError(
                     "Font %s is already frozen, cannot add new character U+%04X"
                     % (self.fontName, code))
             n = state.nextCode
             if n & 0xFF == 32:
                 # make code 32 always be a space character
                 if n != 32: subsets[n >> 8].append(32)
                 state.nextCode += 1
                 n = state.nextCode
             state.nextCode += 1
             assignments[code] = n
             if n > 32:
                 # start a new 256-slot subset whenever the low byte wraps to 0
                 if not (n & 0xFF): subsets.append([])
                 subsets[n >> 8].append(code)
             else:
                 subsets[0][n] = code
         # high byte selects the subset; flush the current run when it changes
         if (n >> 8) != curSet:
             if cur:
                 results.append((curSet, ''.join(map(chr, cur))))
             curSet = (n >> 8)
             cur = []
         cur.append(n & 0xFF)
     if cur:
         results.append((curSet, ''.join(map(chr, cur))))
     return results
Exemplo n.º 28
0
    def makeStream(self):
        "Finishes the generation and returns the TTF file as a string"
        stm = getBytesIO()
        write = stm.write

        numTables = len(self.tables)
        # sfnt header search fields: searchRange is the largest power of two
        # <= numTables, times 16 (the directory entry size).
        searchRange = 1
        entrySelector = 0
        while searchRange * 2 <= numTables:
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1
        searchRange = searchRange * 16
        rangeShift = numTables * 16 - searchRange

        # Header
        write(
            pack(">lHHHH", 0x00010000, numTables, searchRange, entrySelector,
                 rangeShift))

        # Table directory
        tables = list(self.tables.items())
        tables.sort()  # XXX is this the correct order?
        offset = 12 + numTables * 16
        for tag, data in tables:
            if tag == 'head':
                # remember where 'head' lands so checkSumAdjustment can be
                # patched in at the end.
                # NOTE(review): assumes a 'head' table is always present;
                # otherwise head_start is unbound below — confirm callers.
                head_start = offset
            checksum = calcChecksum(data)
            if isUnicodeType(tag):
                tag = tag.encode('utf-8')
            write(tag)
            write(pack(">LLL", checksum, offset, len(data)))
            # table data is padded to a 4-byte boundary
            paddedLength = (len(data) + 3) & ~3
            offset = offset + paddedLength

        # Table data
        for tag, data in tables:
            # pad with up to 3 NUL bytes, then truncate to the 4-byte boundary
            data += b"\0\0\0"
            write(data[:len(data) & ~3])

        # Patch checkSumAdjustment (at offset 8 in 'head') so the whole-file
        # checksum equals the magic 0xB1B0AFBA.
        checksum = calcChecksum(stm.getvalue())
        checksum = add32(0xB1B0AFBA, -checksum)
        stm.seek(head_start + 8)
        write(pack('>L', checksum))

        return stm.getvalue()
Exemplo n.º 29
0
    def makeStream(self):
        "Finishes the generation and returns the TTF file as a string"
        stm = getBytesIO()
        write = stm.write

        numTables = len(self.tables)
        # sfnt header search fields: searchRange is the largest power of two
        # <= numTables, times 16 (the directory entry size).
        searchRange = 1
        entrySelector = 0
        while searchRange * 2 <= numTables:
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1
        searchRange = searchRange * 16
        rangeShift = numTables * 16 - searchRange

        # Header
        write(pack(">lHHHH", 0x00010000, numTables, searchRange,
                                 entrySelector, rangeShift))

        # Table directory
        tables = list(self.tables.items())
        tables.sort()     # XXX is this the correct order?
        offset = 12 + numTables * 16
        for tag, data in tables:
            if tag == 'head':
                # remember where 'head' lands so checkSumAdjustment can be
                # patched in at the end.
                # NOTE(review): assumes a 'head' table is always present;
                # otherwise head_start is unbound below — confirm callers.
                head_start = offset
            checksum = calcChecksum(data)
            if isUnicodeType(tag):
                tag = tag.encode('utf-8')
            write(tag)
            write(pack(">LLL", checksum, offset, len(data)))
            # table data is padded to a 4-byte boundary
            paddedLength = (len(data)+3)&~3
            offset = offset + paddedLength

        # Table data
        for tag, data in tables:
            # pad with up to 3 NUL bytes, then truncate to the 4-byte boundary
            data += b"\0\0\0"
            write(data[:len(data)&~3])

        # Patch checkSumAdjustment (at offset 8 in 'head') so the whole-file
        # checksum equals the magic 0xB1B0AFBA.
        checksum = calcChecksum(stm.getvalue())
        checksum = add32(0xB1B0AFBA, -checksum)
        stm.seek(head_start + 8)
        write(pack('>L', checksum))

        return stm.getvalue()
Exemplo n.º 30
0
 def splitString(self, text, doc, encoding='utf-8'):
     """Splits text into a number of chunks, each of which belongs to a
     single subset.  Returns a list of tuples (subset, string).  Use subset
     numbers with getSubsetInternalName.  Doc is needed for distinguishing
     subsets when building different documents at the same time."""
     asciiReadable = self._asciiReadable
     # Per-document subsetting state; created lazily on first use.
     try: state = self.state[doc]
     except KeyError: state = self.state[doc] = TTFont.State(asciiReadable)
     curSet = -1
     cur = []
     results = []
     if not isUnicodeType(text):
         text = text.decode(encoding or 'utf-8')   # encoding defaults to utf-8
     assignments = state.assignments
     subsets = state.subsets
     for code in map(ord,text):
         if code in assignments:
             # character already has a subset slot assigned
             n = assignments[code]
         else:
             if state.frozen:
                 raise pdfdoc.PDFError("Font %s is already frozen, cannot add new character U+%04X" % (self.fontName, code))
             n = state.nextCode
             if n&0xFF==32:
                 # make code 32 always be a space character
                 if n!=32: subsets[n >> 8].append(32)
                 state.nextCode += 1
                 n = state.nextCode
             state.nextCode += 1
             assignments[code] = n
             if n>32:
                 # start a new 256-slot subset whenever the low byte wraps to 0
                 if not(n&0xFF): subsets.append([])
                 subsets[n >> 8].append(code)
             else:
                 subsets[0][n] = code
         # high byte selects the subset; flush the current run when it changes
         if (n >> 8) != curSet:
             if cur:
                 results.append((curSet, ''.join(map(chr,cur))))
             curSet = (n >> 8)
             cur = []
         cur.append(n & 0xFF)
     if cur:
         results.append((curSet,''.join(map(chr,cur))))
     return results
Exemplo n.º 31
0
def dumbSplit(word, widths, maxWidths):
    """Greedy character-level line breaker, adequate for Chinese text.

    Packs as many characters as will fit onto each line, cutting "like a
    knife" between characters.  Returns a list of [extraSpace, text] entries
    where text is a Unicode chunk for one line and extraSpace is the unused
    width (in points) remaining on that line — a structure that is easy to
    display and supports 'backtracking' approaches after the fact.

    Test cases assume each character is ten points wide...

    >>> dumbSplit(u'Hello', [10]*5, 60)
    [[10, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 50)
    [[0, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 40)
    [[0, u'Hell'], [30, u'o']]
    """
    _more = """
    #>>> dumbSplit(u'Hello', [10]*5, 4)   # less than one character
    #(u'', u'Hello')
    # this says 'Nihongo wa muzukashii desu ne!' (Japanese is difficult isn't it?) in 12 characters
    >>> jtext = u'\u65e5\u672c\u8a9e\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01'
    >>> dumbSplit(jtext, [10]*11, 30)   #
    (u'\u65e5\u672c\u8a9e', u'\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01')
    """
    if not isinstance(maxWidths, (list, tuple)):
        maxWidths = [maxWidths]
    assert isUnicodeType(word)
    nChars = len(word)
    lines = []
    pos = used = lineStart = 0
    curMax = maxWidths[0]
    while pos < nChars:
        charW = widths[pos]
        ch = word[pos]
        used += charW
        pos += 1
        if not (used > curMax + _FUZZ and used > 0):
            continue
        # This character overflowed the line; decide where to cut.
        spare = curMax - used
        if ord(ch) < 0x3000:
            # We appear to be inside a non-Asian script section (a very crude
            # but cheap test).  Scan backwards — at most roughly half way
            # along the line — for a space-like character or a reversion to
            # Kanji, either of which makes a better split point than cutting
            # an English word (e.g. a product name in a title) in half.
            floor = (lineStart + pos) >> 1   # arbitrary taste issue
            for back in range(pos - 1, floor, -1):
                if category(word[back]) == 'Zs' or ord(word[back]) >= 0x3000:
                    after = back + 1
                    if after < pos:
                        # Return the widths of everything after the split
                        # point to the spare space and resume from there.
                        spare += sum(widths[after + 1:pos])
                        charW = widths[after]
                        ch = word[after]
                        pos = after + 1
                    break
            # end of English-within-Asian special case

        # Japanese typography rule: a character that must not start a line is
        # kept on this line so it hangs into the right margin (only one such
        # character — more would grow ugly).  Otherwise push the overflowing
        # character back to the next line and reclaim its width; the
        # pos > lineStart + 1 guard guarantees forward progress.
        # (bug fix contributed by Alexander Vasilenko)
        if ch not in ALL_CANNOT_START and pos > lineStart + 1:
            pos -= 1
            spare += charW

        lines.append([spare, word[lineStart:pos].strip()])
        try:
            curMax = maxWidths[len(lines)]
        except IndexError:
            curMax = maxWidths[-1]  # ran out of widths: reuse the last one
        lineStart = pos
        used = 0

    # Emit any characters left on the final, partially filled line.
    if used > 0:
        lines.append([curMax - used, word[lineStart:]])

    return lines
Exemplo n.º 32
0
def dumbSplit(word, widths, maxWidths):
    """Greedy character-level line breaker, adequate for Chinese text.

    This function attempts to fit as many characters as possible into the
    available space, cutting "like a knife" between characters.  It returns a
    list of [extraSpace, text] entries where text is a Unicode chunk for one
    line and extraSpace is the points of unused space available on that line.
    This is a structure which is fairly easy to display, and supports
    'backtracking' approaches after the fact.

    Test cases assume each character is ten points wide...

    >>> dumbSplit(u'Hello', [10]*5, 60)
    [[10, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 50)
    [[0, u'Hello']]
    >>> dumbSplit(u'Hello', [10]*5, 40)
    [[0, u'Hell'], [30, u'o']]
    """
    # Disabled doctests kept for reference; not executed.
    _more = """
    #>>> dumbSplit(u'Hello', [10]*5, 4)   # less than one character
    #(u'', u'Hello')
    # this says 'Nihongo wa muzukashii desu ne!' (Japanese is difficult isn't it?) in 12 characters
    >>> jtext = u'\u65e5\u672c\u8a9e\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01'
    >>> dumbSplit(jtext, [10]*11, 30)   #
    (u'\u65e5\u672c\u8a9e', u'\u306f\u96e3\u3057\u3044\u3067\u3059\u306d\uff01')
    """
    # A scalar maxWidths means every line has the same available width.
    if not isinstance(maxWidths,(list,tuple)): maxWidths = [maxWidths]
    assert isUnicodeType(word)
    lines = []
    i = widthUsed = lineStartPos = 0
    maxWidth = maxWidths[0]
    nW = len(word)
    while i<nW:
        w = widths[i]
        c = word[i]
        widthUsed += w
        i += 1
        # Character i-1 overflowed the current line (within _FUZZ tolerance).
        if widthUsed > maxWidth + _FUZZ and widthUsed>0:
            extraSpace = maxWidth - widthUsed
            if ord(c)<0x3000:
                # we appear to be inside a non-Asian script section.
                # (this is a very crude test but quick to compute).
                # This is likely to be quite rare so the speed of the
                # code below is hopefully not a big issue.  The main
                # situation requiring this is that a document title
                # with an english product name in it got cut.
                
                
                # we count back and look for 
                #  - a space-like character
                #  - reversion to Kanji (which would be a good split point)
                #  - in the worst case, roughly half way back along the line
                limitCheck = (lineStartPos+i)>>1        #(arbitrary taste issue)
                for j in range(i-1,limitCheck,-1):
                    cj = word[j]
                    if category(cj)=='Zs' or ord(cj)>=0x3000:
                        # Split just after position j: give the widths of the
                        # pushed-back characters back to extraSpace and make
                        # the character at k the new overflow candidate.
                        k = j+1
                        if k<i:
                            j = k+1
                            extraSpace += sum(widths[j:i])
                            w = widths[k]
                            c = word[k]
                            i = j
                            break

                #end of English-within-Asian special case

            #we are pushing this character back, but
            #the most important of the Japanese typography rules
            #if this character cannot start a line, wrap it up to this line so it hangs
            #in the right margin. We won't do two or more though - that's unlikely and
            #would result in growing ugliness.
            #and increase the extra space
            #bug fix contributed by Alexander Vasilenko
            if c not in ALL_CANNOT_START and i>lineStartPos+1:
                #otherwise we need to push the character back
                #the i>lineStart+1 condition ensures progress
                i -= 1
                extraSpace += w

            #lines.append([maxWidth-sum(widths[lineStartPos:i]), word[lineStartPos:i].strip()])
            lines.append([extraSpace, word[lineStartPos:i].strip()])
            try:
                maxWidth = maxWidths[len(lines)]
            except IndexError:
                maxWidth = maxWidths[-1]  # use the last one
            lineStartPos = i
            widthUsed = 0

    #any characters left?
    if widthUsed > 0:
        lines.append([maxWidth - widthUsed, word[lineStartPos:]])

    return lines