def _fixcp1252(envkey=None): """ Fixup cp1252 codec in order to use it as a real superset of latin-1 :Parameters: - `envkey`: The environment key to lookup. If this key is set and ``1`` the charset definition won't be fixed and this function is a no-op. If unset or ``None``, no lookup is made. :Types: - `envkey`: ``str`` """ import os if envkey is not None and os.environ.get(envkey) == '1': return import codecs from encodings import cp1252 try: dmap = cp1252.decoding_map # pylint: disable = E1101 except AttributeError: dtable = list(cp1252.decoding_table) codepoint = 0 try: while True: codepoint = dtable.index(u'\ufffe', codepoint) dtable[codepoint] = unichr(codepoint) except ValueError: # no more undefined points there pass dtable = u''.join(dtable) cp1252.decoding_table = dtable cp1252.encoding_table = codecs.charmap_build(dtable) else: # Python 2.4 for key, value in dmap.iteritems(): if value is None: dmap[key] = key cp1252.encoding_map = codecs.make_encoding_map(dmap)
def __init__(self, locking_shift_decode_map=None, single_shift_decode_map=None): if locking_shift_decode_map is None: locking_shift_decode_map = BASIC_CHARACTER_SET if single_shift_decode_map is None: single_shift_decode_map = BASIC_CHARACTER_SET_EXTENSION self._decode_map = locking_shift_decode_map self._decode_map.update( dict(((self._ESCAPE << 8 | key), value) for (key, value) in single_shift_decode_map.items())) self._encoding_map = codecs.make_encoding_map(self._decode_map)
def find_pdfdocencoding(encoding): """ This function conforms to the codec module registration protocol. It defers calculating data structures until a pdfdocencoding encode or decode is required. PDFDocEncoding is described in the PDF 1.7 reference manual. """ if encoding != 'pdfdocencoding': return # Create the decoding map based on the table in section D.2 of the # PDF 1.7 manual # Start off with the characters with 1:1 correspondence decoding_map = set(range(0x20, 0x7F)) | set(range(0xA1, 0x100)) decoding_map.update((0x09, 0x0A, 0x0D)) decoding_map.remove(0xAD) decoding_map = dict((x, x) for x in decoding_map) # Add in the special Unicode characters decoding_map.update(zip(range(0x18, 0x20), ( 0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC))) decoding_map.update(zip(range(0x80, 0x9F), ( 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018, 0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E))) decoding_map[0xA0] = 0x20AC # Make the encoding map from the decoding map encoding_map = codecs.make_encoding_map(decoding_map) # Not every PDF producer follows the spec, so conform to Postel's law # and interpret encoded strings if at all possible. In particular, they # might have nulls and form-feeds, judging by random code snippets # floating around the internet. decoding_map.update(((x, x) for x in range(0x18))) def encode(input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) return codecs.CodecInfo(encode, decode, name='pdfdocencoding')
""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0004: 0x009c, # SELECT 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # REQUIRED NEW LINE 0x0007: 0x007f, # DELETE 0x0008: 0x0097, # GRAPHIC ESCAPE 0x0009: 0x008d, # SUPERSCRIPT
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET 0x0082: 0x05d2, # HEBREW LETTER GIMEL 0x0083: 0x05d3, # HEBREW LETTER DALET 0x0084: 0x05d4, # HEBREW LETTER HE 0x0085: 0x05d5, # HEBREW LETTER VAV
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: None, 0x00a2: None,
""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0025: 0x066a, # ARABIC PERCENT SIGN 0x0080: 0x00b0, # DEGREE SIGN 0x0081: 0x00b7, # MIDDLE DOT 0x0082: 0x2219, # BULLET OPERATOR 0x0083: 0x221a, # SQUARE ROOT 0x0084: 0x2592, # MEDIUM SHADE
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: None, 0x00a2: None, 0x00a3: None, 0x00a5: None, 0x00a6: None, 0x00a7: None,
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET
""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a4: 0x20ac, # EURO SIGN 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON 0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON 0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL 0x0007: 0x007f, # DELETE 0x0008: 0x0097, # CONTROL 0x0009: 0x008d, # CONTROL
""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: None, # UNDEFINED 0x0081: None, # UNDEFINED 0x0082: None, # UNDEFINED 0x0083: None, # UNDEFINED 0x0084: None, # UNDEFINED 0x0085: None, # UNDEFINED
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: 0x067e, # ARABIC LETTER PEH 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED
""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00b9, # SUPERSCRIPT ONE 0x0082: 0x00b2, # SUPERSCRIPT TWO 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00b3, # SUPERSCRIPT THREE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA 0x00e8: 0x03b7, # GREEK SMALL LETTER ETA 0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA 0x00ea: 0x03be, # GREEK SMALL LETTER XI 0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA 0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA 0x00ed: 0x03bc, # GREEK SMALL LETTER MU 0x00ee: 0x03bd, # GREEK SMALL LETTER NU 0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON 0x00f0: 0x03c0, # GREEK SMALL LETTER PI 0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00f2: 0x03c1, # GREEK SMALL LETTER RHO 0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA 0x00f4: 0x03c4, # GREEK SMALL LETTER TAU 0x00f5: 0x03b8, # GREEK SMALL LETTER THETA 0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA 0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA 0x00f8: 0x03c7, # GREEK SMALL LETTER CHI 0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON 0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA 0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA 0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 0x00ff: None, # UNDEFINED }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map)
""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: 0x067e, # ARABIC LETTER PEH
#!/usr/local/bin/python2.1 """ Python Character Mapping Codec for ROT13. See http://ucsub.colorado.edu/~kominek/rot13/ for details. Written by Marc-Andre Lemburg ([email protected]). """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0041: 0x004e, 0x0042: 0x004f,
""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE 0x00a2: 0x02d8, # BREVE
0x007f: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x1b0a: 0x000c, # FORM FEED 0x1b14: 0x005e, # CIRCUMFLEX ACCENT 0x1b28: 0x007b, # LEFT CURLY BRACKET 0x1b29: 0x007d, # RIGHT CURLY BRACKET 0x1b2f: 0x005c, # REVERSE SOLIDUS 0x1b3c: 0x005b, # LEFT SQUARE BRACKET 0x1b3d: 0x007e, # TILDE 0x1b3e: 0x005d, # RIGHT SQUARE BRACKET 0x1b40: 0x007c, # VERTICAL LINE 0x1b65: 0x20ac, # EURO SIGN } ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) extra_encoding_map = codecs.make_encoding_map(extra_decoding_map) if __name__=='__main__': import string c = Codec() def test(s): r = c.decode(c.encode(s))[0] if r != s: print 'in %r != out %r'%(s, r) test(unicode(string.letters)) test(u'\u20ac') test(u'\xa0') try: test(u'av\u20ad') except Exception, e: print `u'av\u20ad'`, 'raised', e
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
# GNU Lesser General Public License for more details. ##################################################################### """JIS X 0201 Codec required for JIS8 encoding of SecsVarJIS8""" import codecs jis8_decoding_map = codecs.make_identity_dict(range(256)) jis8_decoding_map.update({ 0x005C: 0x00A5, # Yen Sign 0x007E: 0x203E, # Overline }) for i in range(0x00A1, 0x00E0): jis8_decoding_map[i] = i + 0xFEC0 jis8_encoding_map = codecs.make_encoding_map(jis8_decoding_map) def jis_x_0201_encode(data, errors='strict'): return codecs.charmap_encode(data,errors,jis8_encoding_map) def jis_x_0201_decode(data, errors='strict'): return codecs.charmap_decode(data,errors,jis8_decoding_map) def jis_x_0201_search(name): if name == "jis-8": return codecs.CodecInfo(encode=jis_x_0201_encode, decode=jis_x_0201_decode, name="jis-8") return None # register the codec codecs.register(jis_x_0201_search)
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO 0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE 0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR 0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0083: None, # UNDEFINED 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
def codegen(name, map, encodingname, comments=1): """ Returns Python source for the given map. Comments are included in the source, if comments is true (default). """ # Generate code decoding_map_code = python_mapdef_code( 'decoding_map', map, comments=comments) decoding_table_code = python_tabledef_code( 'decoding_table', map, comments=comments) encoding_map_code = python_mapdef_code( 'encoding_map', codecs.make_encoding_map(map), comments=comments, precisions=(4, 2)) if decoding_table_code: suffix = 'table' else: suffix = 'map' l = [ '''\ """ Python Character Mapping Codec %s generated from '%s' with gencodec.py. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_%s) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_%s) ''' % (encodingname, name, suffix, suffix)] l.append('''\ class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input, self.errors, encoding_%s)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input, self.errors, decoding_%s)[0]''' % (suffix, suffix)) l.append(''' class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name=%r, encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ''' % encodingname.replace('_', '-')) # Add decoding table or map (with preference to the table) if not decoding_table_code: l.append(''' ### Decoding Map ''') l.extend(decoding_map_code) else: l.append(''' ### Decoding Table ''') l.extend(decoding_table_code) # Add encoding map if decoding_table_code: l.append(''' ### Encoding table encoding_table = codecs.charmap_build(decoding_table) ''') else: l.append(''' ### Encoding Map ''') l.extend(encoding_map_code) # Final new-line l.append('') return '\n'.join(l).expandtabs()
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT 0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT 0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL 0x0007: 0x007f, # DELETE 0x0008: 0x0097, # CONTROL 0x0009: 0x008d, # CONTROL
""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON 0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE 0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
def codegen(name, map, encodingname, comments=1): """ Returns Python source for the given map. Comments are included in the source, if comments is true (default). """ # Generate code decoding_map_code = python_mapdef_code( 'decoding_map', map, comments=comments) decoding_table_code = python_tabledef_code( 'decoding_table', map, comments=comments) encoding_map_code = python_mapdef_code( 'encoding_map', codecs.make_encoding_map(map), comments=comments, precisions=(4, 2)) if decoding_table_code: suffix = 'table' else: suffix = 'map' l = [ '''\ """ Python Character Mapping Codec %s generated from '%s' with gencodec.py. """#" # Ensure the generated codec works with Python 2.6+. from __future__ import unicode_literals import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_%s) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_%s) ''' % (encodingname, name, suffix, suffix)] l.append('''\ class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): return codecs.charmap_encode(input,self.errors,encoding_%s)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' % (suffix, suffix)) l.append(''' class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return codecs.CodecInfo( name=%r, encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter, ) ''' % encodingname.replace('_', '-')) # Add decoding table or map (with preference to the table) if not decoding_table_code: l.append(''' ### Decoding Map ''') l.extend(decoding_map_code) else: l.append(''' ### Decoding Table ''') l.extend(decoding_table_code) # Add encoding map if decoding_table_code: l.append(''' ### Encoding table encoding_table=codecs.charmap_build(decoding_table) ''') else: l.append(''' ### Encoding Map ''') l.extend(encoding_map_code) # Final new-line l.append('') return '\n'.join(l).expandtabs()
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x007f: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE 0x1b0a: 0x000c, # FORM FEED 0x1b14: 0x005e, # CIRCUMFLEX ACCENT 0x1b28: 0x007b, # LEFT CURLY BRACKET 0x1b29: 0x007d, # RIGHT CURLY BRACKET 0x1b2f: 0x005c, # REVERSE SOLIDUS 0x1b3c: 0x005b, # LEFT SQUARE BRACKET 0x1b3d: 0x007e, # TILDE 0x1b3e: 0x005d, # RIGHT SQUARE BRACKET 0x1b40: 0x007c, # VERTICAL LINE 0x1b65: 0x20ac, # EURO SIGN } ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map) extra_encoding_map = codecs.make_encoding_map(extra_decoding_map) if __name__ == '__main__': import string c = Codec() r = c.decode('0001000791282143F5000005E8329BFD06') print(r) #def test(s): # r = c.decode(c.encode(s))[0] # if r != s: print('in %r != out %r'%(s, r)) #test(unicode(string.letters)) #test(u'\u20ac') #test(u'\xa0') #try: # test(u'av\u20ad')
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self,input,errors='strict'): return codecs.charmap_encode(input,errors,encoding_map) def decode(self,input,errors='strict'): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): pass class StreamReader(Codec,codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA 0x00af: 0x00c6, # LATIN CAPITAL LETTER AE 0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00e7: 0x0647, # ARABIC LETTER HEH 0x00e8: 0x0648, # ARABIC LETTER WAW 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA 0x00ea: 0x064a, # ARABIC LETTER YEH 0x00eb: 0x064b, # ARABIC FATHATAN 0x00ec: 0x064c, # ARABIC DAMMATAN 0x00ed: 0x064d, # ARABIC KASRATAN 0x00ee: 0x064e, # ARABIC FATHA 0x00ef: 0x064f, # ARABIC DAMMA 0x00f0: 0x0650, # ARABIC KASRA 0x00f1: 0x0651, # ARABIC SHADDA 0x00f2: 0x0652, # ARABIC SUKUN 0x00f3: None, 0x00f4: None, 0x00f5: None, 0x00f6: None, 0x00f7: None, 0x00f8: None, 0x00f9: None, 0x00fa: None, 0x00fb: None, 0x00fc: None, 0x00fd: None, 0x00fe: None, 0x00ff: None, }) ### Encoding Map encoding_map = codecs.make_encoding_map(decoding_map)
""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py. Written by Marc-Andre Lemburg ([email protected]). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright 2000 Guido van Rossum. """#" import codecs ### Codec APIs class Codec(codecs.Codec): def encode(self, input, errors='strict'): return codecs.charmap_encode(input, errors, encoding_map) def decode(self, input, errors='strict'): return codecs.charmap_decode(input, errors, decoding_map) class StreamWriter(Codec, codecs.StreamWriter): pass class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API def getregentry(): return (Codec().encode, Codec().decode, StreamReader, StreamWriter) ### Decoding Map decoding_map = codecs.make_identity_dict(range(256)) decoding_map.update({ 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK