def _read_lines(self, file_path):
        """Return the UTF-8 decoded lines of the file at ``file_path``.

        A path of "-" selects standard input, following the UNIX
        convention.  Bytes that cannot be decoded are replaced instead of
        raising an error.

        Raises:
          IOError: If the file does not exist or cannot be read.

        """
        reading_stdin = file_path == '-'
        if reading_stdin:
            stream = codecs.StreamReaderWriter(sys.stdin,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
        else:
            # codecs offers no universal newline mode, so a file with CRLF
            # endings yields lines that still end in "\r" after the split
            # below.
            # FIXME: This should use self.filesystem
            stream = codecs.open(file_path, 'r', 'utf8', 'replace')

        # Leaving the with-block closes the stream even when read() fails.
        with stream:
            text = stream.read()

        return text.split('\n')
示例#2
0
def ProcessErrorList(_filename):
    """Group the error records read from a file by the file they refer to.

    Every input line is parsed with ``__ErrorInfo``; records whose
    ``filename`` is falsy are skipped.

    Args:
      _filename: Path of the UTF-8 encoded error list, or '-' to read
        standard input (UNIX convention).  Undecodable bytes are replaced.

    Returns:
      A dict mapping each record's ``filename`` to the list of records for
      that file, in input order.
    """
    # We are not opening the file with universal newline support (which
    # codecs doesn't support anyway), so every line keeps its terminator,
    # including a '\r' when the input has CRLF endings.  Nothing in this
    # function strips it.
    # NOTE(review): presumably __ErrorInfo tolerates the terminator -- confirm.
    if _filename == '-':
        lines = codecs.StreamReaderWriter(sys.stdin, codecs.getreader('utf8'),
                                          codecs.getwriter('utf8'),
                                          'replace').readlines()
    else:
        # Close the handle deterministically instead of leaving it open
        # until garbage collection.
        with codecs.open(_filename, 'rb', 'utf8', 'replace') as error_file:
            lines = error_file.readlines()

    error_dic_list = {}
    for line in lines:
        info = __ErrorInfo(line)
        if not info.filename:
            continue
        error_dic_list.setdefault(info.filename, []).append(info)

    return error_dic_list
示例#3
0
    def read_with_encoding(self, filename, document, codec_info, encoding):
        """Fetch the lines of the include target named by ``self.arguments[0]``.

        The target is opened with ``urllib.request.urlopen`` and decoded
        strictly with the stream classes at ``codec_info[2]`` and
        ``codec_info[3]``.  Decoded lines are stored in the module-level
        ``cache`` keyed by the target, so a cached target is not fetched
        again.  ``filename`` is not used here.

        Returns the lines after ``dedent_lines`` was applied with the
        ``dedent`` option, or a one-element list holding a reporter warning
        when fetching or decoding fails.
        """
        global cache

        target = self.arguments[0]
        stream = None
        try:
            if target in cache:
                lines = cache[target]
            else:
                stream = codecs.StreamReaderWriter(
                    urllib.request.urlopen(target), codec_info[2],
                    codec_info[3], 'strict')
                lines = stream.readlines()
                cache[target] = lines

            return dedent_lines(lines, self.options.get('dedent'))
        except (IOError, OSError, urllib.error.URLError):
            message = ('Include file %r not found or reading it failed' %
                       target)
            return [document.reporter.warning(message, line=self.lineno)]
        except UnicodeError:
            message = ('Encoding %r used for reading included file %r seems to '
                       'be wrong, try giving an :encoding: option' %
                       (encoding, target))
            return [document.reporter.warning(message)]
        finally:
            # Only a freshly opened URL stream needs closing; cache hits
            # never open one.
            if stream is not None:
                stream.close()
示例#4
0
    def open(self):
        """Open the log file and return an encoding-aware stream for it.

        ``self.baseFilename`` is opened in binary mode (a 'b' is appended to
        ``self.mode`` when absent) and wrapped in a
        ``codecs.StreamReaderWriter`` that reads with the stream reader for
        ``self.encoding`` and writes through ``CerelogStreamWriter``.  The
        wrapper is stored in ``self.stream`` and returned.
        """
        # Both unicode and str objects must be loggable without the client
        # code having to care which one it holds.
        #
        # The object codecs.open() returns assumes that whatever reaches
        # write() is *already* in the target encoding.  That works most of
        # the time, but breaks when a byte string holding non-ASCII latin-1
        # characters is written to a stream that expects UTF-8.
        #
        # Hence this slight variation of what codecs.open() does (python's
        # logging module itself uses codecs.open() for encoded log files).
        requested = self.mode
        mode = requested if 'b' in requested else requested + 'b'

        raw = file(self.baseFilename, mode)
        encode, _decode, reader_cls, _writer_cls = codecs.lookup(self.encoding)

        wrapped = codecs.StreamReaderWriter(raw, reader_cls, CerelogStreamWriter)
        wrapped.encoding = self.encoding
        wrapped.writer.encoding = wrapped.encoding
        wrapped.writer.encode = encode

        self.stream = wrapped
        return self.stream
示例#5
0
 def VerifyUcs2Data(FileIn, FileName, Encoding):
     """Check that the file data can be encoded with the 'ucs-2' codec.

     ``FileIn`` (the raw file bytes) is decoded with ``Encoding`` and the
     result is encoded with the codec registered as 'ucs-2'.  When either
     step fails, the data is re-read line by line so that the offending line
     number can be reported through ``EdkLogger.error``; ``FileName`` is only
     used in that message.

     NOTE(review): presumably ``EdkLogger.error`` raises, which is the only
     way the ``while True`` loop below ends -- confirm.
     """
     Ucs2Info = codecs.lookup('ucs-2')
     #
     # Convert to unicode
     #
     try:
         FileDecoded = codecs.decode(FileIn, Encoding)
         Ucs2Info.encode(FileDecoded)
     except:
         # Whole-file check failed: walk the data one line at a time to
         # find the line to blame.
         UniFile = BytesIO(FileIn)
         Info = codecs.lookup(Encoding)
         (Reader, Writer) = (Info.streamreader, Info.streamwriter)
         File = codecs.StreamReaderWriter(UniFile, Reader, Writer)
         LineNumber = 0
         ErrMsg = lambda Encoding, LineNumber: \
                  '%s contains invalid %s characters on line %d.' % \
                  (FileName, Encoding, LineNumber)
         while True:
             LineNumber = LineNumber + 1
             try:
                 Line = File.readline()
                 # An empty string means end of data was reached without
                 # finding a line that fails on its own.
                 if Line == '':
                     EdkLogger.error('Unicode File Parser', PARSER_ERROR,
                                     ErrMsg(Encoding, LineNumber))
                 Ucs2Info.encode(Line)
             except:
                 # NOTE(review): this bare except also catches anything
                 # raised by the end-of-data EdkLogger.error call above, in
                 # which case the 'UCS-2' message is reported -- confirm
                 # that is intended.
                 EdkLogger.error('Unicode File Parser', PARSER_ERROR,
                                 ErrMsg('UCS-2', LineNumber))
示例#6
0
    def OpenUniFile(FileName):
        """Return a codec stream over the verified contents of ``FileName``.

        The file is read completely in binary mode.  It is treated as UTF-16
        when it starts with a UTF-16 byte order mark and as UTF-8 otherwise,
        and is checked with ``UniFileClassObject.VerifyUcs2Data`` before an
        in-memory ``codecs.StreamReaderWriter`` for that encoding is
        returned.

        A failure to open or read the file is reported through
        ``EdkLogger.error`` with ``FILE_OPEN_FAILURE`` and the file name as
        extra data.
        """
        #
        # Read file
        #
        try:
            # The with-block closes the handle even when read() fails.
            with open(FileName, mode='rb') as UniFile:
                FileIn = UniFile.read()
        except (IOError, OSError):
            # Report the path that could not be read.  This handler used to
            # call a nonexistent `EdkLogger.Error` with an undefined `File`,
            # so it crashed instead of reporting the failure.
            EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=FileName)

        #
        # Detect Byte Order Mark at beginning of file.  Default to UTF-8
        #
        Encoding = 'utf-8'
        if FileIn.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
            Encoding = 'utf-16'

        UniFileClassObject.VerifyUcs2Data(FileIn, FileName, Encoding)

        # Serve the already-read bytes from memory through the codec.
        UniFile = BytesIO(FileIn)
        Info = codecs.lookup(Encoding)
        (Reader, Writer) = (Info.streamreader, Info.streamwriter)
        return codecs.StreamReaderWriter(UniFile, Reader, Writer)
示例#7
0
def main():
    """Main entry function.

    Reads ``sys.argv`` as ``<project-name> <filetype> <paths...>``, where
    filetype is one of python/cpp/all.  Every given file, and every file
    found below a given directory, is handed to ``process`` together with
    the set of allowed suffixes.  The process exit status is 1 when
    ``_HELPER.print_summary`` reports errors and 0 otherwise; with too few
    arguments a usage message is printed and the exit status is -1.
    """
    if len(sys.argv) < 3:
        print('Usage: <project-name> <filetype> <list-of-path to traverse>')
        print('\tfiletype can be python/cpp/all')
        # sys.exit() rather than exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit(-1)
    _HELPER.project_name = sys.argv[1]
    file_type = sys.argv[2]
    allow_type = set()
    if file_type in ('python', 'all'):
        allow_type.update(PYTHON_SUFFIX)
    if file_type in ('cpp', 'all'):
        allow_type.update(CXX_SUFFIX)
    # On Python 2 (outside Windows) wrap stderr so that unicode text is
    # written as UTF-8, replacing anything that cannot be encoded.
    if sys.version_info.major == 2 and os.name != 'nt':
        sys.stderr = codecs.StreamReaderWriter(sys.stderr,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
    for path in sys.argv[3:]:
        if os.path.isfile(path):
            process(path, allow_type)
        else:
            for root, _dirs, files in os.walk(path):
                for name in files:
                    process(os.path.join(root, name), allow_type)

    nerr = _HELPER.print_summary(sys.stderr)
    sys.exit(nerr > 0)
示例#8
0
 def __init__(self, buffer):
     """Store ``buffer`` wrapped in a UTF-8 ``codecs.StreamReaderWriter``."""
     # A codecs wrapper is used because TextIOWrapper closes its underlying
     # buffer on close *and* can't handle actual file objects (on python 2).
     utf8_reader = codecs.getreader('utf-8')
     utf8_writer = codecs.getwriter('utf-8')
     self.buffer = codecs.StreamReaderWriter(buffer, utf8_reader, utf8_writer)
示例#9
0
文件: code.py 项目: fzheng01/codejam
 def read_with_encoding(self, filename, document, codec_info, encoding):
     """Read ``filename`` and return its decoded, dedented lines.

     The file is opened in binary mode and decoded strictly with the stream
     classes at ``codec_info[2]`` and ``codec_info[3]``; ``dedent_lines`` is
     then applied with the ``dedent`` option.  When the file cannot be read
     or decoded, a one-element list holding a reporter warning is returned
     instead; ``encoding`` is only used in the decoding warning.
     """
     stream = None
     try:
         stream = codecs.StreamReaderWriter(open(filename, 'rb'),
                                            codec_info[2], codec_info[3],
                                            'strict')
         lines = stream.readlines()
         return dedent_lines(lines, self.options.get('dedent'))
     except (IOError, OSError):
         message = 'Include file %r not found or reading it failed' % filename
         return [document.reporter.warning(message, line=self.lineno)]
     except UnicodeError:
         message = ('Encoding %r used for reading included file %r seems to '
                    'be wrong, try giving an :encoding: option' %
                    (encoding, filename))
         return [document.reporter.warning(message)]
     finally:
         # The stream only exists once the file was opened successfully.
         if stream is not None:
             stream.close()
示例#10
0
    def test_code_changed(self):
        """Check that the polling reloader notices rewrites of watched files.

        Two temporary files are watched.  No change is reported right after
        their initial content is written; after each of the first two
        rewrites ``code_changed()`` is true once and then false, and it is
        true again after the final rewrite.
        """
        utf8 = codecs.lookup('utf8')

        def rewrite(handle, text):
            # Flush right away so the text is handed to the underlying file
            # before the reloader is polled.
            handle.write(text)
            handle.flush()

        with tempfile.NamedTemporaryFile('wb') as tmp_file1, \
                tempfile.NamedTemporaryFile('wb') as tmp_file2, \
                codecs.StreamReaderWriter(tmp_file1, utf8.streamreader, utf8.streamwriter, 'strict') as file1, \
                codecs.StreamReaderWriter(tmp_file2, utf8.streamreader, utf8.streamwriter, 'strict') as file2:
            reloader = _PollingReloader('example_service.standalone', ['pysoa'])

            rewrite(file1, 'test 1')
            rewrite(file2, 'test 2')

            # noinspection PyUnresolvedReferences
            with mock.patch.object(
                target=reloader,
                attribute='get_watch_file_names',
            ) as mock_get_watch_file_names:
                mock_get_watch_file_names.return_value = [file1.name, file2.name]

                self.assertFalse(reloader.code_changed())

                # NOTE(review): the pauses presumably let the files'
                # modification times advance between polls -- confirm.
                time.sleep(1.1)
                rewrite(file1, 'test changed 1')

                self.assertTrue(reloader.code_changed())
                self.assertFalse(reloader.code_changed())

                time.sleep(1.1)
                rewrite(file2, 'test changed 2')

                self.assertTrue(reloader.code_changed())
                self.assertFalse(reloader.code_changed())

                time.sleep(1.1)
                rewrite(file2, 'test changed 2 again')

                self.assertTrue(reloader.code_changed())
示例#11
0
    def __init__(
        self,
        host=PUDB_RDB_HOST,
        port=PUDB_RDB_PORT,
        port_search_limit=100,
        out=sys.stdout,
        term_size=None,
        reverse=False,
    ):
        """Obtain a client connection and run the debugger over it.

        ``sys.stdin`` and ``sys.stdout`` are replaced by a UTF-8 stream on
        the client socket (the previous handles are kept in
        ``self._prev_handles``) and ``Debugger.__init__`` is called with
        that stream.  Unless ``reverse`` is set, telnet options are
        negotiated on the raw socket first.

        :arg term_size: A two-tuple ``(columns, rows)``, or *None*. If *None*,
            try to determine the terminal size automatically.

            Currently, this uses a heuristic: It uses the terminal size of the
            debuggee as that for the debugger. The idea is that you might be
            running both in two tabs of the same terminal window, hence using
            terminals of the same size.
        """
        self.out = out

        if term_size is None:
            try:
                # The ioctl on file descriptor 1 yields (rows, columns).
                size_buf = fcntl.ioctl(1, termios.TIOCGWINSZ, "1234")
                rows, columns = struct.unpack("hh", size_buf)
                term_size = (columns, rows)
            except Exception:
                term_size = (80, 24)

        self._prev_handles = sys.stdin, sys.stdout
        self._client, (address, _port) = self.get_client(
            host=host, port=port, search_limit=port_search_limit, reverse=reverse
        )
        self.remote_addr = ":".join(map(str, address))

        self.say(SESSION_STARTED.format(self=self))

        # makefile ignores encoding if there's no buffering.
        raw_sock_file = self._client.makefile("rwb", 0)
        import codecs

        utf8_stream = codecs.StreamReaderWriter(
            raw_sock_file, codecs.getreader("utf-8"), codecs.getwriter("utf-8"))

        self._handle = sys.stdin = sys.stdout = utf8_stream

        # nc negotiation doesn't support telnet options
        if not reverse:
            import telnetlib as tn

            # Announce each option and insist on the matching DO reply.
            for option in (tn.SGA, tn.ECHO):
                raw_sock_file.write(tn.IAC + tn.WILL + option)
                resp = raw_sock_file.read(3)
                assert resp == tn.IAC + tn.DO + option

        Debugger.__init__(
            self, stdin=self._handle, stdout=self._handle, term_size=term_size
        )
示例#12
0
def _temp_fixture_file_name_context(contents):
    """Yield the name of a temporary file that holds ``contents`` as UTF-8.

    The text is written and flushed before the name is yielded.  The
    temporary file stays open while the generator is suspended; leaving the
    ``with`` block afterwards closes the wrapper and with it the file.
    """
    utf8 = codecs.lookup('utf-8')
    backing_file = tempfile.NamedTemporaryFile(mode='wb')
    with codecs.StreamReaderWriter(backing_file, utf8.streamreader, utf8.streamwriter, 'strict') as stream:
        stream.write(contents)
        stream.flush()

        yield backing_file.name
示例#13
0
def _wrap_stream_for_codec(f, encoding=None, errors='strict'):
    """Wrap the stream ``f`` so that it reads and writes text in ``encoding``.

    ``encoding`` defaults to 'utf-8' when it is None.  The returned
    ``codecs.StreamReaderWriter`` uses ``errors`` as its error handling
    scheme and carries the codec name in its ``encoding`` attribute.
    """
    codec_name = 'utf-8' if encoding is None else encoding
    codec = codecs.lookup(codec_name)
    wrapped = codecs.StreamReaderWriter(f, codec.streamreader,
                                        codec.streamwriter, errors)
    wrapped.encoding = codec_name
    return wrapped
示例#14
0
def write_csv(headers, data):
    """Write a header row and data rows as CSV and return the result.

    Args:
      headers: The sequence of column titles, written as the first row.
      data: An iterable of rows; each row is written with the csv writer.

    Returns:
      The contents of the in-memory buffer the CSV was written to through
      a UTF-8 stream writer.
    """
    csv_data = cStringIO.StringIO()
    codecinfo = codecs.lookup("utf8")
    wrapper = codecs.StreamReaderWriter(csv_data, codecinfo.streamreader,
                                        codecinfo.streamwriter)
    writer = csv.writer(wrapper)
    writer.writerow(headers)
    # writerows() instead of map(): map() was only used for its side effect
    # and is lazy on Python 3, where it would never write a single row.
    writer.writerows(data)
    return csv_data.getvalue()
示例#15
0
 def matchFileContents(self, path):
     """Return True when ``self.pattern`` occurs in any line of ``path``.

     The file is opened in binary mode through ``self.view.open`` and its
     lines are decoded as UTF-8 before the substring test.
     """
     with self.view.open(path, "rb") as raw:
         decoded = codecs.StreamReaderWriter(
             raw, codecs.getreader("utf-8"), codecs.getwriter("utf-8"))
         return any(self.pattern in line for line in decoded.readlines())
示例#16
0
文件: remote.py 项目: mm40/pudb
    def __init__(
        self,
        host=PUDB_RDB_HOST,
        port=PUDB_RDB_PORT,
        port_search_limit=100,
        out=sys.stdout,
        term_size=None,
        reverse=False,
    ):
        """Obtain a client connection and run the debugger over it.

        ``sys.stdin`` and ``sys.stdout`` are replaced by a UTF-8 stream on
        the client socket (the previous handles are kept in
        ``self._prev_handles``) and ``Debugger.__init__`` is called with
        that stream and ``term_size``.  Unless ``reverse`` is set, telnet
        options are negotiated on the raw socket first.
        """
        self.active = True
        self.out = out

        self._prev_handles = sys.stdin, sys.stdout
        self._client, (address, _port) = self.get_client(
            host=host,
            port=port,
            search_limit=port_search_limit,
            reverse=reverse,
        )
        self.remote_addr = ":".join(map(str, address))

        self.say(SESSION_STARTED.format(self=self))

        # makefile ignores encoding if there's no buffering.
        raw_sock_file = self._client.makefile("rwb", 0)
        import codecs

        utf8_reader = codecs.getreader("utf-8")
        utf8_writer = codecs.getwriter("utf-8")
        if sys.version_info[0] >= 3:
            sock_file = codecs.StreamReaderWriter(raw_sock_file,
                                                  utf8_reader,
                                                  utf8_writer)
        else:
            # Python 2 gets a recoding stream instead of a plain
            # reader/writer pair.
            sock_file = codecs.StreamRecoder(
                raw_sock_file,
                codecs.getencoder("utf-8"),
                codecs.getdecoder("utf-8"),
                utf8_reader,
                utf8_writer,
            )

        self._handle = sys.stdin = sys.stdout = sock_file

        # nc negotiation doesn't support telnet options
        if not reverse:
            import telnetlib as tn

            # Announce each option and insist on the matching DO reply.
            for option in (tn.SGA, tn.ECHO):
                raw_sock_file.write(tn.IAC + tn.WILL + option)
                resp = raw_sock_file.read(3)
                assert resp == tn.IAC + tn.DO + option

        Debugger.__init__(self,
                          stdin=self._handle,
                          stdout=self._handle,
                          term_size=term_size)
示例#17
0
文件: remote.py 项目: crimoniv/pudb
    def __init__(self,
                 host=PUDB_RDB_HOST,
                 port=PUDB_RDB_PORT,
                 port_search_limit=100,
                 out=sys.stdout,
                 term_size=None):
        """Listen for one client and run the debugger over its connection.

        A listening socket is taken from ``self.get_avail_port`` and a
        single connection is accepted.  ``sys.stdin`` and ``sys.stdout`` are
        then replaced by a UTF-8 stream on that connection (the previous
        handles are kept in ``self._prev_handles``), telnet options are
        negotiated on the raw socket, and ``Debugger.__init__`` is called
        with the stream and ``term_size``.
        """
        self.active = True
        self.out = out

        self._prev_handles = sys.stdin, sys.stdout

        self._sock, this_port = self.get_avail_port(host, port,
                                                    port_search_limit)
        self._sock.setblocking(1)
        self._sock.listen(1)
        self.ident = '{0}:{1}'.format(self.me, this_port)
        self.host = host
        self.port = this_port
        self.say(BANNER.format(self=self))

        self._client, address = self._sock.accept()
        self._client.setblocking(1)
        self.remote_addr = ':'.join(map(str, address))
        self.say(SESSION_STARTED.format(self=self))

        # makefile ignores encoding if there's no buffering.
        raw_sock_file = self._client.makefile("rwb", 0)
        import codecs

        utf8_reader = codecs.getreader("utf-8")
        utf8_writer = codecs.getwriter("utf-8")
        if sys.version_info[0] >= 3:
            sock_file = codecs.StreamReaderWriter(raw_sock_file,
                                                  utf8_reader,
                                                  utf8_writer)
        else:
            # Python 2 gets a recoding stream instead of a plain
            # reader/writer pair.
            sock_file = codecs.StreamRecoder(raw_sock_file,
                                             codecs.getencoder("utf-8"),
                                             codecs.getdecoder("utf-8"),
                                             utf8_reader,
                                             utf8_writer)

        self._handle = sys.stdin = sys.stdout = sock_file

        import telnetlib as tn

        # Announce each option and insist on the matching DO reply.
        for option in (tn.SGA, tn.ECHO):
            raw_sock_file.write(tn.IAC + tn.WILL + option)
            resp = raw_sock_file.read(3)
            assert resp == tn.IAC + tn.DO + option

        Debugger.__init__(self,
                          stdin=self._handle,
                          stdout=self._handle,
                          term_size=term_size)
示例#18
0
  def create_node(self, filename, rel_filename, lang):
    """Read the file to include and select the requested lines from it.

    The file is decoded with the ``encoding`` option (defaulting to the
    environment's ``source_encoding``).  The ``pyobject`` option restricts
    the lines to the span ``ModuleAnalyzer`` reports for that object, and
    the ``lines`` option then picks lines by number.

    Returns a one-element list holding a reporter warning when the file
    cannot be read or decoded, the object is not found, the line spec is
    invalid, or no lines remain.

    NOTE(review): nothing is built or returned after the line selection in
    this body, and ``rel_filename`` and ``lang`` are unused; the function
    looks truncated -- confirm against the full source.
    """
    document = self.state.document
    env = document.settings.env

    # Read the contents of the file to include
    encoding = self.options.get('encoding', env.config.source_encoding)
    codec_info = codecs.lookup(encoding)

    try:
      f = codecs.StreamReaderWriter(open(filename, 'rb'),
          codec_info[2], codec_info[3], 'strict')
      try:
        lines = f.readlines()
      finally:
        # Close the file even when decoding fails part-way through.
        f.close()
    except (IOError, OSError):
      print_err('Failed to read %r' % filename)
      return [document.reporter.warning(
        'Include file %r not found or reading it failed' % filename,
        line=self.lineno)]
    except UnicodeError:
      # Build the message once; it goes to both stderr and the reporter.
      message = ('Encoding %r used for reading included file %r seems to '
        'be wrong, try giving an :encoding: option' %
        (encoding, filename))
      print_err(message)
      return [document.reporter.warning(message)]

    objectname = self.options.get('pyobject')

    if objectname is not None:
      from sphinx.pycode import ModuleAnalyzer
      analyzer = ModuleAnalyzer.for_file(filename, '')
      tags = analyzer.find_tags()

      if objectname not in tags:
        return [document.reporter.warning(
          'Object named %r not found in include file %r' %
          (objectname, filename), line=self.lineno)]
      else:
        lines = lines[tags[objectname][1]-1 : tags[objectname][2]-1]

    linespec = self.options.get('lines')
    if linespec is not None:
      try:
        linelist = parselinenos(linespec, len(lines))
      except ValueError as err:
        # "as" works on Python 2.6+ and 3; the comma form is Python 2 only.
        return [document.reporter.warning(str(err), line=self.lineno)]

      # just ignore nonexisting lines
      nlines = len(lines)
      lines = [lines[i] for i in linelist if i < nlines]

      if not lines:
        return [document.reporter.warning(
          'Line spec %r: no lines pulled from include file %r' %
          (linespec, filename), line=self.lineno)]
示例#19
0
 def _parseBuckOut(file_path):
     """Parse the log at ``file_path`` with ``_parseLogFile``.

     A path ending in '.gz' is opened with gzip and decoded as UTF-8 through
     a codecs stream; any other path is opened directly as UTF-8 text.
     """
     if not file_path.endswith('.gz'):
         with io.open(file_path, mode='r', encoding='utf-8') as buck_out:
             return RuleKeyStructureInfo._parseLogFile(buck_out)

     with gzip.open(file_path, 'rb') as compressed:
         codec = codecs.lookup('utf-8')
         decoded = codecs.StreamReaderWriter(
             compressed, codec.streamreader, codec.streamwriter)
         return RuleKeyStructureInfo._parseLogFile(decoded)
示例#20
0
 def _parseBuckOut(file_path):
     """Hand the log stored at ``file_path`` to ``_parseLogFile``.

     Gzip-compressed logs (paths ending in ".gz") are decompressed and
     decoded as UTF-8 via a codecs stream; all other logs are opened as
     UTF-8 text.
     """
     is_compressed = file_path.endswith(".gz")
     if is_compressed:
         with gzip.open(file_path, "rb") as gz_stream:
             utf8 = codecs.lookup("utf-8")
             text_stream = codecs.StreamReaderWriter(
                 gz_stream, utf8.streamreader, utf8.streamwriter)
             return RuleKeyStructureInfo._parseLogFile(text_stream)

     with io.open(file_path, mode="r", encoding="utf-8") as text_stream:
         return RuleKeyStructureInfo._parseLogFile(text_stream)
示例#21
0
def open_file(path, mode='r', encoding='utf-8', **kwargs):
    """Yield a text stream over ``path`` opened through ``FileSystems``.

    Mode 'r' opens the path with ``FileSystems.open`` and mode 'w' creates
    it with ``FileSystems.create``; ``kwargs`` are passed on to that call.
    The binary handle is wrapped in a ``codecs.StreamReaderWriter`` for
    ``encoding`` and stays open while the generator is suspended.

    Raises:
      ValueError: when the generator is started with any other mode.
    """
    if mode not in ('r', 'w'):
        raise ValueError('invalid mode: %s' % mode)

    codec = codecs.lookup(encoding)
    if mode == 'r':
        open_binary = FileSystems.open
    else:
        open_binary = FileSystems.create
    with open_binary(path, **kwargs) as handle:
        # Python 3 CSV package expects a text file
        yield codecs.StreamReaderWriter(handle, codec.streamreader,
                                        codec.streamwriter)
示例#22
0
def uopen(fileName, encoding = 'ascii', mode = 'r'):
    """Open ``fileName`` with ``zopen`` and wrap it for ``encoding``.

    Args:
      fileName: Passed to ``zopen`` unchanged.
      encoding: Name of the codec to apply (default 'ascii').
      mode: Passed to ``zopen``.  'w' and 'a' return a stream writer, 'r'
        returns a stream reader, and any other mode returns a combined
        ``codecs.StreamReaderWriter``.

    Returns:
      The codec stream wrapping the object ``zopen`` returned.
    """
    encoder, decoder, streamReader, streamWriter = codecs.lookup(encoding)

    fd = zopen(fileName, mode)

    # Indentation below is spaces only: the former tab/space mix is rejected
    # by Python 3 with a TabError.
    if mode in ('w', 'a'):
        return streamWriter(fd)
    elif mode == 'r':
        return streamReader(fd)
    else:
        return codecs.StreamReaderWriter(fd, streamReader, streamWriter)
示例#23
0
 def _unicode_open(file, encoding, errors='strict'):
     """Wrap the open stream ``file`` in a reader/writer for ``encoding``.

     The stream classes are taken from the codec lookup result by index
     when that result is a tuple and by attribute name otherwise.  The
     returned ``codecs.StreamReaderWriter`` uses ``errors`` as its error
     handling scheme and carries ``encoding`` in its ``encoding`` attribute.
     """
     codec = codecs.lookup(encoding)
     if isinstance(codec, tuple):
         reader_cls, writer_cls = codec[2], codec[3]
     else:
         reader_cls, writer_cls = codec.streamreader, codec.streamwriter
     wrapped = codecs.StreamReaderWriter(file, reader_cls, writer_cls, errors)
     wrapped.encoding = encoding
     return wrapped
示例#24
0
 def __init__(self, enabled=True):
     """Record the current ``sys.stdout`` and create the capture buffer.

     On Python 2 the buffer is wrapped in a UTF-8
     ``codecs.StreamReaderWriter``.  ``self.text`` starts out as None.
     """
     self.enabled = enabled
     self.orig_stdout = sys.stdout

     capture = cStringIO()
     if six.PY2:
         # http://stackoverflow.com/questions/1817695/stringio-accept-utf8
         utf8 = codecs.lookup('utf8')
         capture = codecs.StreamReaderWriter(
             capture, utf8.streamreader, utf8.streamwriter)
     self.cap_stdout = capture

     self.text = None
示例#25
0
 def __init__(self, name, filters, tpl, ctx, version_filename=False):
     """Inits the PatchBuffer class.

     Creates an in-memory buffer together with a UTF-8 stream wrapper
     around it, stores the given arguments on the instance, and marks the
     buffer as not modified.
     """
     utf8 = codecs.lookup("utf-8")
     backing = cStringIO.StringIO()
     self._buffer = backing
     self._codecinfo = utf8
     self._wrapper = codecs.StreamReaderWriter(
         backing, utf8.streamreader, utf8.streamwriter)

     self.name = name
     self.filters = filters
     self.tpl = tpl
     self.ctx = ctx
     self.version_filename = version_filename
     self.modified = False
示例#26
0
def apply_htmlparser(html, maxcol=MAXCOL, codec='utf8'):
    """Extract the text of an HTML string with an htmllib.HTMLParser.

        The parser is an htmllib.HTMLParser instance, slightly modified for
        Unicode support.

        Adapted from http://www.bazza.com/~eaganj/weblog/2006/04/04/printing-html-as-text-in-python-with-unicode/

        @type  html: unicode
        @param html: The HTML to extract text from (eg. u"<html><body><h1>Hello</h1>...")
        @type  maxcol: int
        @param maxcol: The maxcol value to passed to formatter.DumbWriter()
        @type  codec: str (passed to codecs.lookup())
        @param codec: The codec to use to parse the HTML.

        @rtype : str
        @return: The text parsed from the HTML."""

    class UnicodeHTMLParser(htmllib.HTMLParser):
        """HTMLParser that can handle unicode charrefs"""

        entitydefs = dict(
            (entity, unichr(point))
            for entity, point in htmlentitydefs.name2codepoint.items())

        def handle_charref(self, name):
            """Emit unicode instead of binary strings for 8-bit char references."""
            try:
                code = int(name)
            except ValueError:
                self.unknown_charref(name)
                return
            # Only code points 0..255 are accepted here.
            if code < 0 or code > 255:
                self.unknown_charref(name)
                return
            self.handle_data(chr(code) if code <= 127 else unichr(code))

    buffer = StringIO()
    _encode, _decode, reader_cls, writer_cls = codecs.lookup(codec)
    codecio = codecs.StreamReaderWriter(buffer, reader_cls, writer_cls, 'replace')
    text_writer = formatter.DumbWriter(codecio, maxcol)

    parser = UnicodeHTMLParser(formatter.AbstractFormatter(text_writer))
    parser.feed(html)
    parser.close()

    # Rewind and read back everything the formatter wrote.
    codecio.seek(0)
    result = codecio.read()
    buffer.close()
    codecio.close()

    return result
示例#27
0
def uopen(fileName, encoding='utf-8', mode='r'):
    """Open ``fileName`` with ``zopen`` and wrap the result for ``encoding``.

    When ``zopen`` hands back one of the standard streams it is returned
    unwrapped.  Otherwise mode 'r' yields a stream reader, modes 'w' and
    'a' yield a stream writer, and any other mode yields a combined
    ``codecs.StreamReaderWriter``.
    """
    _encode, _decode, reader_cls, writer_cls = codecs.lookup(encoding)

    fd = zopen(fileName, mode)
    if fd in [sys.stdout, sys.stdin, sys.stderr]:
        return fd

    if mode == 'r':
        return reader_cls(fd)
    if mode in ('w', 'a'):
        return writer_cls(fd)
    return codecs.StreamReaderWriter(fd, reader_cls, writer_cls)
示例#28
0
def ConvertHtmlToText(strHTML):
    '''Convert HTML to plain text with CRLF line endings.

    strHTML should be passed as utf8 text.  It is fed through an
    htmllib-based parser that writes to an in-memory buffer; the buffer is
    then read back decoded as UTF-8 and every "\\n" in the result is
    replaced by "\\r\\n".
    '''
    class Formatter(formatter.AbstractFormatter):
        # Formatter with a customized line-break handler.
        def add_line_break(self):

            # Reset label/paragraph-skip state unless a hard break or
            # paragraph end is already pending.
            if not (self.hard_break or self.para_end):
                self.have_label = self.parskip = 0

            self.writer.send_line_break()
            self.hard_break = self.nospace = 1
            self.softspace = 0

    class HTML2TextParser(htmllib.HTMLParser):
        # entities should be encoded as utf8
        entitydefs = dict([(k, unichr(v).encode('utf_8'))
                           for k, v in htmlentitydefs.name2codepoint.items()])

        def anchor_end(self):
            # Just forget the current anchor.
            if self.anchor:
                self.anchor = None

        def convert_charref(self, name):
            # NOTE(review): `n` is never used; the call apparently only
            # serves to reject names that are not a valid integer code
            # point (ValueError) -- confirm.
            try:
                n = unichr(int(name))
            except ValueError:
                return
            return self.convert_codepoint(int(name))

        def convert_codepoint(self, codepoint):
            # codepoint should also be encoded as utf8
            return unichr(codepoint).encode('utf_8')

        def handle_image(self, src, alt, *args):
            # ignore images
            pass

    # cStringIO for output string stream
    sio = cStringIO.StringIO()
    encoder, decoder, reader, writer = codecs.lookup('utf_8')
    utf8io = codecs.StreamReaderWriter(sio, reader, writer, 'replace')
    # NOTE(review): the DumbWriter is given `sio` directly, so `utf8io` is
    # only used to read the result back below -- confirm that is intended.
    # `writer` is rebound here from the codec's stream writer class.
    writer = formatter.DumbWriter(sio)
    prettifier = Formatter(writer)
    parser = HTML2TextParser(prettifier)
    # Parse HTML to plain text
    parser.feed(strHTML)
    parser.close()
    # Rewind and read back everything that was written, decoded as UTF-8.
    utf8io.seek(0)
    result = utf8io.read()
    sio.close()
    utf8io.close()
    return result.replace('\n', '\r\n')
示例#29
0
        def _open(self):
            """Opens the log file without handle inheritance but with file sharing.

            The file is opened with ``shared_open``.  When ``self.encoding``
            is set, the handle is wrapped in a ``codecs.StreamReaderWriter``
            for that encoding that replaces characters it cannot convert.

            Ignores self.mode.
            """
            handle = shared_open(self.baseFilename)
            if not self.encoding:
                return handle

            # Do the equivalent of
            # codecs.open(self.baseFilename, self.mode, self.encoding)
            codec = codecs.lookup(self.encoding)
            wrapped = codecs.StreamReaderWriter(handle, codec.streamreader,
                                                codec.streamwriter, 'replace')
            wrapped.encoding = self.encoding
            return wrapped
示例#30
0
def use_codec(open_file, encoding=None, errors='strict'):
    """
    Wrap an already open file the way "codecs.open()" wraps one it opens.

    With no encoding the file object is returned unchanged.  Otherwise a
    "codecs.StreamReaderWriter" for that encoding is returned, using
    "errors" as its error handling scheme.

    """
    if encoding is not None:
        codec = codecs.lookup(encoding)
        wrapped = codecs.StreamReaderWriter(open_file, codec.streamreader,
                                            codec.streamwriter, errors)
        # Expose the codec name to simplify introspection
        wrapped.encoding = encoding
        return wrapped
    return open_file