def preprocess_data(self, data, path):
    """Return ``(data, mime_type)`` for the resource stored at *path*.

    The MIME type is looked up in ``self.mime_map`` and falls back to
    ``self.guess_type(path)``. When its lower-cased form is not a member
    of ``OEB_DOCS`` the data is handed back untouched together with that
    MIME type.

    Otherwise *data* is decoded (codec taken from ``self.codec_map``,
    defaulting to UTF-8, with undecodable bytes replaced), run through
    ``cleanup_html`` and re-encoded as UTF-8. The reported MIME type is
    then ``text/html`` or ``application/xhtml+xml`` (both with a UTF-8
    charset parameter) depending on what ``load_as_html`` returns for
    the cleaned markup.
    """
    # The fallback is computed up front, exactly as a dict.get() default
    # argument would be evaluated.
    guessed = self.guess_type(path)
    mime = self.mime_map.get(path, guessed)

    if mime.lower() not in OEB_DOCS:
        return data, mime

    codec = self.codec_map.get(path, 'utf-8')
    markup = cleanup_html(data.decode(codec, 'replace'))
    payload = markup.encode('utf-8')

    # The output is always UTF-8, so the charset is stated explicitly in
    # whichever MIME type is chosen.
    mime = (
        'text/html; charset=utf-8'
        if load_as_html(markup)
        else 'application/xhtml+xml; charset=utf-8'
    )
    return payload, mime