def GetConverterEncoding(self):
    """Return the name of the encoding to use for converted HTML.

    The result is 'utf-8' when ``settings.utf8_html`` is set.  Otherwise it
    is ``settings.encoding`` when that is non-empty, and the value of
    ``utils.getCurrentEncoding()`` as the last resort.
    """
    if settings.utf8_html:
        return 'utf-8'
    if settings.encoding:
        return settings.encoding
    return utils.getCurrentEncoding()
def SaveToDisk(self, filename):
    """Write the current page source to *filename* and mark the editor clean.

    The text is taken from ``self.source`` when
    ``self.notebook.GetSelection()`` is 1, otherwise from ``self.webview``.
    It is encoded with the encoding ``htmlutils.GetEncoding`` reports for
    that text, or with ``utils.getCurrentEncoding()`` when none is reported.

    Encoding and I/O errors propagate to the caller; in that case
    ``self.dirty`` and ``self.filename`` are left untouched.
    """
    # The web view is always queried first, even when the text from the
    # source control ends up being the one that is saved.
    source = self.webview.GetPageSource()
    if self.notebook.GetSelection() == 1:
        source = self.source.GetText()

    encoding = htmlutils.GetEncoding(source)
    if not encoding:
        encoding = utils.getCurrentEncoding()
    # Encode before opening the file so a failed encode cannot truncate it.
    source = source.encode(encoding)

    afile = open(filename, "wb")
    try:
        afile.write(source)
    finally:
        # Release the handle even when the write fails.
        afile.close()

    self.dirty = False
    self.filename = filename
def copyDependentFilesAndUpdateLinks(oldfile, filename):
    """Copy the files an HTML page links to into the project and relink them.

    *filename* is analysed for file links, and each link is resolved against
    the directory of *oldfile*.  Every linked file that exists is copied to
    ``<settings.ProjectDir>/File/<basename of filename>_files/``.  When the
    copy succeeds, occurrences of the link in the page are replaced by the
    URL-quoted relative path ``../File/<basename>_files/<name>``.  The page
    is then written back to *filename* in its original encoding.

    Links whose source file is missing, or whose copy fails, are reported
    with ``print`` and left unchanged.
    """
    myanalyzer = analyzer.ContentAnalyzer()
    myanalyzer.analyzeFile(filename)
    htmldir = os.path.dirname(oldfile)

    infile = utils.openFile(filename, "r")
    try:
        html = infile.read()
    finally:
        infile.close()

    # Settle on an encoding *before* decoding.  The guess used to be made
    # after makeUnicode() had already been called with the empty encoding,
    # and it referenced an undefined name (`text`), so that path could only
    # raise NameError.
    # NOTE(review): assumes guessEncodingForText() accepts the raw,
    # still-encoded text -- confirm against utils.
    encoding = GetEncoding(html)
    if encoding is None:
        encoding = utils.getCurrentEncoding()
    if not encoding:
        encoding = utils.guessEncodingForText(html)
    html = utils.makeUnicode(html, encoding)

    if encoding and encoding.lower() in ["windows-1252", "iso-8859-1", "iso-8859-2"]:
        html = convNotSoSmartQuotesToHtmlEntity(html)

    htmlname = os.path.basename(filename)
    for link in myanalyzer.fileLinks:
        sourcefile = GetFullPathForURL(link, htmldir)
        if not os.path.exists(sourcefile):
            print("cannot find source file: " + sourcefile)
            continue

        sourcedir = os.path.dirname(sourcefile)
        depName = os.path.basename(link)
        destLink = u"../File/" + htmlname + "_files/" + depName
        # Drop the leading "../" to get the location inside the project.
        destdir = os.path.join(settings.ProjectDir,
                               os.path.dirname(destLink[3:].replace("/", os.sep)))
        if not os.path.exists(destdir):
            os.makedirs(destdir)

        if fileutils.CopyFile(depName, sourcedir, destdir):
            html = html.replace(link, urllib.quote(destLink))
        else:
            print("unable to copy file: " + sourcefile)

    # Encode first: opening for writing truncates the file, so a failing
    # encode must happen before that point.
    data = html.encode(encoding)
    output = utils.openFile(filename, "w")
    try:
        output.write(data)
    finally:
        output.close()
def __init__(self):
    """Store the encoding reported by ``utils.getCurrentEncoding()``."""
    current = utils.getCurrentEncoding()
    self.encoding = current
def ConvertFile(self, filename, outformat="html"):
    """Convert a document to HTML with an external command-line tool.

    The tool is chosen from the file extension:

    * ``.doc``/``.docx``/``.rtf``/``.rtfd`` on darwin: ``textutil``, which
      writes the result file itself;
    * ``.doc``: ``wvWare``; ``.rtf``: ``unrtf``; ``.xls``: ``xlhtml``;
      ``.ppt``: ``ppthtml``; ``.pdf``: ``pdftohtml`` -- all looked up under
      ``settings.ThirdPartyDir`` and read from their standard output.

    Args:
        filename: path of the document to convert.
        outformat: ignored; the command-line tools always produce HTML.

    Returns:
        A ``(htmlfile, outformat)`` tuple: the path of a temporary file
        meant to hold the converted HTML, and the string ``"html"``.
        Conversion is best effort: for an unknown extension, a failure, or
        a tool killed after running too long, a message is printed and the
        temporary file may be empty.
    """
    # we ignore outformat for command line tools and just use HTML
    import tempfile
    import threading
    import time
    import traceback

    handle, htmlfile = tempfile.mkstemp()
    htmlfile = htmlfile.encode(utils.getCurrentEncoding())
    os.close(handle)

    thirdpartydir = settings.ThirdPartyDir
    ext = os.path.splitext(filename)[1].lower()
    path = ""
    command = ""
    args = []
    env = None
    use_stdout = True
    outformat = "html"

    if os.name == "nt":
        thirdpartydir = win32api.GetShortPathName(thirdpartydir)
        filename = win32api.GetShortPathName(filename)

    if sys.platform.startswith("darwin") and ext in [".doc", ".docx", ".rtf", ".rtfd"]:
        command = "textutil"
        args = ["-convert", "html", "-output", htmlfile, filename]
        use_stdout = False
    elif ext == ".doc":
        path = os.path.join(thirdpartydir, "wv")
        command = "wvWare"
        if not sys.platform.startswith("win"):
            env = {"LD_LIBRARY_PATH": "../lib"}
        # NOTE(review): option and value are passed as ONE argv element,
        # exactly as before -- confirm wvWare really accepts this form.
        args = ["--config " + os.path.join("..", "share", "wv", "wvHtml.xml")]
        args.append(filename)
    elif ext == ".rtf":
        path = os.path.join(thirdpartydir, "unrtf")
        command = "unrtf"
        args = [filename]
    elif ext == ".xls":
        path = os.path.join(thirdpartydir, "xlhtml")
        command = "xlhtml"
        # -te is important, otherwise if a person has 30,000 blank
        # cells they'll end up in the converted document, making a
        # 30MB HTML page!
        args = ["-te", filename]
    elif ext == ".ppt":
        path = os.path.join(thirdpartydir, "xlhtml")
        command = "ppthtml"
        args = [filename]
    elif ext == ".pdf":
        path = os.path.join(thirdpartydir, "pdftohtml")
        command = "pdftohtml"
        args = ["-noframes", "-stdout", filename]
    else:
        print("Cannot convert file because of unknown extension: " + filename)
        # There is nothing to run.  This used to fall through and fail on
        # the unbound `args`, with the error swallowed further down.
        return htmlfile, outformat

    if os.path.exists(os.path.join(path, "bin")):
        path = os.path.join(path, "bin")
        # The "./" prefix goes together with the bin/ directory we chdir
        # into, so a system tool such as textutil is still found on PATH.
        if not sys.platform.startswith("win"):
            command = "./" + command

    max_runtime = 20.00   # seconds of polling before the tool is killed
    poll_interval = 0.01

    oldcwd = None
    try:
        oldcwd = os.getcwd()
        if os.path.exists(path):
            os.chdir(path)

        if not os.path.exists(filename):
            print("File '%s' doesn't exist!" % filename)

        if sys.platform.startswith("win"):
            htmlfile = win32api.GetShortPathName(htmlfile)

        command = command.encode(utils.getCurrentEncoding())
        mycommand = [command] + args
        print("Running command: '%s'" % " ".join(mycommand))
        myprocess = killableprocess.Popen(mycommand, stdout=subprocess.PIPE, env=env)

        # Drain stdout while the tool runs.  Reading only after exit blocks
        # the child as soon as its output exceeds the OS pipe buffer, and it
        # would then sit there until killed by the timeout below.
        chunks = []

        def drain():
            chunks.append(myprocess.stdout.read())

        reader = threading.Thread(target=drain)
        reader.setDaemon(True)
        reader.start()

        # Let's check for hung programs
        runtime = 0.0
        killed = False
        while myprocess.poll() is None:
            time.sleep(poll_interval)
            runtime += poll_interval
            if runtime >= max_runtime:
                myprocess.kill()
                killed = True
                break

        if use_stdout and not killed:
            reader.join()
            html = "".join(chunks)
            output = utils.openFile(htmlfile, "wb")
            try:
                output.write(html)
            finally:
                output.close()

        # some utilities assume their own path for extracted images
        self._CleanupTempFiles(path)
    except Exception:
        # if we can't convert, oh well ;-)
        traceback.print_exc()
        print("Unable to convert document: " + filename)
    finally:
        # Always go back to the caller's directory, also after a failure.
        if oldcwd is not None:
            os.chdir(oldcwd)

    return htmlfile, outformat