示例#1
0
 def GetConverterEncoding(self):
     """Return the name of the encoding to use for conversion.

     'utf-8' is returned whenever ``settings.utf8_html`` is truthy.
     Otherwise ``settings.encoding`` is used when it is set, and
     ``utils.getCurrentEncoding()`` is the fallback when it is not.
     """
     if settings.utf8_html:
         return 'utf-8'

     if settings.encoding:
         return settings.encoding

     return utils.getCurrentEncoding()
示例#2
0
 def SaveToDisk(self, filename):
     """Encode the current document text and write it to *filename*.

     The text is taken from ``self.source`` when notebook page 1 is
     selected, otherwise from ``self.webview``.  It is encoded with the
     value returned by ``htmlutils.GetEncoding`` for that text, falling
     back to ``utils.getCurrentEncoding()`` when that value is falsy.

     On success ``self.dirty`` is cleared and ``self.filename`` is set to
     *filename*.  Encoding and I/O errors propagate to the caller, in which
     case neither attribute is modified.
     """
     source = self.webview.GetPageSource()
     if self.notebook.GetSelection() == 1:
         source = self.source.GetText()

     encoding = htmlutils.GetEncoding(source)
     if not encoding:
         encoding = utils.getCurrentEncoding()
     # Encode before opening the file so that a failed encode cannot
     # truncate an existing file on disk.
     source = source.encode(encoding)

     afile = open(filename, "wb")
     try:
         afile.write(source)
     finally:
         # Always release the handle, even when the write fails.
         afile.close()

     self.dirty = False
     self.filename = filename
示例#3
0
def copyDependentFilesAndUpdateLinks(oldfile, filename):
    """Copy the files an HTML page links to into the project and relink them.

    *filename* is the HTML file to rewrite in place; *oldfile* is the
    location the page originally came from, whose directory is used to
    resolve the links reported by ``analyzer.ContentAnalyzer``.

    Every linked file that exists on disk is copied to
    ``<settings.ProjectDir>/File/<html name>_files/`` and each occurrence of
    the link in the HTML is replaced by the URL-quoted relative path
    ``../File/<html name>_files/<file name>``.  Links that cannot be found
    or copied are reported on stdout and left untouched.

    The encoding is taken from ``GetEncoding``, then
    ``utils.getCurrentEncoding()``, then ``utils.guessEncodingForText``,
    using the first one that yields a truthy value.
    """
    myanalyzer = analyzer.ContentAnalyzer()
    myanalyzer.analyzeFile(filename)
    htmldir = os.path.dirname(oldfile)

    infile = utils.openFile(filename, "r")
    try:
        rawhtml = infile.read()
    finally:
        infile.close()

    # Settle on an encoding *before* decoding, so that the guess fallback
    # can still inspect the text exactly as it was read from disk.
    encoding = GetEncoding(rawhtml)
    if not encoding:
        encoding = utils.getCurrentEncoding()
    if not encoding:
        encoding = utils.guessEncodingForText(rawhtml)

    html = utils.makeUnicode(rawhtml, encoding)

    if encoding and encoding.lower() in ("windows-1252", "iso-8859-1", "iso-8859-2"):
        html = convNotSoSmartQuotesToHtmlEntity(html)

    htmlname = os.path.basename(filename)
    for link in myanalyzer.fileLinks:
        sourcefile = GetFullPathForURL(link, htmldir)

        if not os.path.exists(sourcefile):
            print("cannot find source file: " + sourcefile)
            continue

        sourcedir = os.path.dirname(sourcefile)
        depName = os.path.basename(link)
        destLink = u"../File/" + htmlname + "_files/" + depName
        # destLink[3:] drops the leading "../" so the rest can be joined
        # onto the project directory.
        destdir = os.path.join(settings.ProjectDir, os.path.dirname(destLink[3:].replace("/", os.sep)))
        if not os.path.exists(destdir):
            os.makedirs(destdir)

        if fileutils.CopyFile(depName, sourcedir, destdir):
            html = html.replace(link, urllib.quote(destLink))
        else:
            print("unable to copy file: " + sourcefile)

    # Encode first: opening with "w" truncates the file, so an encoding
    # failure after the open would destroy the original content.
    data = html.encode(encoding)
    output = utils.openFile(filename, "w")
    try:
        output.write(data)
    finally:
        output.close()
示例#4
0
 def __init__(self):
     """Store the value of ``utils.getCurrentEncoding()`` as ``self.encoding``."""
     current_encoding = utils.getCurrentEncoding()
     self.encoding = current_encoding
示例#5
0
    def ConvertFile(self, filename, outformat="html"):
        """Convert a document to HTML by running an external command line tool.

        The tool is picked from the extension of *filename*: ``textutil`` on
        macOS for .doc/.docx/.rtf/.rtfd, otherwise ``wvWare`` (.doc),
        ``unrtf`` (.rtf), ``xlhtml`` (.xls), ``ppthtml`` (.ppt) or
        ``pdftohtml`` (.pdf) looked up under ``settings.ThirdPartyDir``.
        The tool is killed if it has not finished after about 20 seconds.

        *outformat* is accepted for interface compatibility but ignored;
        the output is always HTML.

        Returns a ``(htmlfile, outformat)`` tuple where *htmlfile* is the
        path of a temporary file created for the HTML output and *outformat*
        is always ``"html"``.  Failures are reported on stdout instead of
        being raised, so the returned file may be empty.
        """
        #we ignore outformat for command line tools and just use HTML
        import tempfile
        import time
        import traceback

        outformat = "html"

        handle, htmlfile = tempfile.mkstemp()
        # Only the path is needed; close the descriptor straight away so it
        # cannot leak if anything below fails.
        os.close(handle)
        htmlfile = htmlfile.encode( utils.getCurrentEncoding() )

        thirdpartydir = settings.ThirdPartyDir
        ext = os.path.splitext(filename)[1].lower()
        path = ""
        command = ""
        args = []
        html = ""
        env = None
        use_stdout = True

        if os.name == "nt":
            thirdpartydir = win32api.GetShortPathName(thirdpartydir)
            filename = win32api.GetShortPathName(filename)

        if sys.platform.startswith("darwin") and ext in [".doc", ".docx", ".rtf", ".rtfd"]:
            command = "textutil"
            args = ["-convert", "html", "-output", htmlfile, filename]
            # textutil writes the output file itself.
            use_stdout = False

        elif ext == ".doc":
            path = os.path.join(thirdpartydir, "wv")
            command = "wvWare"
            if not sys.platform.startswith("win"):
                env = {"LD_LIBRARY_PATH": "../lib"}
            # NOTE(review): option and value are passed as ONE argv element;
            # confirm wvWare accepts this, otherwise split it in two.
            args = ["--config " + os.path.join("..", "share", "wv", "wvHtml.xml")]
            args.append(filename)
        elif ext == ".rtf":
            path = os.path.join(thirdpartydir, "unrtf")
            command = "unrtf"
            args = [filename]
        elif ext == ".xls":
            path = os.path.join(thirdpartydir, "xlhtml")
            command = "xlhtml"
            # -te is important, otherwise if a person has 30,000 blank
            # cells they'll end up in the converted document, making a
            # 30MB HTML page!
            args = ["-te", filename]
        elif ext == ".ppt":
            path = os.path.join(thirdpartydir, "xlhtml")
            command = "ppthtml"
            args = [filename]
        elif ext == ".pdf":
            path = os.path.join(thirdpartydir, "pdftohtml")
            command = "pdftohtml"
            args = ["-noframes", "-stdout", filename]
        else:
            print("Cannot convert file because of unknown extension: " + filename)
            # There is no tool to run; hand back the (empty) temp file
            # instead of attempting to launch an empty command.
            return htmlfile, outformat

        if os.path.exists( os.path.join(path, "bin") ):
            path = os.path.join(path, "bin")

            if not sys.platform.startswith("win"):
                command = "./" + command

        try:
            oldcwd = os.getcwd()
            try:
                # The bundled tools are started from their own directory
                # (relative command, config and library paths above).
                if os.path.exists(path):
                    os.chdir(path)

                if not os.path.exists(filename):
                    print("File '%s' doesn't exist!" % filename)

                if sys.platform.startswith("win"):
                    htmlfile = win32api.GetShortPathName(htmlfile)

                command = command.encode( utils.getCurrentEncoding() )

                mycommand = [command] + args
                print("Running command: '%s'" % " ".join(mycommand))
                myprocess = killableprocess.Popen( mycommand, stdout=subprocess.PIPE, env=env)

                # Poll so that a hung converter can be killed.
                # NOTE(review): stdout is only read after the process has
                # exited; a tool that fills the pipe buffer would block
                # until the timeout kills it -- confirm with large inputs.
                runtime = 0.0
                killed = False
                while myprocess.poll() is None:
                    time.sleep(0.01)
                    runtime += 0.01
                    if runtime >= 20.00:
                        myprocess.kill()
                        killed = True
                        break

                if use_stdout:
                    if not killed:
                        html = myprocess.stdout.read()

                    output = utils.openFile(htmlfile, "wb")
                    try:
                        output.write(html)
                    finally:
                        output.close()

                #some utilities assume their own path for extracted images
                self._CleanupTempFiles(path)
            finally:
                # Restore the working directory even when conversion fails.
                os.chdir(oldcwd)

        except Exception:
            traceback.print_exc()
            print("Unable to convert document: " + filename) #if we can't convert, oh well ;-)

        return htmlfile, outformat