Пример #1
0
 def get_filetype(self, filepath, dom):
     """Classify a parsed XML document as "toolbox" or "fieldworks".

     The decision uses the name of the document element together with the
     name of one of its child nodes: the second child when there are at
     least two children, otherwise the only child.

     Note to developer: Try to make it so that this function
     can never silently fail, even if for example a JPEG file is attempted.

     :param filepath: path of the file; only used in error messages
     :param dom: object exposing ``documentElement``, or None
     :returns: "toolbox" or "fieldworks"
     :raises exceptions.FileAccessError: if ``dom`` is None or the
         structure matches neither format
     """
     logger.debug(util.funcName('begin'))
     if dom is None:
         raise exceptions.FileAccessError("Error with file: %s", filepath)
     root = dom.documentElement
     probe = None
     if root.hasChildNodes():
         # NOTE(review): the second child is preferred, presumably because
         # the first one is often a whitespace text node -- confirm.
         index = 1 if len(root.childNodes) >= 2 else 0
         probe = root.childNodes[index]
     filetype = ""
     if probe:
         if (root.nodeName == "database"
                 and re.match(r"[a-zA-Z0-9]+Group", probe.nodeName)):
             filetype = "toolbox"
         elif (root.nodeName == "document"
               and probe.nodeName == "interlinear-text"):
             filetype = "fieldworks"
     if not filetype:
         # Either there was no child to inspect or no known layout matched.
         raise exceptions.FileAccessError(
             "File does not seem to be from Toolbox or FieldWorks: %s",
             filepath)
     logger.debug("File type is %s", filetype)
     return filetype
Пример #2
0
    def loadLibrary(self):
        """Load the ECDriver library and bind the functions used from it.

        Returns immediately if ``self.loaded`` is already set.  On Windows
        the library is requested by the name "ECDriver" and the function
        names carrying a 'W' suffix are used.  On other systems a list of
        known install locations is searched before falling back to the bare
        name "libecdriver.so".

        Sets ``self.funcIsEcInstalled``, ``self.funcSelectConverter``,
        ``self.funcInitConverter``, ``self.funcConvertString``,
        ``self.funcDescription``, ``self.funcCleanup`` (Linux only),
        ``self.funcAddConverter`` (None when the library does not export it)
        and finally ``self.loaded``.

        :raises exceptions.FileAccessError: if the library cannot be loaded
            or one of the required functions is missing
        """
        if self.loaded:
            return
        onWindows = platform.system() == "Windows"
        if onWindows:
            libfile = "ECDriver"
            wide = 'W'  # use functions named with 'W' for wide characters
        else:
            wide = ''
            ## Look for libecdriver.so in order of location precedence.
            liblocs = [(prefix, dirname, libname)
                       for dirname in ("encConverters", "fieldworks")
                       for prefix in ("/usr/local", "/usr")
                       for libname in ("libecdriver.so", "libecdriver_64.so")]
            libfile = ""
            for prefix, dirname, libname in liblocs:
                filepath = os.path.join(prefix, "lib", dirname, libname)
                if os.path.exists(filepath):
                    libfile = filepath
                    break
            if not libfile:
                # Perhaps it is in current dir, LD_LIBRARY_PATH or ldconfig.
                libfile = "libecdriver.so"
        logger.debug("Loading %s", libfile)
        try:
            if onWindows:
                try:
                    # Python 3.8 and newer no longer search PATH by default
                    # when loading a DLL; winmode=0 asks for the standard
                    # Windows search order.  See
                    # docs.python.org/3/whatsnew/3.8.html#bpo-36085-whatsnew
                    libecdriver = ctypes.CDLL(libfile, winmode=0)
                except TypeError:
                    # Python 3.7 or older: no winmode parameter.
                    libecdriver = ctypes.cdll.LoadLibrary(libfile)
            else:
                libecdriver = ctypes.cdll.LoadLibrary(libfile)
        except OSError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc

        logger.debug("Getting functions from library")
        try:
            self.funcIsEcInstalled = libecdriver.IsEcInstalled
            self.funcSelectConverter = getattr(
                libecdriver, 'EncConverterSelectConverter' + wide)
            self.funcInitConverter = getattr(
                libecdriver, 'EncConverterInitializeConverter' + wide)
            self.funcConvertString = getattr(
                libecdriver, 'EncConverterConvertString' + wide)
            self.funcDescription = getattr(
                libecdriver, 'EncConverterConverterDescription' + wide)
            if platform.system() == "Linux":
                self.funcCleanup = libecdriver.Cleanup
        except AttributeError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc
        logger.debug("Library successfully loaded.")
        try:
            self.funcAddConverter = getattr(libecdriver,
                                            'EncConverterAddConverter' + wide)
        except AttributeError:
            # This function is optional.  Store a falsy placeholder so that
            # code testing ``self.funcAddConverter`` does not hit a missing
            # attribute.
            self.funcAddConverter = None
            logger.warning("Could not load AddConverter function.")
        self.loaded = True
Пример #3
0
 def read(self):
     """Read in the data and return whatever ``self.readFile()`` yields.

     The original author documents the result as a list with elements of
     type FontItem.  An empty list is returned (after showing the error in
     the message box) when the temporary directory cannot be created.

     Tries to overcome several zipfile reading exceptions that may occur:
     on a bad zip file, or on a FileAccessError whose message starts with
     "Error reading file", the file is converted with
     ``self.convert_to_odt()`` and read a second time.

     :raises exceptions.FileAccessError: for other access errors, and in
         place of FileNotFoundError from the first read attempt
     """
     logger.debug(util.funcName('begin'))
     try:
         self.make_temp_dir()
     except exceptions.FileAccessError as exc:
         self.msgbox.displayExc(exc)
         return []
     self.progressRange_partNum = 0
     data = None
     try:
         data = self.readFile()
     except zipfile.BadZipFile as exc:
         logger.warning(exc)
         self.convert_to_odt()
         data = self.readFile()
     except exceptions.FileAccessError as exc:
         if not exc.msg.startswith("Error reading file"):
             # Not a problem that conversion could fix.
             raise exc
         logger.warning(exc)
         self.convert_to_odt()
         data = self.readFile()
     except FileNotFoundError as exc:
         raise exceptions.FileAccessError(str(exc))
     # NOTE(review): a call to self.cleanup() was disabled here.
     #self.cleanup()
     logger.debug(util.funcName('end'))
     return data
    def pickConverter(self):
        """Let the user pick a converter and store the choice.

        Calls the library's select-converter function.  A status of -1 is
        treated as the user cancelling and leaves ``self.config`` untouched.
        Otherwise ``self.config`` is replaced by a new ConverterSettings
        holding the chosen name, direction and normalization form.

        :raises exceptions.FileAccessError: if EncConverters reports that
            it is not installed (``verifyStatusOk`` may raise as well)
        """
        logger.debug(util.funcName('begin'))
        self.loadLibrary()
        if not self.loaded:
            return
        if not self.funcIsEcInstalled():
            raise exceptions.FileAccessError(
                "EncConverters does not seem to be installed properly.")
        # Output parameters filled in by the library call.
        nameBuf = createBuffer(1024)
        forwardOut = ctypes.c_bool(False)
        normFormOut = ctypes.c_ushort(0)
        logger.debug("Calling funcSelectConverter.")
        status = self.funcSelectConverter(
            nameBuf, ctypes.byref(forwardOut), ctypes.byref(normFormOut))
        if status == -1:
            logger.debug(
                "EncConverters returned %d.  User probably pressed Cancel.",
                status)
            return
        verifyStatusOk(status)

        logger.debug("Converter name was %r", nameBuf.value)
        self.config = ConverterSettings(self.config.userVars)
        onWindows = platform.system() == "Windows"
        # Off Windows the buffer value is decoded from UTF-8.
        self.config.convName = (
            nameBuf.value if onWindows else nameBuf.value.decode("utf-8"))
        self.config.forward = forwardOut.value
        self.config.normForm = normFormOut.value
        logger.debug(util.funcName('end'))
 def loadFile(self, filepath):
     """Parse the XML file at ``filepath`` and return its DOM.

     :param filepath: path of the file to parse
     :returns: the document produced by ``xml.dom.minidom.parse``
     :raises exceptions.FileAccessError: if the file does not exist,
         cannot be read, or is not well-formed XML
     """
     logger.debug(util.funcName('begin', args=filepath))
     if not os.path.exists(filepath):
         raise exceptions.FileAccessError("Cannot find file %s", filepath)
     try:
         dom = xml.dom.minidom.parse(filepath)
     except (xml.parsers.expat.ExpatError, IOError) as exc:
         # IOError covers files that exist but cannot be opened (for
         # example a directory or a permission problem), so that the
         # documented exception type is the only one callers see.
         raise exceptions.FileAccessError(
             "Error reading file %s\n\n%s",
             filepath, str(exc).capitalize()) from exc
     if dom is None:
         # Defensive check kept from the original implementation.
         raise exceptions.FileAccessError("Error reading file %s", filepath)
     logger.debug(util.funcName('end'))
     return dom
Пример #6
0
    def _fillInData(self, wordList, word_i1, word_i2):
        """Write the words at indexes word_i1..word_i2 (inclusive) to the sheet.

        One row tuple is built per word through ``self.colOrder``, and the
        whole batch is written with a single ``setDataArray`` call.
        Afterwards ``self.progressRanges`` is updated with ``word_i1``.

        :param wordList: indexable collection of word objects
        :param word_i1: index of the first word to write
        :param word_i2: index of the last word to write
        :raises exceptions.FileAccessError: if writing to the sheet raises
            RuntimeException
        """
        columns = self.colOrder  # shorthand variable name
        rows = []
        for index in range(word_i1, word_i2 + 1):
            entry = wordList[index]
            columns.resetRowData()
            columns.setRowVal('colWord', entry.text)
            columns.setRowVal('colOccur', entry.occurrences)
            columns.setRowVal('colOk', entry.isCorrect_str())
            columns.setRowVal('colChange', entry.correction)
            columns.setRowVal('colSrc', entry.sources_str())
            rows.append(columns.getRowTuple())

        # start at second row, so index 0 is row 2
        firstRow, lastRow = word_i1 + 2, word_i2 + 2
        # Letter of the last column, counting from 'A'.
        lastCol = chr(ord('A') + len(self.colOrder.COLUMNS) - 1)
        rangeName = "A%d:%s%d" % (firstRow, lastCol, lastRow)
        logger.debug("Adding %d rows to range %s", len(rows), rangeName)
        targetRange = self.sheet.getCellRangeByName(rangeName)
        try:
            targetRange.setDataArray(tuple(rows))
        except RuntimeException as exc:
            raise exceptions.FileAccessError(
                "There was a problem while writing the list.\n\n%s", exc)
        self.progressRanges.update(word_i1)
Пример #7
0
 def convert_to_odt(self):
     """Opens a file such as .doc, saves as .odt and then closes it.

     The copy is stored in ``self.tempBaseDir`` under the original base
     name plus "_converted.odt", and ``self.fileconfig.filepath`` is
     changed to point at it.  If that copy already exists it is reused
     without opening the source document.

     :raises exceptions.FileAccessError: if saving raises
         ErrorCodeIOException
     """
     logger.debug(util.funcName('begin'))
     self.incrementProgressPart()
     stem = os.path.splitext(
         os.path.basename(self.fileconfig.filepath))[0]
     newpath = os.path.join(self.tempBaseDir, stem + "_converted.odt")
     if os.path.exists(newpath):
         logger.warning("File already exists: %s", newpath)
         self.fileconfig.filepath = newpath
         logger.debug(util.funcName('return'))
         return
     reader = doc_reader.DocReader(self.fileconfig, self.unoObjs, 0)
     reader.loadDoc(self.fileconfig.filepath)
     openedDoc = reader.doc
     # Filter names tried earlier and left disabled:
     #util.createProp("FilterName", "StarOffice XML (Writer)"),
     #util.createProp("FilterName", "writer8"),
     storeProps = (util.createProp("Overwrite", False), )
     logger.debug("Saving as %s", newpath)
     targetUrl = uno.systemPathToFileUrl(os.path.realpath(newpath))
     try:
         openedDoc.document.storeAsURL(targetUrl, storeProps)
     except ErrorCodeIOException:
         raise exceptions.FileAccessError("Error saving %s", newpath)
     try:
         openedDoc.document.close(True)
     except CloseVetoException:
         # Failing to close is only logged; the converted file is still used.
         logger.warning("Could not close %s", newpath)
     self.fileconfig.filepath = newpath
     self.incrementProgressPart()
     logger.debug(util.funcName('end'))
    def addConverter(self, mappingName, converterSpec, conversionType,
                     leftEncoding, rightEncoding, processType):
        """Add a converter to the repository.
        Used for automated testing.

        :param mappingName: friendly name key that the converter is to be
                            accessed with
        :param converterSpec: technical spec of the converter
                              (e.g. TECkit & CC = filespec to map)
        :param conversionType: ConvType parameter indicating the type of
                               conversion (e.g. "Legacy_to_from_Unicode")
        :param leftEncoding: optional technical name of the left-hand side
                             encoding (e.g. SIL-ANNAPURNA-05)
        :param rightEncoding: optional technical name of the right-hand side
                              encoding (e.g. UNICODE)
        :param processType: ProcessTypeFlags flag to indicate the
                            implementation/transduction
                            type (e.g. UnicodeEncodingConversion) from which
                            you can do later filtering (e.g. ByEncodingID)
        :raises exceptions.FileAccessError: if EncConverters is not
            installed or the library has no AddConverter function
            (``verifyStatusOk`` may raise as well)
        :raises exceptions.LogicError: if no converter name was given
        """
        logger.debug(
            util.funcName('begin',
                          args=(mappingName, converterSpec, conversionType,
                                leftEncoding, rightEncoding, processType)))
        self.loadLibrary()
        if not self.funcIsEcInstalled():
            raise exceptions.FileAccessError(
                "EncConverters does not seem to be installed properly.")
        # The attribute may never have been assigned when the library does
        # not export the function, so read it without risking AttributeError.
        funcAddConverter = getattr(self, 'funcAddConverter', None)
        if not funcAddConverter:
            raise exceptions.FileAccessError(
                "Could not get AddConverter function.  "
                "Automatically adding a converter requires SEC4.0 or higher.")
        c_convName = getStringParam(mappingName)
        if not c_convName:
            raise exceptions.LogicError("No converter was specified.")
        c_convSpec = getStringParam(converterSpec)
        c_convType = ctypes.c_ushort(conversionType)
        c_leftEnc = getStringParam(leftEncoding)
        c_rightEnc = getStringParam(rightEncoding)
        c_processType = ctypes.c_ushort(processType)
        logger.debug("Calling funcAddConverter.")
        status = funcAddConverter(c_convName, c_convSpec, c_convType,
                                  c_leftEnc, c_rightEnc, c_processType)
        verifyStatusOk(status)
        logger.debug(util.funcName('end'))
Пример #9
0
 def _read(self):
     """Parse ``self.filepath`` as XML and read it as a spelling status file.

     Stores the parsed document in ``self.dom``, moves the progress bar
     to 60 percent and then calls ``self.read_spellingStatus_file()``.

     :raises exceptions.FileAccessError: if the file is missing, is not
         well-formed XML, or the configured file type is not
         'spellingStatus'
     """
     logger.debug("Parsing file %s", self.filepath)
     if not os.path.exists(self.filepath):
         raise exceptions.FileAccessError(
             "Cannot find file %s", self.filepath)
     try:
         self.dom = xml.dom.minidom.parse(self.filepath)
     except xml.parsers.expat.ExpatError as exc:
         raise exceptions.FileAccessError(
             "Error reading file %s\n\n%s",
             self.filepath, str(exc).capitalize())
     logger.debug("Parse finished.")
     self.progressBar.updatePercent(60)
     if self.fileconfig.filetype != 'spellingStatus':
         # Only one file type is handled by this reader.
         raise exceptions.FileAccessError(
             "Unexpected file type %s", self.fileconfig.filetype)
     self.read_spellingStatus_file()
def verifyStatusOk(status):
    """Raise an exception unless ``status`` equals ``ErrStatus.NoError``.

    When ``ErrStatus.DESCRIPTIONS`` has an entry for the status, it is
    appended to the message in parentheses.

    :param status: status code returned by an EncConverters call
    :raises exceptions.FileAccessError: for any status other than NoError
    """
    if status != ErrStatus.NoError:
        if status in ErrStatus.DESCRIPTIONS:
            detail = " (%s)" % ErrStatus.DESCRIPTIONS[status]
        else:
            detail = ""
        raise exceptions.FileAccessError(
            "Error: EncConverters returned %d%s.", status, detail)
 def _read(self):
     """Load the document at ``self.filepath`` and read its contents.

     After loading, the progress bar is moved to 60 percent,
     ``self.read_document()`` is called and the document window is made
     visible.

     :raises exceptions.FileAccessError: if loading the document raises
         FileAccessError or DocAccessError
     """
     loadErrors = (exceptions.FileAccessError, exceptions.DocAccessError)
     try:
         self.loadDoc(self.filepath)
     except loadErrors:
         # Both failure kinds are reported to the caller in the same way.
         raise exceptions.FileAccessError(
             "Error reading file %s", self.filepath)
     self.progressBar.updatePercent(60)
     self.read_document()
     logger.debug("Setting visible.")
     self.doc.window.setVisible(True)
Пример #12
0
 def _read(self):
     """Read in the data.  Modifies self.data

     Calls ``self.read_sfm_file()`` and then appends one WordInList to
     ``self.data`` for every (marker, value) pair in ``self.rawData``,
     using the value as the text and ``self.filepath`` as the source.

     :raises exceptions.FileAccessError: if the file does not exist
     """
     logger.debug("Parsing file %s", self.filepath)
     fileFound = os.path.exists(self.filepath)
     if not fileFound:
         raise exceptions.FileAccessError(
             "Cannot find file %s", self.filepath)
     self.progressBar.updatePercent(30)
     self.read_sfm_file()
     # The marker itself is not needed here, only the text after it.
     for _marker, text in self.rawData:
         entry = wordlist_structs.WordInList()
         entry.text = text
         entry.source = self.filepath
         self.data.append(entry)
 def _read(self):
     """Parse ``self.filepath`` as XML and hand it to the matching reader.

     The file type comes from ``self.get_filetype()``.  The parsed
     document is stored in ``self.dom`` before the reader for 'paxml',
     'lift' or 'xml' is called.

     :raises exceptions.FileAccessError: if the file is missing or is not
         well-formed XML
     :raises exceptions.LogicError: if the file type has no reader
     """
     filetype = self.get_filetype()
     self.progressBar.updatePercent(30)
     logger.debug("Parsing file %s", self.filepath)
     if not os.path.exists(self.filepath):
         raise exceptions.FileAccessError(
             "Cannot find file %s", self.filepath)
     try:
         self.dom = xml.dom.minidom.parse(self.filepath)
     except xml.parsers.expat.ExpatError as exc:
         raise exceptions.FileAccessError(
             "Error reading file %s\n\n%s",
             self.filepath, str(exc).capitalize())
     logger.debug("Parse finished.")
     self.progressBar.updatePercent(60)
     # Name of the reader method for each supported file type.
     readerNames = {
         'paxml': 'read_paxml_file',
         'lift': 'read_lift_file',
         'xml': 'read_toolbox_file',
     }
     readerName = readerNames.get(filetype)
     if readerName is None:
         raise exceptions.LogicError("Unexpected file type %s", filetype)
     getattr(self, readerName)()
Пример #14
0
    def _read(self):
        """Parse every file in ``self.config.fileList`` and read its examples.

        For each file the prefix and segment-number settings are copied to
        ``self``, the XML is parsed into ``self.dom``, the file type is
        determined and the matching reader (ToolboxXML or FieldworksXML)
        is run.  Progress is reported in three parts per file.

        :raises exceptions.FileAccessError: if a file is missing or cannot
            be parsed (``get_filetype`` may raise it as well)
        :raises exceptions.DataNotFoundError: if reading a file did not
            change the length of ``self.data``
        """
        progressRange = ProgressRange(ops=len(self.config.fileList),
                                      pbar=self.progressBar)
        progressRange.partSize = 3
        self.suggestions = []
        self.duplicate_refnums = set()
        # position is the 1-based index of the current element in the list
        for position, fileItem in enumerate(self.config.fileList, 1):
            path = fileItem.filepath
            logger.debug("Parsing file %s", path)
            self.prefix = fileItem.prefix
            self.use_segnum = fileItem.use_segnum
            self.dom = None
            if not os.path.exists(path):
                raise exceptions.FileAccessError("Cannot find file %s", path)
            try:
                self.dom = xml.dom.minidom.parse(path)
            except (xml.parsers.expat.ExpatError, IOError) as exc:
                raise exceptions.FileAccessError(
                    "Error reading file %s\n\n%s",
                    path, str(exc).capitalize())
            logger.debug("Parse finished.")
            progressRange.updatePart(1)
            filetype = self.get_filetype(path, self.dom)
            progressRange.updatePart(2)

            countBefore = len(self.data)
            if filetype == "toolbox":
                ToolboxXML(self).read()
            elif filetype == "fieldworks":
                FieldworksXML(self).read()
            logger.debug("Read %d examples.", len(self.data))
            if len(self.data) == countBefore:
                # Nothing was added for this file.
                raise exceptions.DataNotFoundError(
                    "Did not find any data in file %s", path)
            progressRange.update(position)
Пример #15
0
 def make_temp_dir(self):
     """Make temporary directory to extract .odt file contents.

     Sets ``self.tempBaseDir`` to the folder 'OOLT Converted Files' inside
     ``self.outdir`` (creating it if needed) and ``self.tempDir`` to the
     first numbered subfolder "001", "002", ... that does not exist yet,
     which is then created.

     :raises exceptions.FileAccessError: if a folder cannot be created or
         every candidate subfolder already exists
     """
     self.tempBaseDir = os.path.join(self.outdir, 'OOLT Converted Files')
     try:
         # exist_ok avoids a race between checking for and creating the
         # folder.
         os.makedirs(self.tempBaseDir, exist_ok=True)
     except OSError as exc:
         raise exceptions.FileAccessError(
             "Could not create temporary folder %s",
             self.tempBaseDir) from exc
     MAX_FOLDERS = 1000
     for folderNum in range(1, MAX_FOLDERS):
         tempDirCandidate = os.path.join(self.tempBaseDir,
                                         "%03d" % folderNum)
         if not os.path.exists(tempDirCandidate):
             break
     else:
         # Decided by the loop itself rather than by inspecting
         # self.tempDir, which may be unset or hold a value from an
         # earlier call.
         raise exceptions.FileAccessError(
             "Too many temporary folders in %s.", self.tempBaseDir)
     self.tempDir = tempDirCandidate
     try:
         os.mkdir(self.tempDir)
     except OSError as exc:
         raise exceptions.FileAccessError(
             "Could not create temporary folder %s", self.tempDir) from exc
 def loadDoc(self, filepath):
     """Sets self.calcUnoObjs to a loaded Calc doc.
     File will open minimized if not already open.

     :param filepath: path of the spreadsheet file to open
     :raises exceptions.FileAccessError: if the file does not exist
     :raises exceptions.DocAccessError: if getting the document objects
         raises AttributeError
     """
     logger.debug("Opening file %s", filepath)
     if not os.path.exists(filepath):
         raise exceptions.FileAccessError("Cannot find file %s", filepath)
     url = uno.systemPathToFileUrl(os.path.realpath(filepath))
     loadProps = (util.createProp("Minimized", True), )
     component = self.unoObjs.desktop.loadComponentFromURL(
         url, "_default", 0, loadProps)
     try:
         self.calcUnoObjs = self.unoObjs.getDocObjs(
             component, doctype=util.UnoObjs.DOCTYPE_CALC)
     except AttributeError:
         raise exceptions.DocAccessError()
     # otherwise it will be hidden
     self.calcUnoObjs.window.setVisible(True)
     logger.debug("Opened file.")
 def get_filetype(self):
     """Determines file type based on extension.
     Does not read file contents.

     :returns: 'paxml', 'lift' or 'xml', according to how the base name of
         ``self.filepath`` ends
     :raises exceptions.FileAccessError: if no known extension matches
     """
     logger.debug(util.funcName('begin'))
     filename = os.path.basename(self.filepath)
     # Checked in this order; the first matching pattern wins.
     knownExtensions = (
         (r"\.paxml$", 'paxml'),
         (r"\.lift$", 'lift'),
         (r"\.xml$", 'xml'),
     )
     for pattern, kind in knownExtensions:
         if re.search(pattern, filename):
             filetype = kind
             break
     else:
         raise exceptions.FileAccessError("Unknown file type for %s",
                                          filename)
     logger.debug("File type %s.", filetype)
     return filetype
 def loadDoc(self, filepath):
     """Load the document at ``filepath`` and store its objects in self.doc.

     The document is requested with the "Minimized" property set.

     :param filepath: path of the document to open
     :raises exceptions.FileAccessError: if the file does not exist
     :raises exceptions.DocAccessError: if getting the document objects
         raises AttributeError
     """
     logger.debug(util.funcName('begin', args=filepath))
     if not os.path.exists(filepath):
         raise exceptions.FileAccessError("Cannot find file %s", filepath)
     url = uno.systemPathToFileUrl(os.path.realpath(filepath))
     # Setting a filter makes some files work but then .odt fails.
     # Instead just have the user open the file first.
     #util.createProp("FilterName", "Text"),
     loadProps = (util.createProp("Minimized", True), )
     # Loading the document hidden was reported to frequently crash
     #       before OOo 2.0.  It seems to work fine now though.
     component = self.unoObjs.desktop.loadComponentFromURL(
         url, "_default", 0, loadProps)
     try:
         self.doc = self.unoObjs.getDocObjs(component)
     except AttributeError:
         raise exceptions.DocAccessError()
     logger.debug(util.funcName('end'))
 def _read(self):
     """Harvest data by grabbing word strings from one or more columns.

     Loads the spreadsheet at ``self.filepath``, then for every COLUMN
     entry in ``self.fileconfig.thingsToGrab`` appends one WordInList to
     ``self.data`` per non-empty string of that column.  Finally the
     spreadsheet window is made visible.

     :raises exceptions.FileAccessError: if loading the document raises
         FileAccessError or DocAccessError
     """
     try:
         self.loadDoc(self.filepath)
     except (exceptions.FileAccessError, exceptions.DocAccessError):
         raise exceptions.FileAccessError(
             "Error reading file %s", self.filepath)
     reader = SpreadsheetReader(self.calcUnoObjs)
     self.progressBar.updatePercent(60)
     for whatToGrab in self.fileconfig.thingsToGrab:
         if whatToGrab.grabType != wordlist_structs.WhatToGrab.COLUMN:
             continue
         columnStrings = reader.getColumnStringList(
             whatToGrab.whichOne, self.fileconfig.skipFirstRow)
         for text in columnStrings:
             if text == "":
                 continue
             ## Add word
             entry = wordlist_structs.WordInList()
             entry.text = text
             entry.source = self.filepath
             self.data.append(entry)
     logger.debug("Setting visible.")
     self.calcUnoObjs.window.setVisible(True)
 def setConverter(self, newConfig=None):
     """Initialize a converter to the specified values.

     On success the settings used are stored in ``self.config``.

     :param newConfig: type ConverterSettings; when omitted (or falsy)
         the current ``self.config`` is used
     :raises exceptions.FileAccessError: if EncConverters reports that it
         is not installed (``verifyStatusOk`` may raise as well)
     :raises exceptions.LogicError: if no converter name is available
     """
     logger.debug(util.funcName('begin'))
     # Useful for multiple converter objects with different settings.
     settings = newConfig or self.config
     self.loadLibrary()
     if not self.funcIsEcInstalled():
         raise exceptions.FileAccessError(
             "EncConverters does not seem to be installed properly.")
     nameParam = getStringParam(settings.convName)
     if nameParam is None:
         raise exceptions.LogicError("No converter was specified.")
     forwardParam = ctypes.c_bool(settings.forward)
     normFormParam = ctypes.c_ushort(settings.normForm)
     logger.debug("calling funcInitConverter with %r", settings)
     verifyStatusOk(
         self.funcInitConverter(nameParam, forwardParam, normFormParam))
     self.config = settings
     logger.debug(util.funcName('end'))
Пример #21
0
    def read_sfm_file(self):
        """
        Grabs a flat list of marker data, not organized by records of
        several markers.

        This should work whether self.fileconfig contains one field with
        several markers, or several fields with one marker each, or some
        combination of the two.

        Modifies self.rawData: a (marker, data) tuple is appended for every
        line that starts with one of the configured markers followed by a
        space.

        :raises exceptions.FileAccessError: if the file cannot be decoded
            as UTF-8
        """
        logger.debug("reading SFM file")

        # Collect (marker, marker + " ") pairs up front so the prefix is
        # built once rather than once per line.
        markerPrefixes = []
        for whatToGrab in self.fileconfig.thingsToGrab:
            if whatToGrab.grabType == wordlist_structs.WhatToGrab.SFM:
                for marker in whatToGrab.whichOne.split():
                    markerPrefixes.append((marker, marker + " "))

        try:
            # The with statement closes the file on every exit path.
            with io.open(self.filepath, mode='r', encoding='UTF8') as infile:
                for lineNum, line in enumerate(infile, 1):
                    logger.debug("Line #%d.", lineNum)
                    for marker, markerWithSpace in markerPrefixes:
                        if line.startswith(markerWithSpace):
                            logger.debug("^%s", markerWithSpace)
                            data = line[len(markerWithSpace):]
                            # Lines keep their newline when iterating over
                            # the file; strip removes it along with other
                            # surrounding whitespace.
                            data = data.strip()
                            self.rawData.append((marker, data))
        except UnicodeDecodeError as exc:
            raise exceptions.FileAccessError(
                "Error reading file %s\n\n%s",
                self.filepath, str(exc)) from exc
        logger.debug("Found %d words.", len(self.rawData))
    def loadLibrary(self):
        """Load the ECDriver library and bind the functions used from it.

        Returns immediately if ``self.loaded`` is already set.  On Windows
        the library is requested by the name "ECDriver" and the function
        names carrying a 'W' suffix are used.  On other systems a list of
        known install locations is searched before falling back to the bare
        name "libecdriver.so".

        Sets ``self.funcIsEcInstalled``, ``self.funcSelectConverter``,
        ``self.funcInitConverter``, ``self.funcConvertString``,
        ``self.funcDescription``, ``self.funcCleanup`` (Linux only),
        ``self.funcAddConverter`` (None when the library does not export it)
        and finally ``self.loaded``.

        :raises exceptions.FileAccessError: if the library cannot be loaded
            or one of the required functions is missing
        """
        if self.loaded:
            return
        if platform.system() == "Windows":
            libfile = "ECDriver"
            wide = 'W'  # use functions named with 'W' for wide characters
            logger.debug("Loading %s", libfile)
            try:
                try:
                    # Python 3.8 and newer
                    # It is recommended to use os.add_dll_directory("..."),
                    # but that would not search with environment variables. See
                    # docs.python.org/3/whatsnew/3.8.html#bpo-36085-whatsnew
                    libecdriver = ctypes.CDLL(libfile, winmode=0)
                except TypeError:
                    # Python 3.7 or older
                    libecdriver = ctypes.cdll.LoadLibrary(libfile)
            except OSError as exc:
                raise exceptions.FileAccessError(
                    "Library error: %s.", exc) from exc
        else:
            wide = ''
            ## Look for libecdriver.so in order of location precedence.
            liblocs = [(prefix, dirname, libname)
                       for dirname in ("encConverters", "fieldworks")
                       for prefix in ("/usr/local", "/usr")
                       for libname in ("libecdriver.so", "libecdriver_64.so")]
            libfile = ""
            for prefix, dirname, libname in liblocs:
                filepath = os.path.join(prefix, "lib", dirname, libname)
                if os.path.exists(filepath):
                    libfile = filepath
                    break
            if not libfile:
                # Perhaps it is in current dir, LD_LIBRARY_PATH or ldconfig.
                libfile = "libecdriver.so"
            logger.debug("Loading %s", libfile)
            try:
                libecdriver = ctypes.cdll.LoadLibrary(libfile)
            except OSError as exc:
                raise exceptions.FileAccessError(
                    "Library error: %s.", exc) from exc

        logger.debug("Getting functions from library")
        try:
            self.funcIsEcInstalled = libecdriver.IsEcInstalled
            self.funcSelectConverter = getattr(
                libecdriver, 'EncConverterSelectConverter' + wide)
            self.funcInitConverter = getattr(
                libecdriver, 'EncConverterInitializeConverter' + wide)
            self.funcConvertString = getattr(
                libecdriver, 'EncConverterConvertString' + wide)
            self.funcDescription = getattr(
                libecdriver, 'EncConverterConverterDescription' + wide)
            if platform.system() == "Linux":
                self.funcCleanup = libecdriver.Cleanup
        except AttributeError as exc:
            raise exceptions.FileAccessError(
                "Library error: %s.", exc) from exc
        logger.debug("Library successfully loaded.")
        try:
            self.funcAddConverter = getattr(libecdriver,
                                            'EncConverterAddConverter' + wide)
        except AttributeError:
            # This function is optional.  Store a falsy placeholder so that
            # code testing ``self.funcAddConverter`` does not hit a missing
            # attribute.
            self.funcAddConverter = None
            logger.warning("Could not load AddConverter function.")
        self.loaded = True