def read(self, containerList, selectList=None, excludeFlag=False):
    """Parse the open input file and append definition and data containers to the input list.

    Args:
        containerList (list): extended in place with parsed containers.
        selectList (list, optional): category names used to filter parsed content.
        excludeFlag (bool, optional): if True, selectList categories are excluded
            rather than included.

    Returns:
        None

    Raises:
        PdbxSyntaxError: on syntax errors detected by the parser.
        PdbxError: on character encoding failures or any other parsing failure.
    """
    sL = selectList if selectList else []
    catSelectD = {k: k for k in sL}
    self.__curLineNumber = 0
    try:
        self.__parser(self.__tokenizer(self.__ifh), containerList, categorySelectionD=catSelectD, excludeFlag=excludeFlag)
    except RuntimeError as e:
        # will be raised at the end of token iterator - not an error -
        logger.debug("Normal termination after reading %d lines with %s", self.__curLineNumber, str(e))
    except StopIteration:
        # will be raised at the end of token iterator - not an error -
        logger.debug("Normal termination after reading %d lines", self.__curLineNumber)
    except PdbxSyntaxError:
        logger.debug("Caught syntax exception at %d", self.__curLineNumber)
        # Bare re-raise preserves the original traceback for the caller.
        raise
    except UnicodeDecodeError as e:
        logger.debug("Caught character encoding exception at %d with %s", self.__curLineNumber, str(e))
        # Chain the cause so the underlying decode failure is not lost.
        raise PdbxError("Character encoding error at line %d" % self.__curLineNumber) from e
    except Exception as e:
        raise PdbxError("Failing at line %d with %s" % (self.__curLineNumber, str(e))) from e
    else:
        # The parser is expected to terminate via StopIteration (or RuntimeError);
        # returning normally from the try body indicates an anomalous stop.
        raise PdbxError("Miscellaneous parsing error at line %d" % self.__curLineNumber)
def __processReadLogFile(self, inputFilePath):
    """Scan parser log records and either log diagnostics or raise exceptions
    (See: Class PdbxExceptions). Behavior is controlled by the class
    attribute _raiseExceptions.

    Returns:
        list of strings: List of records in the input log file
    """
    recordList = self._readLogRecords()
    #
    if recordList:
        # Tally diagnostics by severity in a single pass per category.
        errorCount = sum(1 for record in recordList if "ERROR" in record)
        warningCount = sum(1 for record in recordList if "WARN" in record)
        syntaxErrorCount = sum(1 for record in recordList if "syntax" in record.lower())
        #
        logger.debug("%s syntax errors %d warnings %d all errors %d", inputFilePath, syntaxErrorCount, warningCount, errorCount)
        #
        if syntaxErrorCount and self._raiseExceptions:
            raise PdbxSyntaxError("%s syntax errors %d all errors %d" % (inputFilePath, syntaxErrorCount, errorCount))
        if errorCount and self._raiseExceptions:
            raise PdbxError("%s error count is %d" % (inputFilePath, errorCount))
        if errorCount:
            logger.error("%s syntax errors %d all errors %d", inputFilePath, syntaxErrorCount, errorCount)
        if warningCount:
            logger.warning("%s warnings %d", inputFilePath, warningCount)
    return recordList
def _logError(self, msg):
    """Record an error message in the diagnostic log, then raise PdbxError
    when exception raising is enabled, otherwise emit the message via the
    module logger."""
    self._appendToLog([msg])
    if not self._raiseExceptions:
        logger.error(msg)
        return
    raise PdbxError(msg)
def _fileExists(self, filePath):
    """Verify that input file path exists and is readable.

    Args:
        filePath (str): candidate input file path.

    Returns:
        bool: True if the path is readable, otherwise False.

    Raises:
        PdbxError: when _raiseExceptions is set and the file is missing or
            the accessibility check itself fails.
    """
    try:
        if not os.access(filePath, os.R_OK):
            msg = "Missing file %r" % filePath
            self._appendToLog([msg])
            logger.error(msg)
            #
            if self._raiseExceptions:
                raise PdbxError(msg)
            return False
        else:
            logger.debug("Reading from file path %s", filePath)
            return True
    except PdbxError:
        # Deliberately raised above for the missing-file case -- propagate it
        # unchanged rather than letting the broad handler below re-wrap it
        # as a misleading "File check error ... with Missing file ..." message.
        raise
    except Exception as e:
        msg = "File check error for %r with %s " % (filePath, str(e))
        self._appendToLog([msg])
        if self._raiseExceptions:
            raise PdbxError(msg)
        else:
            logger.error(msg)
        return False
def writeContainer(self, container, lastInOrder=None, selectOrder=None):
    """Render a single container (definition saveframe or data block) to the
    output stream, writing each contained object in filtered order.

    Args:
        container: container object holding the categories to render.
        lastInOrder (list, optional): object names forced to the end of the output order.
        selectOrder (list, optional): explicit object output order.

    Raises:
        PdbxError: for an object with rows but no attribute definitions.
    """
    indS = " " * self.__indentDefinition
    containerType = container.getType()
    if containerType == "definition":
        self.__write("save_%s" % container.getName())
        self.__doDefinitionIndent = True
    elif containerType == "data":
        if container.getGlobal():
            self.__write("global_\n")
            self.__doDefinitionIndent = False
            self.__write("\n")
        else:
            self.__write("data_%s\n" % container.getName())
            self.__doDefinitionIndent = False
    #
    for objName in container.filterObjectNameList(lastInOrder=lastInOrder, selectOrder=selectOrder):
        obj = container.getObj(objName)
        rowList = obj.getRowList()
        # Empty objects produce no output.
        if not rowList:
            continue
        if len(rowList) == 1:
            # Single row - item-value formatting.
            self.__writeItemValueFormat(obj)
        elif obj.getAttributeList():
            # Multiple rows - table (loop) formatting.
            if self.__useAlignedColumns:
                self.__writeTableFormat(obj)
            else:
                self.__writeTable(obj)
        else:
            raise PdbxError("")
        self.__write((indS + "#") if self.__doDefinitionIndent else "#")
    # Add a trailing saveframe reserved word
    if containerType == "definition":
        self.__write("\nsave_\n")
    self.__write("#\n")
def _uncompress(self, inputFilePath, outputDir):
    """Uncompress the input file if the path name has a recognized compression
    type file extension (.gz, .bz2, .xz or .zip).

    Args:
        inputFilePath (str): path of the possibly compressed input file.
        outputDir (str): directory in which the uncompressed copy is written.

    Returns:
        str: path of the uncompressed file (in outputDir) or the original
             input file path when no recognized extension is found.

    Raises:
        PdbxError: when _raiseExceptions is set and the uncompress operation fails.
    """
    # Default to the input path so the trailing debug/return below is always
    # well-defined, even when an exception is caught and not re-raised.
    outputFilePath = inputFilePath
    try:
        startTime = time.time()
        _, fn = os.path.split(inputFilePath)
        bn, _ = os.path.splitext(fn)
        outputFilePath = os.path.join(outputDir, bn)
        if inputFilePath.endswith(".gz"):
            with gzip.open(inputFilePath, mode="rb") as inp_f:
                with io.open(outputFilePath, "wb") as out_f:
                    shutil.copyfileobj(inp_f, out_f)
        elif inputFilePath.endswith(".bz2"):
            with bz2.open(inputFilePath, mode="rb") as inp_f:
                with io.open(outputFilePath, "wb") as out_f:
                    shutil.copyfileobj(inp_f, out_f)
        elif inputFilePath.endswith(".xz"):
            # Function-scope import keeps lzma out of module load when unused.
            import lzma

            with lzma.open(inputFilePath, mode="rb") as inp_f:
                with io.open(outputFilePath, "wb") as out_f:
                    shutil.copyfileobj(inp_f, out_f)
        elif inputFilePath.endswith(".zip"):
            # ZipFile has no 'rb' mode ('r'/'w'/'x'/'a' only) and is an archive,
            # not a byte stream: open the archive read-only and copy out the
            # first member's stream.
            with zipfile.ZipFile(inputFilePath, mode="r") as zObj:
                memberList = zObj.namelist()
                with zObj.open(memberList[0], mode="r") as inp_f:
                    with io.open(outputFilePath, "wb") as out_f:
                        shutil.copyfileobj(inp_f, out_f)
        else:
            outputFilePath = inputFilePath
        if self._timing:
            stepTime1 = time.time()
            logger.info("Timing text file %s uncompressed in %.4f seconds", inputFilePath, stepTime1 - startTime)
        #
    except Exception as e:
        msg = "Failing uncompress for file %s with %s" % (inputFilePath, str(e))
        self._appendToLog([msg])
        logger.exception(msg)
        if self._raiseExceptions:
            raise PdbxError(msg) from e
    logger.debug("Returning file path %r", outputFilePath)
    return outputFilePath
def _toAscii(self, inputFilePath, outputFilePath, chunkSize=5000, encodingErrors="ignore", readEncodingErrors="ignore"):
    """Encode the input file to ASCII and write the result to the target output file.

    Args:
        inputFilePath (str): UTF-8 encoded source file path.
        outputFilePath (str): ASCII encoded destination file path.
        chunkSize (int, optional): number of lines buffered between writes.
        encodingErrors (str, optional): codec error handler applied when encoding
            the output (e.g. 'ignore', 'xmlcharrefreplace', 'backslashreplace').
        readEncodingErrors (str, optional): codec error handler applied when
            reading the input.

    Returns:
        bool: True for success or False otherwise.
    """
    try:
        startTime = time.time()
        lineBuffer = []
        with io.open(inputFilePath, "r", encoding="utf-8", errors=readEncodingErrors) as ifh, io.open(outputFilePath, "w", encoding="ascii") as ofh:
            for line in ifh:
                lineBuffer.append(line.encode("ascii", encodingErrors).decode("ascii"))
                # Flush the buffer whenever a full chunk has accumulated.
                if len(lineBuffer) == chunkSize:
                    ofh.writelines(lineBuffer)
                    lineBuffer = []
            # Write out whatever remains in the final partial chunk.
            ofh.writelines(lineBuffer)
        if self._timing:
            stepTime1 = time.time()
            logger.info("Timing text file %s encoded to as ascii in %.4f seconds", inputFilePath, stepTime1 - startTime)
        return True
    except Exception as e:
        msg = "Failing text ascii encoding for %s with %s" % (inputFilePath, str(e))
        self._appendToLog([msg])
        logger.error(msg)
        if self._raiseExceptions:
            raise PdbxError(msg)
        #
        return False