class _BibleBooksCodesConverter:
    """
    Class for reading, validating, and converting BibleBooksCodes.
    This is only intended as a transitory class (used at start-up).
    The BibleBooksCodes class has functions more generally useful.

    Typical use is loadAndValidate() followed by importDataToPython() or one of
        the exportDataToXXX() methods.
    """

    def __init__( self ): # We can't give this parameters because of the singleton
        """
        Constructor: sets up the tag names and the element/attribute lists that are used
            for parsing and validating the XML.
        Nothing is read from disk here -- call loadAndValidate() to load the XML file.
        """
        self._filenameBase = "BibleBooksCodes"

        # These fields are used for parsing the XML
        self._treeTag = "BibleBooksCodes"
        self._headerTag = "header"
        self._mainElementTag = "BibleBookCodes"

        # These fields are used for automatically checking/validating the XML
        self._compulsoryAttributes = ()
        self._optionalAttributes = ()
        self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes
        self._compulsoryElements = ( "nameEnglish", "referenceAbbreviation", "referenceNumber" )
        self._optionalElements = ( "expectedChapters", "SBLAbbreviation", "OSISAbbreviation", "SwordAbbreviation", "CCELNumber",
                                    "ParatextAbbreviation", "ParatextNumber", "NETBibleAbbreviation", "ByzantineAbbreviation",
                                    "possibleAlternativeBooks" )
        #self._uniqueElements = self._compulsoryElements + self.optionalElements
        self._uniqueElements = self._compulsoryElements # Relax the checking

        # These are fields that we will fill later
        self._XMLheader, self._XMLtree = None, None
        self.__XMLFilepath = None # Set by __load (initialised here so loadAndValidate can always compare against it)
        self.__DataDicts = {} # Used for import
        self.titleString = self.versionString = self.dateString = ''
    # end of __init__

    def loadAndValidate( self, XMLFilepath=None ):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default
            (the "DataFiles" folder plus the filename base plus ".xml").
        Validation is only done if Globals.strictCheckingFlag is set.
        If data is already loaded, nothing is reloaded (an error is logged if a different filepath was requested).

        Returns self (so that calls can be chained).
        """
        if self._XMLtree is None: # We mustn't have already have loaded the data
            if XMLFilepath is None:
                XMLFilepath = os.path.join( "DataFiles", self._filenameBase + ".xml" )
            self.__load( XMLFilepath )
            if Globals.strictCheckingFlag:
                self.__validate()
        elif XMLFilepath is not None and XMLFilepath != self.__XMLFilepath: # The data must have been already loaded
            logging.error( _("Bible books codes are already loaded -- your different filepath of '{}' was ignored").format( XMLFilepath ) )
        return self
    # end of loadAndValidate

    def __load( self, XMLFilepath ):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements (version, date, title) from the header element.
        """
        assert( XMLFilepath )
        self.__XMLFilepath = XMLFilepath
        assert( self._XMLtree is None or len(self._XMLtree)==0 ) # Make sure we're not doing this twice

        if Globals.verbosityLevel > 2:
            print( _("Loading BibleBooksCodes XML file from '{}'...").format( self.__XMLFilepath ) )
        self._XMLtree = ElementTree().parse( self.__XMLFilepath )
        # Fail here if we didn't load anything at all
        #   (explicit length test because truth-testing an Element is deprecated)
        assert( self._XMLtree is not None and len(self._XMLtree) > 0 )

        if self._XMLtree.tag != self._treeTag:
            logging.error( _("Expected to load '{}' but got '{}'").format( self._treeTag, self._XMLtree.tag ) )
            return

        header = self._XMLtree[0]
        if header.tag == self._headerTag:
            # Keep the header under the name declared in __init__ as well as the historical public name
            self._XMLheader = self.XMLheader = header
            self._XMLtree.remove( header )
            if len(header) > 1:
                logging.info( _("Unexpected elements in header") )
            elif len(header) == 0:
                logging.info( _("Missing work element in header") )
            else:
                work = header[0]
                if work.tag == "work":
                    self.versionString = work.find("version").text
                    self.dateString = work.find("date").text
                    self.titleString = work.find("title").text
                else:
                    logging.warning( _("Missing work element in header") )
        else:
            # NOTE: The format() must be applied to the translated string (not inside the _() call)
            logging.warning( _("Missing header element (looking for '{}' tag)").format( self._headerTag ) )
        if header.tail is not None and header.tail.strip():
            logging.error( _("Unexpected '{}' tail data after header").format( header.tail ) )
    # end of __load

    def __validate( self ):
        """
        Check/validate the loaded data.
        Problems are reported through the logging module -- nothing is returned.
        """
        assert( self._XMLtree is not None and len(self._XMLtree) > 0 )

        # One list of already-seen values for each field that must be unique
        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_"+elementName] = []
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_"+attributeName] = []

        for j,element in enumerate( self._XMLtree ):
            if element.tag == self._mainElementTag:
                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is None:
                        logging.error( _("Compulsory '{}' attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) )
                    elif not attributeValue: # Only report blank if it's actually there
                        logging.warning( _("Compulsory '{}' attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None and not attributeValue:
                        logging.warning( _("Optional '{}' attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get( attributeName )
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning( _("Additional '{}' attribute ('{}') found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) )

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_"+attributeName]:
                            logging.error( _("Found '{}' data repeated in '{}' field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) )
                        uniqueDict["Attribute_"+attributeName].append( attributeValue )

                # Get the referenceAbbreviation to use as a record ID
                #   (It might be missing -- that gets reported by the compulsory element check below)
                IDElement = element.find( "referenceAbbreviation" )
                ID = None if IDElement is None else IDElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    subelement = element.find( elementName )
                    if subelement is None:
                        logging.error( _("Compulsory '{}' element is missing in record with ID '{}' (record {})").format( elementName, ID, j ) )
                    elif not subelement.text:
                        logging.warning( _("Compulsory '{}' element is blank in record with ID '{}' (record {})").format( elementName, ID, j ) )

                # Check optional elements
                for elementName in self._optionalElements:
                    subelement = element.find( elementName )
                    if subelement is not None and not subelement.text:
                        logging.warning( _("Optional '{}' element is blank in record with ID '{}' (record {})").format( elementName, ID, j ) )

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning( _("Additional '{}' element ('{}') found in record with ID '{}' (record {})").format( subelement.tag, subelement.text, ID, j ) )

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    subelement = element.find( elementName )
                    if subelement is not None:
                        text = subelement.text
                        if text in uniqueDict["Element_"+elementName]:
                            logging.error( _("Found '{}' data repeated in '{}' element in record with ID '{}' (record {})").format( text, elementName, ID, j ) )
                        uniqueDict["Element_"+elementName].append( text )
            else:
                logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) )
            if element.tail is not None and element.tail.strip():
                logging.error( _("Unexpected '{}' tail data after {} element in record {}").format( element.tail, element.tag, j ) )
        if self._XMLtree.tail is not None and self._XMLtree.tail.strip():
            logging.error( _("Unexpected '{}' tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag ) )
    # end of __validate

    def __str__( self ):
        """
        This method returns the string representation of this converter object.

        @return: the object name followed by one indented line for each of the title, version, and date (if set)
                    and the number of entries (if the XML is loaded)
        @rtype: string
        """
        padding = ' ' * 2
        lines = [ "_BibleBooksCodesConverter object" ]
        if self.titleString:
            lines.append( padding + _("Title: {}").format( self.titleString ) )
        if self.versionString:
            lines.append( padding + _("Version: {}").format( self.versionString ) )
        if self.dateString:
            lines.append( padding + _("Date: {}").format( self.dateString ) )
        if self._XMLtree is not None:
            lines.append( padding + _("Num entries = {}").format( len(self._XMLtree) ) )
        return '\n'.join( lines )
    # end of __str__

    def importDataToPython( self ):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLtree if you prefer.)

        Returns a dictionary of dictionaries (one for each way of indexing the books).
            The result is cached, so later calls return the same object.
        Raises AssertionError if no data is loaded, or if a field listed in self._uniqueElements has a duplicate value.
        """
        assert( self._XMLtree is not None and len(self._XMLtree) > 0 )
        if self.__DataDicts: # We've already done an import/restructuring -- no need to repeat it
            return self.__DataDicts

        # We'll create a number of dictionaries with different elements as the key
        myIDDict, myRADict = OrderedDict(), OrderedDict()
        mySBLDict, myOADict, mySwDict, myCCELDict, myPADict, myPNDict, myNETDict, myBzDict, myENDict = {}, {}, {}, {}, {}, {}, {}, {}, {}

        def addEntry( elementName, key, targetDict, entry, rawValue=None ):
            """
            Saves entry under key in targetDict if the XML element is compulsory or actually had a value.
            rawValue is the value tested for presence (defaults to the key).
            Duplicates are only checked for if the element is listed as unique.
            """
            if rawValue is None: rawValue = key
            if elementName in self._compulsoryElements or rawValue:
                if elementName in self._uniqueElements:
                    assert( key not in targetDict ) # Shouldn't be any duplicates
                targetDict[key] = entry
        # end of addEntry

        for element in self._XMLtree:
            def optionalText( elementName ):
                """ Returns the text of the named subelement or None if the subelement doesn't exist. """
                subelement = element.find( elementName )
                return None if subelement is None else subelement.text
            # end of optionalText

            # Get the required information out of the tree for this element
            # Start with the compulsory elements
            nameEnglish = element.find("nameEnglish").text # This name is really just a comment element
            referenceAbbreviation = element.find("referenceAbbreviation").text
            if referenceAbbreviation.upper() != referenceAbbreviation:
                logging.error( _("Reference abbreviation '{}' should be UPPER CASE").format( referenceAbbreviation ) )
            ID = element.find("referenceNumber").text
            intID = int( ID )
            # The optional elements are set to None if they don't exist
            expectedChapters = optionalText( "expectedChapters" )
            SBLAbbreviation = optionalText( "SBLAbbreviation" )
            OSISAbbreviation = optionalText( "OSISAbbreviation" )
            SwordAbbreviation = optionalText( "SwordAbbreviation" )
            CCELNumberString = optionalText( "CCELNumber" )
            ParatextAbbreviation = optionalText( "ParatextAbbreviation" )
            ParatextNumberString = optionalText( "ParatextNumber" )
            NETBibleAbbreviation = optionalText( "NETBibleAbbreviation" )
            ByzantineAbbreviation = optionalText( "ByzantineAbbreviation" )
            possibleAlternativeBooks = optionalText( "possibleAlternativeBooks" )

            # Now put it into my dictionaries for easy access
            # This part should be customized or added to for however you need to process the data
            #   Add .upper() if you require the abbreviations to be uppercase (or .lower() for lower case)
            #   The referenceAbbreviation is UPPER CASE by definition
            sharedFields = ( ("SBLAbbreviation",SBLAbbreviation), ("OSISAbbreviation",OSISAbbreviation), ("SwordAbbreviation",SwordAbbreviation),
                            ("CCELNumberString",CCELNumberString), ("ParatextAbbreviation",ParatextAbbreviation), ("ParatextNumberString",ParatextNumberString),
                            ("NETBibleAbbreviation",NETBibleAbbreviation), ("ByzantineAbbreviation",ByzantineAbbreviation),
                            ("numExpectedChapters",expectedChapters), ("possibleAlternativeBooks",possibleAlternativeBooks), ("nameEnglish",nameEnglish) )
            RAEntry = { "referenceNumber":intID }
            RAEntry.update( sharedFields )
            addEntry( "referenceAbbreviation", referenceAbbreviation, myRADict, RAEntry )
            IDEntry = { "referenceAbbreviation":referenceAbbreviation }
            IDEntry.update( sharedFields )
            addEntry( "referenceNumber", intID, myIDDict, IDEntry, rawValue=ID )
            addEntry( "SBLAbbreviation", SBLAbbreviation, mySBLDict, ( intID, referenceAbbreviation, ) )
            addEntry( "OSISAbbreviation", OSISAbbreviation, myOADict, ( intID, referenceAbbreviation ) )
            addEntry( "SwordAbbreviation", SwordAbbreviation, mySwDict, ( intID, referenceAbbreviation, ) )
            # NOTE: The XML elements are named CCELNumber and ParatextNumber (without the "String" suffix)
            addEntry( "CCELNumber", CCELNumberString, myCCELDict, ( intID, referenceAbbreviation, ) )
            addEntry( "ParatextAbbreviation", ParatextAbbreviation, myPADict, ( intID, referenceAbbreviation, ParatextNumberString, ) )
            addEntry( "ParatextNumber", ParatextNumberString, myPNDict, ( intID, referenceAbbreviation, ParatextAbbreviation, ) )
            addEntry( "NETBibleAbbreviation", NETBibleAbbreviation, myNETDict, ( intID, referenceAbbreviation, ) )
            addEntry( "ByzantineAbbreviation", ByzantineAbbreviation, myBzDict, ( intID, referenceAbbreviation, ) )
            addEntry( "nameEnglish", nameEnglish, myENDict, ( intID, referenceAbbreviation ) )

        self.__DataDicts = { "referenceNumberDict":myIDDict, "referenceAbbreviationDict":myRADict, "SBLDict":mySBLDict,
                                "OSISAbbreviationDict":myOADict, "SwordAbbreviationDict":mySwDict, "CCELDict":myCCELDict,
                                "ParatextAbbreviationDict":myPADict, "ParatextNumberDict":myPNDict,
                                "NETBibleAbbreviationDict":myNETDict, "ByzantineAbbreviationDict":myBzDict, "EnglishNameDict":myENDict }
        return self.__DataDicts # Just delete any of the dictionaries that you don't need
    # end of importDataToPython

    def exportDataToPython( self, filepath=None ):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.
        If no filepath is given, the file goes into the "DerivedFiles" folder.
        """
        def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ):
            """Exports theDict to theFile."""
            # We only check the first entry we get (and allow for an empty dictionary)
            fieldsCount = len( next( iter( theDict.values() ) ) ) if theDict else 0
            theFile.write( "{} = {{\n # Key is {}\n # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment ) )
            for dictKey in sorted( theDict.keys() ):
                theFile.write( ' {}: {},\n'.format( repr(dictKey), theDict[dictKey] ) )
            theFile.write( "}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDict

        from datetime import datetime

        assert( self._XMLtree is not None and len(self._XMLtree) > 0 )
        self.importDataToPython()
        assert( self.__DataDicts )

        if not filepath:
            filepath = os.path.join( "DerivedFiles", self._filenameBase + "_Tables.py" )
        if Globals.verbosityLevel > 1:
            print( _("Exporting to {}...").format( filepath ) )
        # The header that we write states that it's a UTF-8 file so make sure that's what we create
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            myFile.write( "# {}\n#\n".format( filepath ) )
            myFile.write( "# This UTF-8 file was automatically generated by BibleBooksCodes.py V{} on {}\n#\n".format( versionString, datetime.now() ) )
            if self.titleString:
                myFile.write( "# {} data\n".format( self.titleString ) )
            if self.versionString:
                myFile.write( "# Version: {}\n".format( self.versionString ) )
            if self.dateString:
                myFile.write( "# Date: {}\n#\n".format( self.dateString ) )
            myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) )
            mostEntries = "0=referenceNumber (integer 1..255), 1=referenceAbbreviation/BBB (3-uppercase characters)"
            dictInfo = { "referenceNumberDict":("referenceNumber (integer 1..255)","specified"),
                            "referenceAbbreviationDict":("referenceAbbreviation","specified"),
                            "CCELDict":("CCELNumberString",mostEntries),
                            "SBLDict":("SBLAbbreviation",mostEntries),
                            "OSISAbbreviationDict":("OSISAbbreviation",mostEntries),
                            "SwordAbbreviationDict":("SwordAbbreviation",mostEntries),
                            "ParatextAbbreviationDict":("ParatextAbbreviation",mostEntries),
                            "ParatextNumberDict":("ParatextNumberString",mostEntries),
                            "NETBibleAbbreviationDict":("NETBibleAbbreviation",mostEntries),
                            "ByzantineAbbreviationDict":("ByzantineAbbreviation",mostEntries),
                            "EnglishNameDict":("nameEnglish",mostEntries) }
            for dictName,dictData in self.__DataDicts.items():
                exportPythonDict( myFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] )
            myFile.write( "# end of {}".format( os.path.basename(filepath) ) )
    # end of exportDataToPython

    def exportDataToJSON( self, filepath=None ):
        """
        Writes the information tables to a .json file that can be easily loaded into a Java program.
        If no filepath is given, the file goes into the "DerivedFiles" folder.

        See http://en.wikipedia.org/wiki/JSON.
        """
        import json

        assert( self._XMLtree is not None and len(self._XMLtree) > 0 )
        self.importDataToPython()
        assert( self.__DataDicts )

        if not filepath:
            filepath = os.path.join( "DerivedFiles", self._filenameBase + "_Tables.json" )
        if Globals.verbosityLevel > 1:
            print( _("Exporting to {}...").format( filepath ) )
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            # NOTE: No header or trailer comments are written -- comments aren't part of the JSON format
            json.dump( self.__DataDicts, myFile, indent=2 )
    # end of exportDataToJSON

    def exportDataToC( self, filepath=None ):
        """
        Writes the information tables to a .h and .c files that can be included in c and c++ programs.
        If no filepath is given, the files go into the "DerivedFiles" folder.

        NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically.
        """
        def exportCDict( hFile, cFile, theDict, dictName, sortedBy, structure ):
            """
            Exports theDict to the .h and .c files
                (a struct typedef to the .h file and a const array of those structs to the .c file).
            """
            def convertField( field, entry ):
                """ Returns the C source for one field (or an empty string after logging an error if we can't convert it). """
                if field is None: return '""'
                if isinstance( field, str ): return '"' + str(field).replace('"','\\"') + '"'
                if isinstance( field, int ): return str(field)
                logging.error( _("Cannot convert unknown field type '{}' in entry '{}'").format( field, entry ) )
                return ""
            # end of convertField

            def convertEntry( entry ):
                """
                Convert an entry (a tuple or a dictionary) to a comma-separated string of C values.
                The fields of a dictionary are written in the sorted order of their keys.
                """
                if isinstance( entry, tuple ):
                    fields = entry
                elif isinstance( entry, dict ):
                    fields = [entry[key] for key in sorted( entry.keys() )]
                else:
                    logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry) ) )
                    return ""
                result = ""
                for field in fields:
                    if result: result += ", " # Separate the fields
                    result += convertField( field, entry )
                return result
            # end of convertEntry

            # We only check the first entry we get (and allow for an empty dictionary)
            #   Add one since we include the key in the count
            fieldsCount = len( next( iter( theDict.values() ) ) ) + 1 if theDict else 0

            hFile.write( "typedef struct {}EntryStruct {{\n".format( dictName ) )
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration:
                    hFile.write( " {};\n".format( adjDeclaration ) )
            hFile.write( "}} {}Entry;\n\n".format( dictName ) )

            cFile.write( "const static {}Entry\n {}[{}] = {{\n // Fields ({}) are {}\n // Sorted by {}\n".format( dictName, dictName, len(theDict), fieldsCount, structure, sortedBy ) )
            for dictKey in sorted( theDict.keys() ):
                if isinstance( dictKey, str ):
                    # Escape any double quotes in the key the same way as is done for the string fields
                    cFile.write( " {{\"{}\", {}}},\n".format( dictKey.replace('"','\\"'), convertEntry(theDict[dictKey]) ) )
                elif isinstance( dictKey, int ):
                    cFile.write( " {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                else:
                    logging.error( _("Can't handle this type of key data yet: {}").format( dictKey ) )
            # The array initialiser was opened with a brace so it must be closed with just a brace (no square bracket)
            cFile.write( "}}; // {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportCDict

        from datetime import datetime

        assert( self._XMLtree is not None and len(self._XMLtree) > 0 )
        self.importDataToPython()
        assert( self.__DataDicts )

        if not filepath:
            filepath = os.path.join( "DerivedFiles", self._filenameBase + "_Tables" )
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        if Globals.verbosityLevel > 1:
            print( _("Exporting to {}...").format( cFilepath ) ) # Don't bother telling them about the .h file
        ifdefName = self._filenameBase.upper() + "_Tables_h"

        # The headers that we write state that they're UTF-8 files so make sure that's what we create
        with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, open( cFilepath, 'wt', encoding='utf-8' ) as myCFile:
            def writeToBoth( lines ):
                """ Writes the same text to both the .h and the .c file. """
                myHFile.write( lines ); myCFile.write( lines )
            # end of writeToBoth

            myHFile.write( "// {}\n//\n".format( hFilepath ) )
            myCFile.write( "// {}\n//\n".format( cFilepath ) )
            writeToBoth( "// This UTF-8 file was automatically generated by BibleBooksCodes.py V{} on {}\n//\n".format( versionString, datetime.now() ) )
            if self.titleString:
                writeToBoth( "// {} data\n".format( self.titleString ) )
            if self.versionString:
                writeToBoth( "// Version: {}\n".format( self.versionString ) )
            if self.dateString:
                writeToBoth( "// Date: {}\n//\n".format( self.dateString ) )
            myCFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) )
            myHFile.write( "\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) )
            myCFile.write( '#include "{}"\n\n'.format( os.path.basename(hFilepath) ) )

            CHAR = "const unsigned char"
            BYTE = "const int"
            # Each entry is (what the table is sorted by, the struct member declarations)
            #   The struct members must be in the same order as the data is written:
            #       the key first and then (for the two full tables) the other fields in the sorted order of their names
            dictInfo = {
                "referenceNumberDict":("referenceNumber (integer 1..255)",
                    "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} referenceAbbreviation[3+1];"
                    .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ),
                "referenceAbbreviationDict":("referenceAbbreviation",
                    "{} referenceAbbreviation[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} referenceNumber;"
                    .format(CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, BYTE ) ),
                "CCELDict":("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ),
                "SBLDict":("SBLAbbreviation", "{}* SBLAbbreviation; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ),
                "OSISAbbreviationDict":("OSISAbbreviation", "{}* OSISAbbreviation; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ),
                "SwordAbbreviationDict":("SwordAbbreviation", "{}* SwordAbbreviation; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ),
                "ParatextAbbreviationDict":("ParatextAbbreviation", "{} ParatextAbbreviation[3+1]; {} referenceNumber; {} referenceAbbreviation[3+1]; {} ParatextNumberString[2+1];".format(CHAR,BYTE,CHAR,CHAR) ),
                "ParatextNumberDict":("ParatextNumberString", "{} ParatextNumberString[2+1]; {} referenceNumber; {} referenceAbbreviation[3+1]; {} ParatextAbbreviation[3+1];".format(CHAR,BYTE,CHAR,CHAR) ),
                "NETBibleAbbreviationDict":("NETBibleAbbreviation", "{}* NETBibleAbbreviation; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ),
                "ByzantineAbbreviationDict":("ByzantineAbbreviation", "{}* ByzantineAbbreviation; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ),
                "EnglishNameDict":("nameEnglish", "{}* nameEnglish; {} referenceNumber; {} referenceAbbreviation[3+1];".format(CHAR,BYTE,CHAR) ) }

            for dictName,dictData in self.__DataDicts.items():
                exportCDict( myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] )

            myHFile.write( "#endif // {}\n\n".format( ifdefName ) )
            myHFile.write( "// end of {}".format( os.path.basename(hFilepath) ) )
            myCFile.write( "// end of {}".format( os.path.basename(cFilepath) ) )
    # end of exportDataToC
# end of _BibleBooksCodesConverter class
class _BibleOrganizationalSystemsConverter: """ Class for handling and converting BibleOrganizationalSystems. """ def __init__( self ): """ Constructor: expects the filepath of the source XML file. Loads (and crudely validates the XML file) into an element tree. """ self._filenameBase = "BibleOrganizationalSystems" # These fields are used for parsing the XML self._treeTag = "BibleOrganizationalSystems" self._headerTag = "header" self._mainElementTag = "BibleOrganizationalSystem" # These fields are used for automatically checking/validating the XML self._compulsoryAttributes = ( "type", ) self._optionalAttributes = () self._uniqueAttributes = () self._compulsoryElements = ( "referenceAbbreviation", "languageCode", ) self._optionalElements = ( "name", "publicationDate", "versificationSystem", "punctuationSystem", "bookOrderSystem", "booksNamesSystem", "derivedFrom", "usesText", ) self._uniqueElements = () self._allowedMultiple = ( "name", ) # These are fields that we will fill later self.title, self.version, self.date = None, None, None self.header, self._XMLtree = None, None self.__dataDicts = None # Get the data tables that we need for proper checking self._ISOLanguages = ISO_639_3_Languages().loadData() self._BibleBooksCodes = BibleBooksCodes().loadData() self._BibleBookOrderSystems = BibleBookOrderSystems().loadData() self._BiblePunctuationSystems = BiblePunctuationSystems().loadData() self._BibleVersificationSystems = BibleVersificationSystems().loadData() self._BibleBooksNamesSystems = BibleBooksNamesSystems().loadData() # end of __init__ def __str__( self ): """ This method returns the string representation of a Bible book code. 
@return: the name of a Bible object formatted as a string @rtype: string """ result = "" if self.title: result += ('\n' if result else '') + self.title if self.version: result += ('\n' if result else '') + " Version: {}".format( self.version ) if self.date: result += ('\n' if result else '') + " Date: {}".format( self.date ) result += ('\n' if result else '') + " Num entries = {}".format( len(self._XMLtree) ) return result # end of __str__ def loadAndValidate( self, XMLFilepath=None ): """ Loads (and crudely validates the XML file) into an element tree. Allows the filepath of the source XML file to be specified, otherwise uses the default. """ if self._XMLtree is None: # We mustn't have already have loaded the data if XMLFilepath is None: XMLFilepath = os.path.join( "DataFiles", self._filenameBase + ".xml" ) self._load( XMLFilepath ) if Globals.strictCheckingFlag: self._validate() return self # end of loadAndValidate def _load( self, XMLFilepath ): """ Load the source XML file and remove the header from the tree. Also, extracts some useful elements from the header element. 
""" assert( XMLFilepath ) self.XMLFilepath = XMLFilepath assert( self._XMLtree is None or len(self._XMLtree)==0 ) # Make sure we're not doing this twice if Globals.verbosityLevel > 2: print( _("Loading BibleOrganisationalSystems XML file from '{}'...").format( self.XMLFilepath ) ) self._XMLtree = ElementTree().parse( self.XMLFilepath ) assert( self._XMLtree ) # Fail here if we didn't load anything at all if self._XMLtree.tag == self._treeTag: header = self._XMLtree[0] if header.tag == self._headerTag: self.header = header self._XMLtree.remove( header ) if len(header)>1: logging.info( _("Unexpected elements in header") ) elif len(header)==0: logging.info( _("Missing work element in header") ) else: work = header[0] if work.tag == "work": self.version = work.find("version").text self.date = work.find("date").text self.title = work.find("title").text else: logging.warning( _("Missing work element in header") ) else: logging.warning( _("Missing header element (looking for '{}' tag)").format( self._headerTag ) ) else: logging.error( _("Expected to load '{}' but got '{}'").format( self._treeTag, self._XMLtree.tag ) ) # end of _load def _validate( self ): """ Check/validate the loaded data. 
""" assert( self._XMLtree ) uniqueDict = {} for elementName in self._uniqueElements: uniqueDict["Element_"+elementName] = [] for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = [] expectedID = 1 for j,element in enumerate(self._XMLtree): if element.tag == self._mainElementTag: # Check compulsory attributes on this main element for attributeName in self._compulsoryAttributes: attributeValue = element.get( attributeName ) if attributeValue is None: logging.error( _("Compulsory '{}' attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) ) if not attributeValue: logging.warning( _("Compulsory '{}' attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check optional attributes on this main element for attributeName in self._optionalAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if not attributeValue: logging.warning( _("Optional '{}' attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) ) # Check for unexpected additional attributes on this main element for attributeName in element.keys(): attributeValue = element.get( attributeName ) if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes: logging.warning( _("Additional '{}' attribute ('{}') found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) ) # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes) for attributeName in self._uniqueAttributes: attributeValue = element.get( attributeName ) if attributeValue is not None: if attributeValue in uniqueDict["Attribute_"+attributeName]: logging.error( _("Found '{}' data repeated in '{}' field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) ) uniqueDict["Attribute_"+attributeName].append( attributeValue 
) ID = element.find("referenceAbbreviation").text # Check compulsory elements for elementName in self._compulsoryElements: if element.find( elementName ) is None: logging.error( _("Compulsory '{}' element is missing in record with ID '{}' (record {})").format( elementName, ID, j ) ) if not element.find( elementName ).text: logging.warning( _("Compulsory '{}' element is blank in record with ID '{}' (record {})").format( elementName, ID, j ) ) # Check optional elements for elementName in self._optionalElements: if element.find( elementName ) is not None: if not element.find( elementName ).text: logging.warning( _("Optional '{}' element is blank in record with ID '{}' (record {})").format( elementName, ID, j ) ) # Check for unexpected additional elements for subelement in element: if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements: logging.warning( _("Additional '{}' element ('{}') found in record with ID '{}' (record {})").format( subelement.tag, subelement.text, ID, j ) ) # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements) for elementName in self._uniqueElements: if element.find( elementName ) is not None: text = element.find( elementName ).text if text in uniqueDict["Element_"+elementName]: logging.error( _("Found '{}' data repeated in '{}' element in record with ID '{}' (record {})").format( text, elementName, ID, j ) ) uniqueDict["Element_"+elementName].append( text ) else: logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) ) # end of _validate def importDataToPython( self ): """ Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program. (Of course, you can just use the elementTree in self._XMLtree if you prefer.) 
""" assert( self._XMLtree ) if self.__dataDicts: # We've already done an import/restructuring -- no need to repeat it return self.__dataDicts # We'll create a number of dictionaries with different elements as the key dataDict, indexDict, combinedIndexDict = {}, {}, {} for element in self._XMLtree: bits = {} # Get the required information out of the tree for this element # Start with the compulsory elements and type attribute referenceAbbreviation = element.find("referenceAbbreviation").text bits["referenceAbbreviation"] = referenceAbbreviation myType = element.get( "type" ) bits["type"] = myType if myType not in allowedTypes: logging.error( _("Unrecognized '{}' type for '{}' (expected one of {})").format(myType,referenceAbbreviation,allowedTypes) ) languageCode = element.find("languageCode").text if self._ISOLanguages and not self._ISOLanguages.isValidLanguageCode( languageCode ): # Check that we have a valid language code logging.error( "Unrecognized '{}' ISO-639-3 language code in '{}' organisational system".format( languageCode, referenceAbbreviation ) ) bits["languageCode"] = languageCode # Now work on the optional elements for name in ( "name", "publicationDate", "versificationSystem", "punctuationSystem", "bookOrderSystem", "booksNamesSystem", "derivedFrom", "usesText", ): for nameData in element.findall(name): if name in self._allowedMultiple: # Put multiple entries into a list if name not in bits: bits[name] = [nameData.text] else: bits[name].append( nameData.text ) else: # Not allowed multiples if name in bits: logging.error( _("Multiple {} elements found in {} {}").format(name, referenceAbbreviation, myType) ) bits[name] = nameData.text extension = '_' + myType extendedRA = referenceAbbreviation if referenceAbbreviation.endswith(extension) else (referenceAbbreviation + extension) dataDict[extendedRA] = bits if referenceAbbreviation in indexDict: indexDict[referenceAbbreviation].append( extendedRA ) else: indexDict[referenceAbbreviation] = [extendedRA] if 
referenceAbbreviation in combinedIndexDict: combinedIndexDict[referenceAbbreviation].append( extendedRA ) else: combinedIndexDict[referenceAbbreviation] = [extendedRA] if extendedRA != referenceAbbreviation: assert( extendedRA not in combinedIndexDict ) combinedIndexDict[extendedRA] = [extendedRA] if Globals.strictCheckingFlag: # We'll do quite a bit more cross-checking now for extendedReferenceAbbreviation,data in dataDict.items(): #print( extendedReferenceAbbreviation, data ) systemType = data['type'] if systemType=='edition': if 'usesText' not in data: logging.error( _("{} edition doesn't specify 'usesText'").format( referenceAbbreviation ) ) if data['usesText'] not in indexDict: logging.error( _("{} edition specifies unknown '{}' text in 'usesText' field").format(referenceAbbreviation,data['usesText']) ) elif len(indexDict[data['usesText']]) > 1: # it could be ambiguous found = 0 for thisType in ('revision','translation','original'): # but not 'edition' usesTextExtended = data['usesText'] + '_' + thisType if usesTextExtended in dataDict: foundOne = usesTextExtended found += 1 assert( found > 0 ) if found==1: # ah, it's not actually ambiguous if Globals.verbosityLevel > 0: print( _("Adjusted text used for {} from the ambiguous '{}' to the extended name '{}'").format( extendedReferenceAbbreviation, data['usesText'], foundOne ) ) data['usesText'] = foundOne else: logging.warning( _("{} edition specifies ambiguous '{}' texts in 'usesText' field").format(referenceAbbreviation,indexDict[data['usesText']]) ) elif systemType=='revision': if 'derivedFrom' not in data: logging.error( _("{} revision doesn't specify 'derivedFrom'").format( referenceAbbreviation ) ) if data['derivedFrom'] not in indexDict: logging.error( _("{} revision specifies unknown '{}' text in 'derivedFrom' field").format(referenceAbbreviation,data['derivedFrom']) ) elif len(indexDict[data['derivedFrom']]) > 1: logging.warning( _("{} edition specifies ambiguous '{}' texts in 'derivedFrom' 
field").format(referenceAbbreviation,indexDict[data['derivedFrom']]) ) if 'versificationSystem' in data: if not self._BibleVersificationSystems.isValidVersificationSystemName( data['versificationSystem'] ): extra = "\n Available systems are {}".format( self._BibleVersificationSystems.getAvailableVersificationSystemNames()) if Globals.verbosityLevel > 2 else '' logging.error( _("Unknown '{}' versification system name in {}{}").format(data['versificationSystem'],extendedReferenceAbbreviation,extra) ) if 'punctuationSystem' in data: if not self._BiblePunctuationSystems.isValidPunctuationSystemName( data['punctuationSystem'] ): extra = "\n Available systems are {}".format( self._BiblePunctuationSystems.getAvailablePunctuationSystemNames()) if Globals.verbosityLevel > 2 else '' logging.error( _("Unknown '{}' punctuation system name in {}{}").format(data['punctuationSystem'],extendedReferenceAbbreviation,extra) ) self.__dataDicts = dataDict, indexDict, combinedIndexDict return self.__dataDicts # end of importDataToPython def exportDataToPython( self, filepath=None ): """ Writes the information tables to a .py file that can be cut and pasted into a Python program. 
""" def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ): """Exports theDict to theFile.""" theFile.write( "{} = {{\n # Key is {}\n # Fields are: {}\n".format( dictName, keyComment, fieldsComment ) ) for dictKey in sorted(theDict.keys()): theFile.write( ' {}: {},\n'.format( repr(dictKey), theDict[dictKey] ) ) theFile.write( "}}\n# end of {}\n\n".format( dictName ) ) # end of exportPythonDict from datetime import datetime assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: filepath = os.path.join( "DerivedFiles", self._filenameBase + "_Tables.py" ) if Globals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) dataDict, indexDict, combinedIndexDict = self.importDataToPython() with open( filepath, 'wt' ) as myFile: myFile.write( "# {}\n#\n".format( filepath ) ) myFile.write( "# This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n#\n".format( versionString, datetime.now() ) ) if self.title: myFile.write( "# {}\n".format( self.title ) ) if self.version: myFile.write( "# Version: {}\n".format( self.version ) ) if self.date: myFile.write( "# Date: {}\n#\n".format( self.date ) ) myFile.write( "# {} {} entries loaded from the original XML file.\n".format( len(self._XMLtree), self._treeTag ) ) #myFile.write( "# {} {} loaded from the original XML files.\n#\n\n".format( len(self.systems), self._treeTag ) ) exportPythonDict( myFile, dataDict, "dataDict", "extendedReferenceAbbreviation", "referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, indexDict, "indexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, combinedIndexDict, "combinedIndexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, 
ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) # end of exportDataToPython def exportDataToJSON( self, filepath=None ): """ Writes the information tables to a .json file that can be easily loaded into a Java program. See http://en.wikipedia.org/wiki/JSON. """ from datetime import datetime import json assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: filepath = os.path.join( "DerivedFiles", self._filenameBase + "_Tables.json" ) if Globals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) with open( filepath, 'wt' ) as myFile: #myFile.write( "# {}\n#\n".format( filepath ) ) # Not sure yet if these comment fields are allowed in JSON #myFile.write( "# This UTF-8 file was automatically generated by BibleBooksCodes.py V{} on {}\n#\n".format( versionString, datetime.now() ) ) #if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) ) #if self.versionString: myFile.write( "# Version: {}\n".format( self.versionString ) ) #if self.dateString: myFile.write( "# Date: {}\n#\n".format( self.dateString ) ) #myFile.write( "# {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) ) json.dump( self.__dataDicts, myFile, indent=2 ) #myFile.write( "\n\n# end of {}".format( os.path.basename(filepath) ) ) # end of exportDataToJSON def exportDataToC( self, filepath=None ): """ Writes the information tables to a .h file that can be included in c and c++ programs. 
""" raise Exception( "C export not written yet" ) def exportPythonDict( theFile, theDict, dictName, structName, fieldsComment ): """Exports theDict to theFile.""" def convertEntry( entry ): """Convert special characters in an entry...""" result = "" for field in entry: if result: result += ", " # Separate the fields if field is None: result += '""' elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"' elif isinstance( field, int): result += str(field) else: logging.error( _("Cannot convert unknown field type '{}' in entry '{}'").format( field, entry ) ) return result theFile.write( "static struct {} {}[] = {\n // Fields are {}\n".format( structName, dictName, fieldsComment ) ) for entry in sorted(theDict.keys()): if isinstance( entry, str ): theFile.write( " {\"{}\", {}},\n".format( entry, convertEntry(theDict[entry]) ) ) elif isinstance( entry, int ): theFile.write( " {{}, {}},\n".format( entry, convertEntry(theDict[entry]) ) ) else: logging.error( _("Can't handle this type of data yet: {}").format( entry ) ) theFile.write( "}; // {}\n\n".format( dictName) ) # end of exportPythonDict from datetime import datetime assert( self._XMLtree ) self.importDataToPython() assert( self.__dataDicts ) if not filepath: filepath = os.path.join( "DerivedFiles", self._filenameBase + "_Tables.h" ) if Globals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) ) IDDict, RADict, SBLDict, OADict, PADict, PNDict = self.importDataToPython() ifdefName = self._filenameBase.upper() + "_Tables_h" with open( filepath, 'wt' ) as myFile: myFile.write( "// {}\n//\n".format( filepath ) ) myFile.write( "// This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n//\n".format( versionString, datetime.now() ) ) if self.title: myFile.write( "// {}\n".format( self.title ) ) if self.version: myFile.write( "// Version: {}\n".format( self.version ) ) if self.date: myFile.write( "// Date: {}\n//\n".format( self.date ) ) 
myFile.write( "// {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) ) myFile.write( "#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) ) exportPythonDict( myFile, IDDict, "IDDict", "{int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "id (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" ) exportPythonDict( myFile, RADict, "RADict", "{char* refAbbrev; int id; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "referenceAbbreviation (sorted), SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, SBLDict, "SBLDict", "{char* SBLAbbrev; int id; char* refAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "SBLAbbreviation (sorted), ReferenceAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, OADict, "OADict", "{char* OSISAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "OSISAbbreviation (sorted), ReferenceAbbreviation, SBLAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, PADict, "PADict", "{char* PTAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTNum; char* EngName;}", "ParatextAbbreviation (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" ) exportPythonDict( myFile, PNDict, "PNDict", "{char* PTNum; int id; char* PTAbbrev; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* EngName;}", "ParatextNumberString (sorted), ParatextAbbreviation, referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, id, nameEnglish (comment only)" ) myFile.write( 
"#endif // {}\n".format( ifdefName ) )