def __init__(self, sourceFolder, givenName, encoding='utf-8'):
    """
    Constructor: prepare an (as yet unloaded) VerseView XML Bible object.

    Parameters:
        sourceFolder: folder that contains the XML file
        givenName: name of the XML file inside sourceFolder (also used as self.name)
        encoding: stored in self.encoding (defaults to 'utf-8')

    Nothing is parsed here. The file is only checked for readability,
        and an unreadable file is reported but does not raise an exception.
    """
    # The base class has to be initialised before we add anything of our own
    Bible.__init__(self)
    self.objectNameString = 'VerseView XML Bible object'
    self.objectTypeString = 'VerseView'

    # Remember what we were given and work out where the file is
    self.sourceFolder = sourceFolder
    self.givenName = givenName
    self.encoding = encoding
    self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

    # Nothing has been read from the XML file yet
    self.XMLTree = None
    self.header = None

    # Get the data tables that we need for proper checking
    self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    # Do a preliminary check on the readability of our file
    fileIsReadable = os.access(self.sourceFilepath, os.R_OK)
    if not fileIsReadable:
        vPrint('Quiet', debuggingThisModule,
               "VerseViewXMLBible: File {!r} is unreadable".format(self.sourceFilepath))

    self.name = self.givenName
def preload(self):
    """
    Load the metadata from the SQLite3 database.

    Opens self.sourceFilepath with sqlite3, leaves an open cursor in self.cursor
        and copies the first row of the 'Details' table into
        self.suppliedMetadata['MySword'] (one entry per column name).

    An unexpected filename ending, an empty Details table and an 'encryption'
        column are each reported with logging.critical, but none of them
        stops the preload.

    Sets self.BibleOrganisationalSystem and self.preloadDone.
    """
    fnPrint(debuggingThisModule, "preload()…")
    vPrint('Info', debuggingThisModule, _("Preloading {}…").format(self.sourceFilepath))

    # Complain (but carry on) if the filename doesn't look right
    fileExtensionUpper = self.fileExtension.upper()
    if fileExtensionUpper not in FILENAME_ENDINGS_TO_ACCEPT:
        logging.critical("{} doesn't appear to be a MySword file".format(
            self.sourceFilename))
    elif not self.sourceFilename.upper().endswith(BIBLE_FILENAME_ENDINGS_TO_ACCEPT[0]):
        logging.critical("{} doesn't appear to be a MySword Bible file".format(
            self.sourceFilename))

    # The connection is not closed here: the cursor stored on self stays usable afterwards
    connection = sqlite3.connect(self.sourceFilepath)
    connection.row_factory = sqlite3.Row  # Enable row names
    self.cursor = connection.cursor()

    # First get the settings
    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}
    detailsDict = {}
    self.suppliedMetadata['MySword'] = detailsDict
    self.cursor.execute('select * from Details')
    row = self.cursor.fetchone()
    if row is None:
        # fetchone() gives None for an empty table -- report it rather than
        #   failing with an AttributeError on row.keys()
        logging.critical("{} has no rows in its Details table".format(
            self.sourceFilename))
    else:
        for key in row.keys():
            detailsDict[key] = row[key]

    if 'encryption' in detailsDict:
        logging.critical("{} is encrypted: level {}".format(
            self.sourceFilename, detailsDict['encryption']))

    self.BibleOrganisationalSystem = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    self.preloadDone = True
class VerseViewXMLBible(Bible):
    """
    Class for reading, validating, and converting VerseViewXMLBible XML.

    load() expects a 'bible' root element containing simple header elements
        (fname, revision, title, font, copyright, sizefactor) and 'b' (book)
        elements. Books contain 'c' (chapter) elements and chapters contain
        'v' (verse) elements. Books, chapters and verses are each identified
        by an 'n' attribute.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'bible'
    filenameTag = 'fname'
    revisionTag = 'revision'
    titleTag = 'title'
    fontTag = 'font'
    copyrightTag = 'copyright'
    sizefactorTag = 'sizefactor'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    # The header elements all get the same three checks in load():
    #   tag -> (label used in the location string,
    #           key in suppliedMetadata['VerseView'], or None if the text isn't saved)
    _simpleHeaderElements = {
        filenameTag: ('filename', None),
        revisionTag: ('revision', 'Revision'),
        titleTag: ('title', 'Title'),
        fontTag: ('font', 'Font'),
        copyrightTag: ('copyright', 'Copyright'),
        sizefactorTag: ('sizefactor', None),
    }

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: just sets up the VerseView Bible object.

        Parameters:
            sourceFolder: folder that contains the XML file
            givenName: name of the XML file inside sourceFolder (also used as self.name)
            encoding: stored in self.encoding (defaults to 'utf-8')

        An unreadable file is only reported here -- no exception is raised.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'VerseView XML Bible object'
        self.objectTypeString = 'VerseView'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = self.header = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint('Quiet', debuggingThisModule,
                   "VerseViewXMLBible: File {!r} is unreadable".format(self.sourceFilepath))

        self.name = self.givenName
    # end of VerseViewXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        The header elements are checked and (for revision, title, font and
            copyright) saved into self.suppliedMetadata['VerseView'].
        Each book element is numbered in file order (starting at 1) and
            handed to __validateAndExtractBook.
        Unexpected elements are logged as errors and skipped.
        """
        vPrint('Info', debuggingThisModule, _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            # Fail here if we didn't load anything at all
            # NOTE: Uses len() because truth-testing an Element is deprecated
            #   (it meant "has subelements", which is what we check explicitly)
            assert len(self.XMLTree) > 0

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['VerseView'] = {}

        # Find the main (bible) container
        if self.XMLTree.tag == VerseViewXMLBible.treeTag:
            location = "VerseView XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location, 'js24')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # Find the submain (various info and then book) containers
            bookNumber = 0
            for element in self.XMLTree:
                headerInfo = VerseViewXMLBible._simpleHeaderElements.get(element.tag)
                if headerInfo is not None:
                    label, metadataKey = headerInfo
                    sublocation = label + " in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'bh09')
                    if metadataKey is not None:
                        self.suppliedMetadata['VerseView'][metadataKey] = element.text
                    elif element.tag == VerseViewXMLBible.sizefactorTag:
                        # The only sizefactor value that we know about is 1
                        if BibleOrgSysGlobals.debugFlag:
                            assert element.text == '1'
                elif element.tag == VerseViewXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'd3f6')
                    bookNumber += 1
                    self.__validateAndExtractBook(element, bookNumber)
                else:
                    logging.error("xk15 Expected to find {!r} but got {!r}".format(
                        VerseViewXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                VerseViewXMLBible.treeTag, self.XMLTree.tag))

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            # These are all compulsory so they should all exist,
            #   but use get() so that a defective file shows None here
            #   instead of aborting the load with a KeyError
            for metadataKey in ('Revision', 'Title', 'Font', 'Copyright'):
                vPrint('Quiet', debuggingThisModule, "{} is {!r}".format(
                    metadataKey, self.suppliedMetadata['VerseView'].get(metadataKey)))

        self.applySuppliedMetadata('VerseView')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
    # end of VerseViewXMLBible.load

    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        Parameters:
            book: the XML book element
            bookNumber: 1-based position of this book element in the file

        The book code is looked up from the 'n' attribute (retrying with
            accents removed), but the code obtained from bookNumber wins if
            the two disagree.
        Nothing is saved if no book code can be determined.
        """
        vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning("Unprocessed {!r} attribute ({}) in book element".format(
                    attrib, value))

        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
            if BBB is None:
                # Have another try without any accents
                #   (only possible when there actually is a book name)
                adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
                if adjustedBookName != bookName:
                    BBB = self.genericBOS.getBBBFromText(adjustedBookName)
        BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(bookNumber)
        if BBB2 != BBB:  # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                vPrint('Quiet', debuggingThisModule,
                       "Assuming that book {} {!r} is {} (not {})".format(
                           bookNumber, bookName, BBB2, BBB))
            BBB = BBB2

        if BBB:
            vPrint('Info', debuggingThisModule, _("Validating {} {}…").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error("vb26 Expected to find {!r} but got {!r}".format(
                        VerseViewXMLBible.chapterTag, element.tag))
            vPrint('Info', debuggingThisModule, " Saving {} into results…".format(BBB))
            self.stashBook(thisBook)
    # end of VerseViewXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        Parameters:
            BBB: book code of the book being loaded
            thisBook: the book object that lines are added to
            chapter: the XML chapter element

        A missing 'n' attribute is logged as an error, but the verses are
            still processed (with a chapter number of None).
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML chapter…"))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            else:
                logging.warning("Unprocessed {!r} attribute ({}) in chapter element".format(
                    attrib, value))
        if chapterNumber:
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error("Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag == VerseViewXMLBible.verseTag:
                self.__validateAndExtractVerse(BBB, chapterNumber, thisBook, element)
            else:
                logging.error("sv34 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.verseTag, element.tag))
    # end of VerseViewXMLBible.__validateAndExtractChapter

    def __validateAndExtractVerse(self, BBB: str, chapterNumber, thisBook, verse):
        """
        Check/validate and extract verse data from the given XML book record
            finding and saving verse elements.

        Parameters:
            BBB: book code of the book being loaded
            chapterNumber: the chapter 'n' attribute (a string, or None if it was missing)
            thisBook: the book object that lines are added to
            verse: the XML verse element

        Only the verse's own text is used (stripped, and with newlines
            replaced by spaces) -- verse subelements are not processed.
        Verses with no text are not added to the book.
        A verse without an 'n' attribute is logged as an error and skipped.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML verse…"))

        location = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
        BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = None
        for attrib, value in verse.items():
            if attrib == "n":
                verseNumber = value
            else:
                logging.warning("Unprocessed {!r} attribute ({}) in verse element".format(
                    attrib, value))
        if BibleOrgSysGlobals.debugFlag:
            assert verseNumber
        if verseNumber is None:
            # Without this check, the string concatenation below would fail with a TypeError
            logging.error("Missing 'n' attribute in verse element in {} {}".format(
                BBB, chapterNumber))
            return

        vText = (verse.text or '').strip()
        if vText:  # This is the main text of the verse
            if '\n' in vText:
                vPrint('Quiet', debuggingThisModule,
                       "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
                       .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.addLine('v', verseNumber + ' ' + vText)
    # end of VerseViewXMLBible.__validateAndExtractVerse
def importDataToPython(self):
    """
    Loads (and pivots) the data (not including the header) into suitable Python containers
        to use in a Python program.
    (Of course, you can just use the elementTree in self._XMLTree if you prefer.)

    Returns a 2-tuple:
        the list of (sourceReference, sourceComponent, parsedSourceReference, actualLinksList) entries
            where actualLinksList contains
            (targetReference, targetComponent, parsedTargetReference, linkType) entries, and
        a dictionary (keyed by the individual verse keys) containing both the
            forward entries and the reversed (target to source) entries.

    The result is cached, so a second call returns the same containers.

    Raises TypeError if a reference that is not marked as a 'Verse'
        nevertheless parses as a single verse key, and
        ValueError if there's no reverse name for a link type.
    """
    REVERSE_LINK_TYPES = {
        'TSK': 'TSKQuoted',
        'QuotedOTReference': 'OTReferenceQuoted',
        'AlludedOTReference': 'OTReferenceAlluded',
        'PossibleOTReference': 'OTReferencePossible',
    }

    def checkComponent(component, reference):
        """
        Check that the reference agrees with its component type,
            i.e., only a 'Verse' should be accepted as a SimpleVerseKey.
        """
        if component == 'Verse':
            SimpleVerseKey(reference)  # Should succeed (otherwise let the exception through)
            return
        try:
            SimpleVerseKey(reference, ignoreParseErrors=True)
        except TypeError:
            return  # This should happen coz it should fail the SVK
        logging.error("{} {!r} failed!".format(component, reference))
        raise TypeError("{} {!r} was accepted as a single verse key".format(component, reference))
    # end of checkComponent

    assert self._XMLTree
    if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
        return self.__DataList, self.__DataDict

    # First collect the raw strings from the XML
    rawRefLinkList = []
    actualLinkCount = 0
    for element in self._XMLTree:
        # Get these first for helpful error messages
        sourceReference = element.find('sourceReference').text
        sourceComponent = element.find('sourceComponent').text
        assert sourceComponent in ('Section', 'Verses', 'Verse',)

        BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
        BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference, 'kd21')
        BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

        actualRawLinksList = []
        for subelement in element:
            if subelement.tag in ('sourceReference', 'sourceComponent',):  # already processed these
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sourceReference, 'ls12')
                BibleOrgSysGlobals.checkXMLNoSubelements(subelement, sourceReference, 'ks02')
                BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'sqw1')
            elif subelement.tag == 'BibleReferenceLink':
                BibleOrgSysGlobals.checkXMLNoText(subelement, sourceReference, 'haw9')
                BibleOrgSysGlobals.checkXMLNoAttributes(subelement, sourceReference, 'hs19')
                BibleOrgSysGlobals.checkXMLNoTail(subelement, sourceReference, 'jsd9')

                targetReference = subelement.find('targetReference').text
                targetComponent = subelement.find('targetComponent').text
                assert targetComponent in ('Section', 'Verses', 'Verse',)
                linkType = subelement.find('linkType').text
                assert linkType in ('TSK', 'QuotedOTReference', 'AlludedOTReference', 'PossibleOTReference',)

                actualRawLinksList.append((targetReference, targetComponent, linkType,))
                actualLinkCount += 1

        rawRefLinkList.append((sourceReference, sourceComponent, actualRawLinksList,))

    vPrint('Normal', debuggingThisModule,
           f" {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)")

    # Now parse all of the references
    myRefLinkList = []
    actualLinkCount = 0
    BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    for j, (sourceReference, sourceComponent, actualRawLinksList) in enumerate(rawRefLinkList):
        # Just do some testing first
        checkComponent(sourceComponent, sourceReference)
        # Now do the actual parsing
        parsedSourceReference = FlexibleVersesKey(sourceReference)
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            vPrint('Quiet', debuggingThisModule, j, sourceComponent, sourceReference, parsedSourceReference)

        actualLinksList = []
        for targetReference, targetComponent, linkType in actualRawLinksList:
            # Just do some testing first
            checkComponent(targetComponent, targetReference)
            # Now do the actual parsing
            try:
                parsedTargetReference = FlexibleVersesKey(targetReference)
            except TypeError:
                logging.error(" Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                              .format(targetReference))
                parsedTargetReference = None
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, ' ', targetComponent, targetReference, parsedTargetReference)

            actualLinksList.append((targetReference, targetComponent, parsedTargetReference, linkType,))
            actualLinkCount += 1

        myRefLinkList.append((sourceReference, sourceComponent, parsedSourceReference, actualLinksList,))
    vPrint('Normal', debuggingThisModule,
           " {:,} links processed (with {:,} actual link entries)".format(
               len(rawRefLinkList), actualLinkCount))
    self.__DataList = myRefLinkList

    # Now put it into my dictionaries for easy access
    # This part should be customized or added to for however you need to process the data

    # Create a link dictionary (by verse key)
    myRefLinkDict = {}
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
        for verseRef in parsedSourceReference.getIncludedVerses():
            assert isinstance(verseRef, SimpleVerseKey)
            myRefLinkDict.setdefault(verseRef, []).append(
                (sourceReference, sourceComponent, parsedSourceReference, actualLinksList,))
    originalLinks = len(myRefLinkDict)
    vPrint('Quiet', debuggingThisModule,
           " {:,} verse links added to dictionary (includes filling out spans)"
           .format(originalLinks))

    # Create a reversed link dictionary (by verse key)
    for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
        for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
            if parsedTargetReference is None:
                continue  # It couldn't be parsed above
            try:
                reverseLinkType = REVERSE_LINK_TYPES[linkType]
            except KeyError:  # Have a new linkType!
                raise ValueError("No reverse link type is defined for {!r}".format(linkType)) from None
            for verseRef in parsedTargetReference.getIncludedVerses():
                assert isinstance(verseRef, SimpleVerseKey)
                myRefLinkDict.setdefault(verseRef, []).append(
                    (targetReference, targetComponent, parsedTargetReference,
                     [(sourceReference, sourceComponent, parsedSourceReference, reverseLinkType)]))
    totalLinks = len(myRefLinkDict)
    reverseLinks = totalLinks - originalLinks
    vPrint('Quiet', debuggingThisModule,
           " {:,} reverse links added to dictionary to give {:,} total".format(
               reverseLinks, totalLinks))
    self.__DataDict = myRefLinkDict

    # Let's find the most number of references for a verse
    mostReferences = totalReferences = 0
    mostVerseRef = None  # Stays None if the dictionary is empty
    for verseRef, entryList in self.__DataDict.items():
        numRefs = len(entryList)
        if numRefs > mostReferences:
            mostReferences, mostVerseRef = numRefs, verseRef
        totalReferences += numRefs
    if mostVerseRef is not None:
        vPrint('Quiet', debuggingThisModule,
               " {:,} maximum links for any one reference ({})".format(
                   mostReferences, mostVerseRef.getShortText()))
    vPrint('Quiet', debuggingThisModule,
           " {:,} total links for all references".format(totalReferences))

    return self.__DataList, self.__DataDict
def createMySwordModule(self, outputFolder, controlDict):
    """
    Create a SQLite3 database module for the program MySword.

    self here is a Bible object with _processedLines

    Parameters:
        outputFolder: the folder that the module is written into
        controlDict: may contain 'MySwordOutputFilename' to specify the filename

    Any existing file with the same name is removed first.
    The database gets a one-row 'Details' table plus a 'Bible' table
        with one row per non-blank verse (in the GENERIC-KJV-66-ENG order),
        and is then also written out as a gzipped tar file (with '.gz' appended).

    Returns True.
    """
    import tarfile
    from BibleOrgSys.Internals.InternalBibleInternals import BOS_ADDED_NESTING_MARKERS, BOS_NESTING_MARKERS
    from BibleOrgSys.Formats.theWordBible import theWordOTBookLines, theWordNTBookLines, theWordBookLines, theWordHandleIntroduction, theWordComposeVerseLine

    def writeMSBook(sqlObject, BBB: str, ourGlobals):
        """
        Writes a book to the MySword sqlObject file.

        Every verse slot of the versification system is visited,
            but only verses that give a non-blank line are inserted.
        """
        nonlocal lineCount

        def mergeExtraVerse(verseData, extraReference):
            """
            Return verseData with the data of the extra verse appended
                (if that verse exists in the book).

            If the base verse itself was missing (verseData is None),
                the extra verse's data is used by itself.
                (Calling extend() on None would fail with an AttributeError.)
            """
            try:
                extraVerseData, _extraContext = bkData.getContextVerseData(extraReference)
            except KeyError:
                return verseData  # just ignore it
            if verseData is None:
                return extraVerseData
            verseData.extend(extraVerseData)
            return verseData
        # end of writeMSBook.mergeExtraVerse

        bkData = self.books[BBB] if BBB in self.books else None
        verseList = BOS.getNumVersesList(BBB)
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        numC, numV = len(verseList), verseList[0]

        ourGlobals['line'], ourGlobals['lastLine'] = '', None
        for piKey in ('pi1', 'pi2', 'pi3', 'pi4', 'pi5', 'pi6', 'pi7'):
            ourGlobals[piKey] = False
        if bkData:
            # Write book headings (stuff before chapter 1)
            ourGlobals['line'] = theWordHandleIntroduction(BBB, bkData, ourGlobals)

        # Write the verses
        C = V = 1
        ourGlobals['lastLine'] = ourGlobals['lastBCV'] = None
        while True:
            verseData = None
            if bkData:
                try:
                    verseData, _context = bkData.getContextVerseData((BBB, str(C), str(V),))
                except KeyError:  # Missing verses
                    logging.warning(
                        "BibleWriter.createMySwordModule: missing source verse at {} {}:{}"
                        .format(BBB, C, V))
                # Handle some common versification anomalies
                if (BBB, C, V) == ('JN3', 1, 14):  # Add text for v15 if it exists
                    verseData = mergeExtraVerse(verseData, ('JN3', '1', '15',))
                elif (BBB, C, V) == ('REV', 12, 17):  # Add text for v18 if it exists
                    verseData = mergeExtraVerse(verseData, ('REV', '12', '18',))
            composedLine = ''
            if verseData:
                composedLine = theWordComposeVerseLine(BBB, C, V, verseData, ourGlobals)
            # Stay one line behind (because paragraph indicators get appended to the previous line)
            if ourGlobals['lastBCV'] is not None \
            and ourGlobals['lastLine']:  # don't bother writing blank (unfinished?) verses
                lastB, lastC, lastV = ourGlobals['lastBCV'][0], ourGlobals['lastBCV'][1], ourGlobals['lastBCV'][2]
                sqlObject.execute('INSERT INTO "Bible" VALUES(?,?,?,?)',
                                  (lastB, lastC, lastV, ourGlobals['lastLine']))
                lineCount += 1
            ourGlobals['lastLine'] = composedLine
            ourGlobals['lastBCV'] = (nBBB, C, V)
            V += 1
            if V > numV:
                C += 1
                if C > numC:
                    break
                else:  # next chapter only
                    numV = verseList[C - 1]
                    V = 1

        # Write the last line of the file
        if ourGlobals['lastLine']:  # don't bother writing blank (unfinished?) verses
            lastB, lastC, lastV = ourGlobals['lastBCV'][0], ourGlobals['lastBCV'][1], ourGlobals['lastBCV'][2]
            sqlObject.execute('INSERT INTO "Bible" VALUES(?,?,?,?)',
                              (lastB, lastC, lastV, ourGlobals['lastLine']))
            lineCount += 1
    # end of createMySwordModule.writeMSBook

    # Set-up their Bible reference system
    BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

    # Try to figure out if it's an OT/NT or what (allow for up to 4 extra books like FRT,GLS, etc.)
    # NOTE: booksExpected, textLineCountExpected, and checkTotals aren't used yet in this function
    if len(self) <= (39 + 4) and self.containsAnyOT39Books() and not self.containsAnyNT27Books():
        testament, startBBB, endBBB = 'OT', 'GEN', 'MAL'
        booksExpected, textLineCountExpected, checkTotals = 39, 23145, theWordOTBookLines
    elif len(self) <= (27 + 4) and self.containsAnyNT27Books() and not self.containsAnyOT39Books():
        testament, startBBB, endBBB = 'NT', 'MAT', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 27, 7957, theWordNTBookLines
    else:  # assume it's an entire Bible
        testament, startBBB, endBBB = 'BOTH', 'GEN', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 66, 31102, theWordBookLines
    extension = '.bbl.mybible'

    vPrint('Info', debuggingThisModule, _(" Exporting to MySword format…"))
    mySettings = {}
    mySettings['unhandledMarkers'] = set()
    handledBooks = []

    # Choose the output filename (the first of these that's available)
    if 'MySwordOutputFilename' in controlDict:
        filename = controlDict['MySwordOutputFilename']
    elif self.sourceFilename:
        filename = self.sourceFilename
    elif self.shortName:
        filename = self.shortName
    elif self.abbreviation:
        filename = self.abbreviation
    elif self.name:
        filename = self.name
    else:
        filename = 'export'
    if not filename.endswith(extension):
        filename += extension  # Make sure that we have the right file extension
    filepath = os.path.join(outputFolder, BibleOrgSysGlobals.makeSafeFilename(filename))
    if os.path.exists(filepath):
        os.remove(filepath)
    vPrint('Info', debuggingThisModule,
           ' createMySwordModule: ' + _("Writing {!r}…").format(filepath))

    conn = sqlite3.connect(filepath)
    try:  # so that the database connection always gets closed
        cursor = conn.cursor()

        # First write the settings Details table
        exeStr = 'CREATE TABLE Details(Description NVARCHAR(255), Abbreviation NVARCHAR(50), Comments TEXT, Version TEXT, VersionDate DATETIME, PublishDate DATETIME, RightToLeft BOOL, OT BOOL, NT BOOL, Strong BOOL'  # incomplete
        customCSS = self.getSetting('CustomCSS')
        if customCSS:
            exeStr += ', CustomCSS TEXT'
        exeStr += ')'
        cursor.execute(exeStr)

        # The values must be in the same order as the columns created above
        values = []

        description = self.getSetting('Description')
        if not description:
            description = self.getSetting('description')
        if not description:
            description = self.name
        values.append(description)

        if self.abbreviation:
            abbreviation = self.abbreviation
        else:
            abbreviation = self.getSetting('WorkAbbreviation')
        if not abbreviation:
            abbreviation = self.name[:3].upper()
        values.append(abbreviation)

        for settingName in ('Comments', 'Version', 'VersionDate', 'PublishDate', 'RightToLeft'):
            values.append(self.getSetting(settingName))

        values.append(testament in ('OT', 'BOTH'))
        values.append(testament in ('NT', 'BOTH'))

        Strong = self.getSetting('Strong')
        values.append(Strong if Strong else False)

        if customCSS:
            values.append(customCSS)

        exeStr = 'INSERT INTO "Details" VALUES(' + '?,' * (len(values) - 1) + '?)'
        cursor.execute(exeStr, values)

        # Now create and fill the Bible table
        cursor.execute(
            'CREATE TABLE Bible(Book INT, Chapter INT, Verse INT, Scripture TEXT, Primary Key(Book,Chapter,Verse))')
        conn.commit()  # save (commit) the changes
        BBB, lineCount = startBBB, 0
        while True:  # Write each Bible book in the KJV order
            writeMSBook(cursor, BBB, mySettings)
            conn.commit()  # save (commit) the changes
            handledBooks.append(BBB)
            if BBB == endBBB:
                break
            BBB = BOS.getNextBookCode(BBB)
        conn.commit()  # save (commit) the changes
        cursor.close()
    finally:
        conn.close()  # Closing the cursor doesn't close the connection

    if mySettings['unhandledMarkers']:
        logging.warning("BibleWriter.createMySwordModule: Unhandled markers were {}".format(
            mySettings['unhandledMarkers']))
        vPrint('Normal', debuggingThisModule,
               " " + _("WARNING: Unhandled createMySwordModule markers were {}").format(
                   mySettings['unhandledMarkers']))
    unhandledBooks = [BBB for BBB in self.getBookList() if BBB not in handledBooks]
    if unhandledBooks:
        logging.warning("createMySwordModule: Unhandled books were {}".format(unhandledBooks))
        vPrint('Normal', debuggingThisModule,
               " " + _("WARNING: Unhandled createMySwordModule books were {}").format(
                   unhandledBooks))

    # Now create the gzipped file
    vPrint('Info', debuggingThisModule, " Compressing {} MySword file…".format(filename))
    with tarfile.open(filepath + '.gz', 'w:gz') as tar:  # closed even if add() fails
        tar.add(filepath)

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        vPrint('Quiet', debuggingThisModule,
               " BibleWriter.createMySwordModule finished successfully.")
    return True
def main() -> None:
    """
    Interactive converter between absolute verse numbers and Bible references.

    Repeatedly prompts the user for either an absolute verse number or a
    single Bible verse reference, and prints the equivalent value according
    to the GENERIC-KJV-66-ENG organisational system. The loop finishes when
    the user enters one of: exit, quit, q, stop, halt (any letter case).

    Note that the standard verbosityLevel is 2:
        -s (silent) is 0
        -q (quiet) is 1
        -i (information) is 3
        -v (verbose) is 4.
    """
    BibleOrgSysGlobals.introduceProgram(__name__, programNameVersion,
                                        LAST_MODIFIED_DATE)

    ourBibleOrganisationalSystem = BibleOrganisationalSystem(
        "GENERIC-KJV-66-ENG")
    ourVersificationSystem = ourBibleOrganisationalSystem.getVersificationSystemName()
    ourBibleSingleReference = BibleSingleReference(
        ourBibleOrganisationalSystem)

    vPrint('Quiet', debuggingThisModule, _("Use QUIT or EXIT to finish."))

    while True:  # Loop until they stop it
        userInput = input('\n' + _(
            "Enter a verse number 1..31102 or a single Bible verse reference (or QUIT): "
        ))
        if userInput.lower() in ('exit', 'quit', 'q', 'stop', 'halt'):
            break

        # See if it's an absolute verse number
        try:
            userInt = int(userInput)
        except ValueError:
            userInt = None

        # Compare against None (not truthiness) so that an input of 0 is
        #   still treated as an (out-of-range) verse number rather than
        #   being handed to the reference parser.
        if userInt is not None:
            if 1 <= userInt <= 31102:
                BBB, C, V = ourBibleOrganisationalSystem.convertAbsoluteVerseNumber(
                    userInt)
                vPrint(
                    'Quiet', debuggingThisModule,
                    _("{} verse number {} is {} {}:{}").format(
                        ourVersificationSystem, userInt, BBB, C, V))
            else:
                vPrint('Quiet', debuggingThisModule,
                       _("Absolute verse numbers must be in range 1..31,102."))
            continue

        # Otherwise assume it's a Bible reference
        adjustedUserInput = userInput
        if ':' not in adjustedUserInput:
            # Handle possible alternative C:V punctuations
            #   (only the first occurrence of the first one found is replaced)
            for alternative in ('.', ',', '-'):
                if alternative in adjustedUserInput:
                    adjustedUserInput = adjustedUserInput.replace(
                        alternative, ':', 1)
                    break
        results = ourBibleSingleReference.parseReferenceString(
            adjustedUserInput)
        successFlag, haveWarnings, BBB, C, V, S = results
        if not successFlag:
            vPrint(
                'Quiet', debuggingThisModule,
                _("Unable to find a valid single verse reference in your input: {!r}"
                  ).format(userInput))
            continue

        vPrint(
            'Quiet', debuggingThisModule,
            _("{!r} converted to {} {}:{} in our internal system.").format(
                userInput, BBB, C, V))
        absoluteVerseNumber = ourBibleOrganisationalSystem.getAbsoluteVerseNumber(
            BBB, C, V)
        vPrint(
            'Quiet', debuggingThisModule,
            _(" {} {}:{} is verse number {:,} in the {} versification system."
              ).format(BBB, C, V, absoluteVerseNumber,
                       ourVersificationSystem))
        if BibleOrgSysGlobals.debugFlag:
            # Also show the number in hexadecimal when debugging
            vPrint(
                'Quiet', debuggingThisModule,
                _(" {} {}:{} is verse number 0x{:04x} in the {} versification system."
                  ).format(BBB, C, V, absoluteVerseNumber,
                           ourVersificationSystem))
class MySwordBible(Bible):
    """
    Class for reading, validating, and converting MySwordBible files.

    Typical use is to construct the object, call preload() to open the
    SQLite3 database and read its Details table, and then call either
    load() (all books) or loadBook() (one book).
    """

    def __init__(self, sourceFolder, givenFilename, encoding='utf-8') -> None:
        """
        Constructor: just sets up the Bible object.

        sourceFolder and givenFilename are joined to give the filepath of
        the MySword database. encoding is only stored on the object here.
        An unreadable file is logged as critical but does not raise.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'MySword Bible object'
        self.objectTypeString = 'MySword'

        # Now we can set our object variables
        self.sourceFolder, self.sourceFilename, self.encoding = sourceFolder, givenFilename, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder,
                                           self.sourceFilename)

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            logging.critical(
                _("MySwordBible: File {!r} is unreadable").format(
                    self.sourceFilepath))

        # The Bible name defaults to the filename without its extension
        self.name, self.fileExtension = os.path.splitext(self.sourceFilename)
    # end of MySwordBible.__init__

    def preload(self):
        """
        Load the metadata from the SQLite3 database.

        Opens the database, leaves an open cursor in self.cursor (used
        later by load() and loadBook()), and copies the first row of the
        Details table into self.suppliedMetadata['MySword'].
        Filename/extension problems, an empty Details table, and the
        presence of an 'encryption' column are logged as critical but
        do not raise.
        """
        fnPrint(debuggingThisModule, "preload()…")
        vPrint('Info', debuggingThisModule,
               _("Preloading {}…").format(self.sourceFilepath))

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in FILENAME_ENDINGS_TO_ACCEPT:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BIBLE_FILENAME_ENDINGS_TO_ACCEPT[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        self.cursor = connection.cursor()

        # First get the settings
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['MySword'] = {}
        self.cursor.execute('select * from Details')
        row = self.cursor.fetchone()
        if row is None:
            # fetchone() gives None when the table has no rows at all
            logging.critical("{} has no rows in its Details table".format(
                self.sourceFilename))
        else:
            for key in row.keys():
                self.suppliedMetadata['MySword'][key] = row[key]

        if 'encryption' in self.suppliedMetadata['MySword']:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename,
                self.suppliedMetadata['MySword']['encryption']))

        self.BibleOrganisationalSystem = BibleOrganisationalSystem(
            'GENERIC-KJV-66-ENG')

        self.preloadDone = True
    # end of MySwordBible.preload

    def _startMySwordBook(self, BBB):
        """
        Create an empty book object for BBB.

        Returns a 3-tuple: the new BibleBook, the list from
        getNumVersesList( BBB ) (indexed by chapter number minus one),
        and the reference number that is used for BBB in the Book
        column of the database queries.
        """
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'
        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        return thisBook, verseList, nBBB
    # end of MySwordBible._startMySwordBook

    def _processMySwordVerse(self, thisBook, ourGlobals, BBB, nBBB, C, V):
        """
        Fetch the database entry for one verse, tidy it, and pass it on
        to handleRTFLine (shared by load and loadBook).

        Returns a 2-tuple of booleans ( gotText, mustAbort ):
            gotText is True if a non-empty text string was found.
            mustAbort is True if the entry was not a string and the
                module metadata has an 'encryption' entry, in which case
                nothing is passed to handleRTFLine and the caller should
                stop loading.

        A missing, non-string, or blank entry is only logged; the value
        (possibly None) is still passed on to handleRTFLine.
        """
        self.cursor.execute(
            'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
            (nBBB, C, V))
        row = self.cursor.fetchone()
        line = None if row is None else row[0]  # row is None if no such reference

        gotText = False
        if line is None:
            logging.warning(
                "MySwordBible.load: Have missing verse line at {} {}:{}".
                format(BBB, C, V))
        elif not isinstance(line, str):
            if 'encryption' in self.suppliedMetadata['MySword']:
                logging.critical(
                    "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                    .format(BBB, C, V, line))
                return False, True
            logging.critical(
                "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                .format(BBB, C, V, line, self.suppliedMetadata['MySword']))
        elif not line:
            logging.warning(
                "MySwordBible.load: Found blank verse line at {} {}:{}"
                .format(BBB, C, V))
        else:
            gotText = True
            # Some modules end lines with \r\n or have it in the middle!
            #   (We just ignore these for now)
            line = line.rstrip('\r\n')
            if '\r' in line or '\n' in line:  # (in the middle)
                logging.warning(
                    "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                    .format(BBB, C, V))
                line = line.replace('\r\n', ' ').replace('\r', ' ').replace(
                    '\n', ' ')

        handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
        return gotText, False
    # end of MySwordBible._processMySwordVerse

    def load(self):
        """
        Load all the books out of the SQLite3 database.

        Uses the OT and NT entries of the preloaded metadata to decide
        which book to start at and how many books to attempt, then
        queries every expected chapter/verse of each book in turn.
        Only books in which some text was found are stashed.
        Afterwards the cursor is closed and deleted, the supplied
        metadata is applied, and post-load processing is done.

        Raises ValueError if neither the OT nor the NT metadata entry is
        set (so there is nothing to tell us where to start).
        """
        fnPrint(debuggingThisModule, "load()…")
        assert self.preloadDone

        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        metadata = self.suppliedMetadata['MySword']
        haveOT, haveNT = metadata.get('OT'), metadata.get('NT')
        if haveOT and haveNT:
            BBB, booksExpected = 'GEN', 66
        elif haveOT:
            BBB, booksExpected = 'GEN', 39
        elif haveNT:
            BBB, booksExpected = 'MAT', 27
        else:
            raise ValueError(
                "MySwordBible.load: {} metadata has neither OT nor NT set".
                format(self.sourceFilename))

        # Create the first book
        thisBook, verseList, nBBB = self._startMySwordBook(BBB)
        numC, numV = len(verseList), verseList[0]
        C = V = 1

        bookCount = 0
        ourGlobals = {'haveParagraph': False}
        haveLines = False
        while True:
            gotText, mustAbort = self._processMySwordVerse(
                thisBook, ourGlobals, BBB, nBBB, C, V)
            if mustAbort:
                break
            if gotText:
                haveLines = True

            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        vPrint('Verbose', debuggingThisModule,
                               " MySword saving", BBB, bookCount + 1)
                        self.stashBook(thisBook)
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected:
                        break
                    BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)
                    # Create the next book
                    thisBook, verseList, nBBB = self._startMySwordBook(BBB)
                    haveLines = False
                    numC, numV = len(verseList), verseList[0]
                    C = V = 1
                else:  # next chapter only
                    numV = verseList[C - 1]
                    V = 1

            # handleRTFLine gets ourGlobals, so this flag is set outside this method
            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        del self.cursor
        self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
    # end of MySwordBible.load

    def loadBook(self, BBB: str):
        """
        Load the requested book out of the SQLite3 database.

        Returns without doing anything (other than logging) if BBB is
        already in self.books, or if loading it has already been tried.
        Otherwise every expected chapter/verse of the book is queried,
        and the book is stashed if any text was found.
        """
        fnPrint(debuggingThisModule, "loadBook( {} )".format(BBB))
        assert self.preloadDone

        if BBB in self.books:
            dPrint('Quiet', debuggingThisModule,
                   " {} is already loaded -- returning".format(BBB))
            return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading MySwordBible {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            vPrint(
                'Quiet', debuggingThisModule,
                _("MySwordBible: Loading {} from {}…").format(
                    BBB, self.sourceFilepath))

        # Create the book
        thisBook, verseList, nBBB = self._startMySwordBook(BBB)
        numC, numV = len(verseList), verseList[0]
        C = V = 1

        ourGlobals = {'haveParagraph': False}
        haveLines = False
        while True:
            gotText, mustAbort = self._processMySwordVerse(
                thisBook, ourGlobals, BBB, nBBB, C, V)
            if mustAbort:
                break
            if gotText:
                haveLines = True

            V += 1
            if V > numV:
                C += 1
                if C <= numC:  # next chapter only
                    numV = verseList[C - 1]
                    V = 1
                else:  # Save this book now
                    if haveLines:
                        vPrint('Info', debuggingThisModule,
                               " MySword saving", BBB)
                        self.stashBook(thisBook)
                    break

            # handleRTFLine gets ourGlobals, so this flag is set outside this method
            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False
    # end of MySwordBible.loadBook
def load(self):
    """
    Load a single source file and load book elements.

    The file is read line by line:
        Lines starting with ';' are header/comment lines -- recognised
            ones are saved in a settings dictionary.
        Lines starting with '$$ ' are pointers -- either '{name}' which
            starts a metadata entry, or a 'book chapter:verse' reference
            which applies to the text on the following line(s).
        Other lines are either metadata contents or verse text. Verse
            text has scripref, {footnote}, [added] and +r/red-letter-r/
            markup converted to backslash markers before being added to
            the current book.

    Each completed book is stashed. Any settings found are saved in
    self.suppliedMetadata['Forge4SS'] and applied, and then post-load
    processing is done.
    """
    vPrint('Info', debuggingThisModule,
           _("Loading {}…").format(self.sourceFilepath))

    # The organisational systems are created only once (module-level cache)
    global BOS66, BOS81, BOSx
    if BOS66 is None:
        BOS66 = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')
    if BOS81 is None:
        BOS81 = BibleOrganisationalSystem('GENERIC-KJV-80-ENG')
    if BOSx is None:
        BOSx = BibleOrganisationalSystem('GENERIC-ENG')

    if self.suppliedMetadata is None:
        self.suppliedMetadata = {}

    lineCount = 0
    bookCode = BBB = metadataName = None
    lastBookCode = lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open(self.sourceFilepath, encoding=self.encoding
              ) as myFile:  # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1] == '\n':
                line = line[:-1]  # Removing trailing newline character
            if not line:
                continue  # Just discard blank lines
            if lineCount == 1:
                if self.encoding.lower() == 'utf-8' and line[0] == chr(
                        65279):  #U+FEFF or \ufeff
                    logging.info(
                        " ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)"
                    )
                    line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                match = re.search('^; TITLE:\\s', line)
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        vPrint(
                            'Quiet', debuggingThisModule,
                            "First line got type {!r} match from {!r}".
                            format(match.group(0), line))
                else:
                    # An unexpected first line is reported and then skipped
                    vPrint(
                        'Verbose', debuggingThisModule,
                        "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                        .format(line, self.sourceFilepath))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt
                    continue

            # Process header stuff
            # NOTE(review): the slice offsets below don't all match the
            #   lengths of the prefixes being tested (e.g., 15 for the
            #   16-character '; HAS FOOTNOTES:') -- left unchanged because
            #   the intended values aren't clear from here -- TODO confirm
            if line.startswith('; TITLE:'):
                string = line[8:].strip()
                if string:
                    settingsDict['TITLE'] = string
                continue
            if line.startswith('; ABBREVIATION:'):
                string = line[15:].strip()
                if string:
                    settingsDict['ABBREVIATION'] = string
                continue
            if line.startswith('; HAS ITALICS'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_ITALICS'] = string
                continue
            if line.startswith('; HAS FOOTNOTES:'):
                string = line[15:].strip()
                if string:
                    settingsDict['HAS_FOOTNOTES'] = string
                continue
            if line.startswith('; HAS FOOTNOTES'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_FOOTNOTES'] = string
                continue
            if line.startswith('; HAS REDLETTER'):
                string = line[14:].strip()
                if string:
                    settingsDict['HAS_REDLETTER'] = string
                continue
            if line[0] == ';':
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                    .format(line))
                continue  # Just discard comment lines

            # Process the main segment
            if line.startswith('$$ '):
                # A new pointer finishes any metadata entry being collected
                if metadataName and metadataContents:
                    settingsDict[metadataName] = metadataContents
                    metadataName = None
                pointer = line[3:]
                if pointer and pointer[0] == '{' and pointer[-1] == '}':
                    metadataName = pointer[1:-1]
                    if metadataName:
                        metadataContents = ''
                else:  # let's assume it's a BCV reference
                    # Close up numbered book names so that the first space
                    #   is the one between the book code and the C:V part
                    for spaced in ('1 K', '2 K', '1 Chr', '2 Chr', '1 Cor',
                                   '2 Cor', '1 Thess', '2 Thess', '1 Tim',
                                   '2 Tim', '1 Pet', '2 Pet', '1 J', '2 J',
                                   '3 J'):
                        pointer = pointer.replace(spaced,
                                                  spaced.replace(' ', ''))
                    B_CV_Bits = pointer.split(' ', 1)
                    if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                        bookCode, CVString = B_CV_Bits
                        chapterNumberString, verseNumberString = CVString.split(
                            ':')
                        chapterNumber = int(chapterNumberString)
                        verseNumber = int(verseNumberString)
                        if bookCode != lastBookCode:  # We've started a new book
                            if bookCode in ('Ge', ):
                                BBB = 'GEN'
                            elif bookCode in ('Le', ):
                                BBB = 'LEV'
                            elif bookCode in ('La', ):
                                BBB = 'LAM'
                            else:  # Try to guess
                                BBB = BOS66.getBBBFromText(bookCode)
                                if not BBB:
                                    BBB = BOS81.getBBBFromText(bookCode)
                                if not BBB:
                                    BBB = BOSx.getBBBFromText(bookCode)
                    else:
                        # Only report values that are sure to exist here
                        #   (the chapter/verse strings might not be set yet)
                        vPrint('Quiet', debuggingThisModule,
                               "Unexpected number of bits", self.givenName,
                               BBB, bookCode, len(B_CV_Bits), B_CV_Bits)
                continue  # Just save the pointer information which refers to the text on the next line

            # It's not a $$ line
            if metadataName:
                metadataContents += ('\n' if metadataContents else '') + line
                continue

            vText = line
            # Handle bits like (<scripref>Pr 2:7</scripref>)
            vText = vText.replace('(<scripref>', '\\x - \\xt ').replace(
                '</scripref>)', '\\x*')
            vText = vText.replace('<scripref>', '\\x - \\xt ').replace(
                '</scripref>', '\\x*')

            # Convert {stuff} to footnotes
            #   (Each replacement is followed by a fresh search of the whole text)
            match = re.search('\\{(.+?)\\}', vText)
            while match:
                footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                    chapterNumber, verseNumber, match.group(1))
                vText = vText[:match.start()] + footnoteText + vText[
                    match.end():]  # Replace this footnote
                match = re.search('\\{(.+?)\\}', vText)

            # Convert [stuff] to added fields
            match = re.search('\\[(.+?)\\]', vText)
            while match:
                addText = '\\add {}\\add*'.format(match.group(1))
                vText = vText[:match.start()] + addText + vText[
                    match.end():]  # Replace this chunk
                match = re.search('\\[(.+?)\\]', vText)

            # Convert +r/This text is red-letter-r/ to wj fields
            match = re.search('\\+r/(.+?)-r/', vText)
            while match:
                addText = '\\wj {}\\wj*'.format(match.group(1))
                vText = vText[:match.start()] + addText + vText[
                    match.end():]  # Replace this chunk
                match = re.search('\\+r/(.+?)-r/', vText)

            # Final check for unexpected remaining formatting (only warn once per line)
            if any(badChar in vText for badChar in '{}[]/'):
                logging.warning(
                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                    .format(BBB, chapterNumberString, verseNumberString,
                            vText))

            if not bookCode:  # No bookCode yet
                logging.warning(
                    "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                    .format(line))
                continue

            if bookCode != lastBookCode:  # We've started a new book
                if lastBookCode != -1 and thisBook is not None:  # Better save the last book
                    self.stashBook(thisBook)
                    # Forget it now so that it can't be stashed again
                    #   (lastBookCode isn't updated if the new book code
                    #    can't be resolved, so we get back here for every
                    #    line of such a book)
                    thisBook = None
                if BBB:
                    if BBB in self:
                        logging.critical(
                            "Have duplicated {} book in {}".format(
                                BBB, self.givenName))
                    if BibleOrgSysGlobals.debugFlag:
                        assert BBB not in self
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                    thisBook.objectTypeString = 'ForgeForSwordSearcher'
                    verseList = BOSx.getNumVersesList(BBB)
                    numChapters = len(verseList)
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical(
                        "ForgeForSwordSearcherBible could not figure out {!r} book code"
                        .format(bookCode))
                    if BibleOrgSysGlobals.debugFlag:
                        halt

            if not BBB:
                continue  # Can't save lines for a book that we couldn't identify

            if chapterNumber != lastChapterNumber:  # We've started a new chapter
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                if chapterNumber == 0:
                    logging.info(
                        "Have chapter zero in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                elif chapterNumber > numChapters:
                    logging.error(
                        "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                        .format(self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString,
                                numChapters))
                thisBook.addLine('c', chapterNumberString)
                lastChapterNumber = chapterNumber
                lastVerseNumber = -1

            # Handle the verse info
            if verseNumber == lastVerseNumber and vText == lastVText:
                logging.warning(
                    _("Ignored duplicate verse line in {} {} {} {}:{}"
                      ).format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                continue
            if verseNumber < lastVerseNumber:
                logging.warning(
                    _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                      ).format(lastVerseNumber, verseNumber, self.givenName,
                               BBB, bookCode, chapterNumberString,
                               verseNumberString))
            elif verseNumber == lastVerseNumber:
                # The text must differ here (identical lines were skipped just above)
                logging.warning(
                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                      ).format(verseNumber, self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))

            # Check for paragraph markers
            if vText and vText[0] == '¶':
                thisBook.addLine('p', '')
                vText = vText[1:].lstrip()

            thisBook.addLine('v', verseNumberString + ' ' + vText)
            lastVText = vText
            lastVerseNumber = verseNumber

    # Save the final book
    if thisBook is not None:
        self.stashBook(thisBook)

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        self.suppliedMetadata['Forge4SS'] = settingsDict
        self.applySuppliedMetadata(
            'Forge4SS')  # Copy some to self.settingsDict

    self.doPostLoadProcessing()
def preload(self):
    """
    Loads the Metadata file if it can be found.

    Unzips the source file into a new temporary folder, does some
    preliminary checks on the folder contents, and then parses the
    'Index' file inside the 'Bible Data' subfolder.

    Sets self.unzippedFolderpath, self.dataFolderpath, self.bookNames,
    self.filenameBases, self.startChapters, self.numChaptersList,
    self.numVersesList, self.BibleOrgSystem, self.bookList, and finally
    self.preloadDone.

    Raises FileNotFoundError if the expected files or folders aren't
    found in the unzipped data.
    """
    fnPrint(debuggingThisModule,
            "preload() from {}".format(self.sourceFilepath))

    self.unzippedFolderpath = tempfile.mkdtemp(suffix='_GoBible',
                                               prefix='BOS_')
    vPrint('Info', debuggingThisModule,
           "Extracting files into {}…".format(self.unzippedFolderpath))
    with zipfile.ZipFile(self.sourceFilepath) as myzip:
        # NOTE: Could be a security risk here
        myzip.extractall(self.unzippedFolderpath)

    def listFolder(folderpath):
        """
        Returns a 2-tuple ( fileNames, folderNames ) for the entries
        directly inside folderpath. Anything else is logged as an error.
        """
        fileNames, folderNames = [], []
        for something in os.listdir(folderpath):
            somepath = os.path.join(folderpath, something)
            if os.path.isdir(somepath):
                folderNames.append(something)
            elif os.path.isfile(somepath):
                fileNames.append(something)
            else:
                logging.error(
                    "GoBible.preload: Not sure what {!r} is in {}!".format(
                        somepath, folderpath))
        return fileNames, folderNames

    # Do a preliminary check on the contents of our folder
    foundFiles, foundFolders = listFolder(self.unzippedFolderpath)
    numVitalFolders = 0
    unexpectedFolders = []
    for folderName in foundFolders:
        if folderName in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
            continue
        if folderName in ('Bible Data', 'META-INF'):
            numVitalFolders += 1
            continue
        unexpectedFolders.append(folderName)
    if unexpectedFolders:
        logging.info(
            _("GoBible.preload: Surprised to see subfolders in {!r}: {}"
              ).format(self.unzippedFolderpath, unexpectedFolders))
    if not foundFiles:
        vPrint(
            'Quiet', debuggingThisModule,
            "GoBible.preload: Couldn't find any files in {!r}".format(
                self.unzippedFolderpath))
        raise FileNotFoundError  # No use continuing
    if not numVitalFolders:
        vPrint(
            'Quiet', debuggingThisModule,
            "GoBible.preload: Couldn't find any vital folders in {!r}".
            format(self.unzippedFolderpath))
        raise FileNotFoundError  # No use continuing

    self.dataFolderpath = os.path.join(self.unzippedFolderpath,
                                       'Bible Data/')
    if not os.path.isdir(self.dataFolderpath):
        logging.critical(
            _("GoBible.preload: Unable to find folder: {}").format(
                self.dataFolderpath))

    # Do a preliminary check on the contents of our subfolder
    foundFiles, foundFolders = listFolder(self.dataFolderpath)
    numBookFolders = 0
    unexpectedFolders = []
    for folderName in foundFolders:
        if folderName in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
            continue
        if folderName.lower().endswith('sfm'):  # .sfm or .usfm
            # (Foldernames vary so they're only counted here,
            #   not used to discover the book codes)
            numBookFolders += 1
            continue
        unexpectedFolders.append(folderName)
    if unexpectedFolders:
        logging.info(
            _("GoBible.preload: Surprised to see subfolders in {!r}: {}"
              ).format(self.dataFolderpath, unexpectedFolders))
    if not foundFiles:
        vPrint(
            'Quiet', debuggingThisModule,
            "GoBible.preload: Couldn't find any files in {!r}".format(
                self.dataFolderpath))
        raise FileNotFoundError  # No use continuing
    if not numBookFolders:
        vPrint(
            'Quiet', debuggingThisModule,
            "GoBible.preload: Couldn't find any book folders in {!r}".
            format(self.dataFolderpath))
        raise FileNotFoundError  # No use continuing

    def readInString(fileBytes, fileIndex):
        """
        Strings have a single byte length, then the UTF-8 characters,
        then a trailing null.

        Returns a 2-tuple: the decoded string, and the number of bytes
        consumed (including the length byte and the trailing null).
        Raises IndexError if there's no trailing null.
        """
        stringLength = fileBytes[fileIndex]
        startIndex = fileIndex + 1
        endIndex = fileBytes.find(b'\x00', startIndex)
        if endIndex == -1:
            raise IndexError(
                "GoBible.preload: unterminated string in Index file")
        rawBytes = fileBytes[startIndex:endIndex]
        assert len(rawBytes) == stringLength  # Read string correctly
        try:
            result = rawBytes.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8 after all, so fall back to one character
            #   per byte (same result as applying chr() to each byte)
            result = rawBytes.decode('latin-1')
        return result, stringLength + 2

    # Load the Index file
    with open(os.path.join(self.dataFolderpath, 'Index'),
              'rb') as main_index_file:
        mainIndexContents = main_index_file.read()
    index = 0
    numBooks, = struct.unpack("<H", mainIndexContents[index:index + 2])
    index += 2
    vPrint('Never', debuggingThisModule, "numBooks", numBooks)
    self.bookNames, self.filenameBases, self.startChapters, self.numChaptersList, self.numVersesList = [], [], [], [], []
    for bookIndex in range(numBooks):
        # Read in the name of the book
        bookName, consumedBytes = readInString(mainIndexContents, index)
        vPrint('Never', debuggingThisModule, "bookName", repr(bookName))
        self.bookNames.append(bookName)
        index += consumedBytes

        # Read in the short book name
        filenameBase, consumedBytes = readInString(mainIndexContents, index)
        vPrint('Never', debuggingThisModule, "filenameBase",
               repr(filenameBase))
        self.filenameBases.append(filenameBase)
        index += consumedBytes

        startChapter, = struct.unpack("<H",
                                      mainIndexContents[index:index + 2])
        index += 2
        vPrint('Never', debuggingThisModule, "startChapter", startChapter)
        self.startChapters.append(startChapter)

        # Read in the number of chapters in this book
        numChapters, = struct.unpack("<H",
                                     mainIndexContents[index:index + 2])
        index += 2
        vPrint('Never', debuggingThisModule, "numChapters", numChapters)
        self.numChaptersList.append(numChapters)

        # Read in the file number, verse offset, and number of verses for each chapter
        versesPerChapter = []
        previousFileNumber = 0
        verseDataOffset = 0
        for chapterIndex in range(numChapters):
            # Seems that each entry is six bytes:
            #   4 for the total verse data length, 1 for the number of
            #   verses, and 1 for the file number
            vPrint('Never', debuggingThisModule, chapterIndex,
                   mainIndexContents[index:index + 6])
            allVersesLength, = struct.unpack(
                ">I", mainIndexContents[index:index + 4])
            index += 4
            numVerses = mainIndexContents[index]
            index += 1
            # Seems that file number for final chapter is always zero (or missing for the last book)!!!
            try:
                fileNumber = mainIndexContents[index]
                index += 1
            except IndexError:
                fileNumber = 0  # Why??? (will be adjusted just below)
            # Why do we need this ???
            if fileNumber == 0 and previousFileNumber > 0:
                vPrint(
                    'Never', debuggingThisModule,
                    "Don't know why but: Adjusting file number from 0 to",
                    previousFileNumber)
                fileNumber = previousFileNumber
            if fileNumber != previousFileNumber:
                # The offset restarts from zero whenever the file number changes
                verseDataOffset = 0
                previousFileNumber = fileNumber
            versesPerChapter.append(
                (numVerses, fileNumber, verseDataOffset, allVersesLength))
            if debuggingThisModule:
                vPrint(
                    'Quiet', debuggingThisModule,
                    f"Book #{bookIndex+1} chapter {chapterIndex+1:3}: {fileNumber}, {verseDataOffset:,}, {allVersesLength:,}, {numVerses}"
                )
            verseDataOffset += allVersesLength
        self.numVersesList.append(versesPerChapter)
    assert index == len(mainIndexContents)

    self.BibleOrgSystem = BibleOrganisationalSystem('GENERIC-KJV-66')
    getBBBFromReferenceNumber = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber
    if numBooks == 66:
        self.bookList = [getBBBFromReferenceNumber(x) for x in range(1, 66 + 1)]
    elif numBooks == 27:
        self.bookList = [getBBBFromReferenceNumber(x) for x in range(40, 66 + 1)]
    else:
        logging.warning(
            f"GoBible.preload found {numBooks} books -- trying to figure out book codes"
        )
        self.bookList = []
        for n in range(numBooks):
            BBB1 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText(
                self.bookNames[n])
            BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText(
                self.filenameBases[n])
            if BBB1 and (BBB2 == BBB1 or BBB2 is None):
                BBB = BBB1
            elif BBB2 and BBB1 is None:
                BBB = BBB2
            elif BBB1 and BBB2:
                logging.error(
                    f"GoBible.preload choosing '{self.bookNames[n]}'->{BBB1} over '{self.filenameBases[n]}'->{BBB2}"
                )
                BBB = BBB1
            else:
                # Reference numbers are counted from 1 (as in the ranges
                #   above) but n is counted from 0
                BBB = getBBBFromReferenceNumber(n + 1)
                logging.error(
                    f"GoBible.preload unable to discover book code from '{self.bookNames[n]}'->{BBB1} or '{self.filenameBases[n]}'->{BBB2}: assuming {BBB}"
                )
            self.bookList.append(BBB)
    vPrint('Quiet', debuggingThisModule,
           "GoBible.preload: {} book details preloaded".format(numBooks))
    if len(self.bookList) != numBooks:
        logging.critical(
            f"GoBible.preload could only discover book codes for {len(self.bookList)}/{numBooks} books"
        )

    self.preloadDone = True
def load( self ):
    """
    Load a single VPL (verse-per-line) source file and load book elements.

    The flavour of the file is detected from line 1 of the file:
        type 1: 'bookCode chapter:verse text' (space-separated, and the
                    chapter/verse separator may be ':' or '.')
        type 2: 'BBCCCVVV<tab>text' where the leading digits are the
                    zero-padded book, chapter and verse numbers
        type 3: the same verse lines as type 2, but after '# key: value'
                    header lines (which get saved in self.suppliedMetadata['VPL'])

    Each book is built up as a BibleBook and handed to self.stashBook(),
        and self.doPostLoadProcessing() is called at the end.

    Malformed verse lines are reported and skipped rather than being
        processed with values left over from the previous line.
    """
    vPrint( 'Info', debuggingThisModule, _("Loading {}…").format( self.sourceFilepath ) )

    global BOS66, BOS81, BOSx
    if BOS66 is None: BOS66 = BibleOrganisationalSystem( 'GENERIC-KJV-66-ENG' )
    if BOS81 is None: BOS81 = BibleOrganisationalSystem( 'GENERIC-KJV-80-ENG' )
    if BOSx is None: BOSx = BibleOrganisationalSystem( 'GENERIC-ENG' )

    if self.suppliedMetadata is None: self.suppliedMetadata = {}

    # Header line prefixes of type 3 files and the settings key that each one fills
    headerFields = (
        ( '# language_name:', 'LanguageName' ),
        ( '# closest ISO 639-3:', 'ISOLanguageCode' ),
        ( '# year_short:', 'Year.short' ),
        ( '# year_long:', 'Year.long' ),
        ( '# title:', 'WorkTitle' ),
        ( '# URL:', 'URL' ),
        ( '# copyright_short:', 'Copyright.short' ),
        ( '# copyright_long:', 'Copyright.long' ),
        )
    # Book codes for type 2/3 book numbers beyond the first 66
    #   (built once here rather than again for every new book)
    bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                85:'PSS', 86:'ODE', }

    lineCount = 0
    vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = None
    lastChapterNumber = lastVerseNumber = -1
    lastVText = ''
    thisBook = None
    settingsDict = {}
    with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
        for line in myFile:
            lineCount += 1
            if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
            if not line: continue # Just discard blank lines

            if lineCount==1:
                if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                    logging.info( " VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                    line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                # Try to identify the VPL type
                match = re.search( '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line )
                if match: vplType = 1
                else:
                    match = re.search( '^(\\d{8})\\s', line )
                    if match: vplType = 2
                    else:
                        match = re.search( '^# language_name:\\s', line )
                        if match: vplType = 3
                if match:
                    if BibleOrgSysGlobals.debugFlag:
                        vPrint( 'Quiet', debuggingThisModule, "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                else:
                    vPrint( 'Verbose', debuggingThisModule, "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                    # NOTE(review): 'halt' is not defined in this block -- presumably a deliberate way to abort when debugging
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                    continue

            # Process header stuff (every '#' line of a type 3 file is consumed here)
            if vplType == 3 and line[0]=='#':
                for prefix, settingsKey in headerFields:
                    if line.startswith( prefix ):
                        string = line[len(prefix):].strip()
                        if string and string != 'Not available': settingsDict[settingsKey] = string
                        break
                else: # didn't match any of the known header fields
                    logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                continue

            # Process the main segment
            psalmTitle = None # Gets set if this verse line starts with a «Psalm title»
            if vplType == 1:
                bits = line.split( ' ', 2 )
                # The type detection above accepts either ':' or '.' between chapter and verse,
                #   so accept both here also (preferring ':' if it's there)
                separator = ':' if len(bits) > 1 and ':' in bits[1] else '.'
                if len(bits) != 3 or separator not in bits[1]:
                    vPrint( 'Quiet', debuggingThisModule, "Unexpected number of bits", self.givenName, BBB, len(bits), bits )
                    continue # Don't fall through with unset values or with values from the previous line
                bookCodeText, CVString, vText = bits
                chapterNumberString, verseNumberString = CVString.split( separator, 1 )
                if chapterNumberString == '': chapterNumberString = '1' # Handle a bug in some single chapter books in VPL

                if not bookCodeText and not chapterNumberString and not verseNumberString:
                    vPrint( 'Quiet', debuggingThisModule, "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    continue
                if BibleOrgSysGlobals.debugFlag: assert 2 <= len(bookCodeText) <= 4
                if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                if not verseNumberString.isdigit():
                    logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCodeText, BBB, chapterNumberString, verseNumberString ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                    continue
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookCodeText != lastBookCodeText: # We've started a new book
                    lastBBB = BBB
                    # These two abbreviations are only accepted straight after the expected preceding book
                    if bookCodeText == 'Le' and lastBBB == 'GEN': BBB = 'LEV'
                    elif bookCodeText in ('Jud',) and lastBBB == 'JOS': BBB = 'JDG'
                    else: # Try to guess, using progressively wider sets of book names
                        BBB = BOS66.getBBBFromText( bookCodeText )
                        if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )
                        if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )
                        if not BBB: BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText( bookCodeText )
                    if not BBB:
                        logging.critical( "VPL Bible: Unable to determine book code from text {!r} after {!r}={}".format( bookCodeText, lastBookCodeText, lastBBB ) )
                        halt

                # Handle special formatting
                #   [square-brackets] are for Italicized words
                #   <angle-brackets> are for the Words of Christ in Red
                #   «chevrons» are for the Titles in the Book of Psalms.
                vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                            .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                if vText and vText[0]=='«' and BBB=='PSA' and verseNumberString=='1': # Psalm title
                    title, closingChevron, remainder = vText[1:].partition( '»' )
                    if closingChevron: # Only if the title was actually closed
                        # The 'd' line can't be added yet because the new book and/or chapter
                        #   haven't been started -- it's added below after the 'c' line
                        psalmTitle = title
                        vText = remainder.lstrip()

                # NOTE(review): every '<' was replaced just above, so this test looks like it can never succeed -- confirm
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('<') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0

            elif vplType in (2,3):
                bits = line.split( '\t', 1 )
                if len(bits) != 2:
                    logging.error( "VPLBible.load{} is skipping line without a tab-separated text field: {!r}".format( vplType, line ) )
                    continue
                reference = bits[0] # BBCCCVVV
                # Convert the zero-padded fields directly (stripping the zeroes first
                #   left an empty string for chapter or verse zero which int() rejects)
                bookCodeText, chapterNumber, verseNumber = int( reference[:2] ), int( reference[2:5] ), int( reference[5:] )
                chapterNumberString, verseNumberString = str( chapterNumber ), str( verseNumber )
                # Close up the spaces around punctuation
                vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                .replace('“ ','“').replace('( ','(').replace('[ ','[')

                if bookCodeText != lastBookCodeText: # We've started a new book
                    lastBBB = BBB
                    if 1 <= bookCodeText <= 66:
                        BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber( bookCodeText )
                    else: BBB = bnDict[bookCodeText]

            # (Handling for a 'type 4' SwordSearcher Forge flavour used to be here but is disabled)
            else:
                logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

            if not bookCodeText: # No bookCodeText yet
                logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )
                continue

            if bookCodeText != lastBookCodeText: # We've started a new book
                if lastBookCodeText is not None: # Better save the last book
                    self.stashBook( thisBook )
                if BBB:
                    if BBB in self:
                        logging.critical( "Have duplicated {} book in {}".format( BBB, self.givenName ) )
                    if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'VPL Bible Book object'
                    thisBook.objectTypeString = 'VPL'
                    numChapters = len( BOSx.getNumVersesList( BBB ) ) # Only used for checking chapter numbers
                    lastBookCodeText = bookCodeText
                    lastChapterNumber = lastVerseNumber = -1
                else:
                    logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCodeText ) )
                    if BibleOrgSysGlobals.debugFlag: halt

            if BBB:
                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    elif chapterNumber > numChapters:
                        logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, numChapters ) )
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                if psalmTitle is not None: # Now that we're in the right book and chapter
                    thisBook.addLine( 'd', psalmTitle )

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    continue
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                # Check for paragraph markers
                if vText and vText[0]=='¶':
                    thisBook.addLine( 'p', '' )
                    vText = vText[1:].lstrip()

                thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

    # Save the final book
    if thisBook is not None: self.stashBook( thisBook )

    # Clean up
    if settingsDict:
        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['VPL'] = settingsDict
        self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

    self.doPostLoadProcessing()
class OpenSongXMLBible(Bible):
    """
    Class for reading, validating, and converting OpenSong Bible XML.

    The expected structure is a 'bible' root element containing 'b' (book)
    elements, which contain 'c' (chapter) elements, which contain 'v' (verse)
    elements. Each of them is identified by its 'n' attribute.
    """
    treeTag = 'bible'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8') -> None:
        """
        Constructor: just sets up the XML Bible file converter object.

        The file is located at sourceFolder/givenName. It's only checked for
        readability here -- nothing is parsed until load() is called.
        """
        # Setup and initialise the base class first
        dPrint(
            'Quiet', debuggingThisModule,
            "OpenSongXMLBible( {}, {}, {} )".format(sourceFolder, givenName,
                                                    encoding))
        Bible.__init__(self)
        self.objectNameString = 'OpenSong XML Bible object'
        self.objectTypeString = 'OpenSong'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        #   (only reported here -- load() will still try to parse it)
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint(
                'Quiet', debuggingThisModule,
                "OpenSongXMLBible: File {!r} is unreadable".format(
                    self.sourceFilepath))

        self.name = self.givenName
    # end of OpenSongXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        Every 'b' element directly under the root is validated and stashed
        as a book, then doPostLoadProcessing() is called.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            # Fail here if we didn't load anything at all
            #   (explicit len() because truth-testing an Element is deprecated)
            assert self.XMLTree is not None and len(self.XMLTree) > 0

        # Find the main (bible) container
        if self.XMLTree.tag == OpenSongXMLBible.treeTag:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # The 'n' and 'sn' attributes are accepted but not used
            for attrib, value in self.XMLTree.items():
                if attrib not in ('n', 'sn'):
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in main element".
                        format(attrib, value))

            # Find the submain (book) containers
            for element in self.XMLTree:
                if element.tag == OpenSongXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'd3f6')
                    self.__validateAndExtractBook(element)
                elif element.tag in ('OT', 'NT'):
                    # NOTE(review): these elements are skipped without looking inside them
                    #   -- confirm that they never contain books
                    pass
                else:
                    logging.error("Expected to find {!r} but got {!r}".format(
                        OpenSongXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                OpenSongXMLBible.treeTag, self.XMLTree.tag))
        self.doPostLoadProcessing()
    # end of OpenSongXMLBible.load

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.

        The book is identified from its 'n' (name) attribute, trying the
        generic (English) book names first and then the other loaded
        book name systems. Unrecognised books are logged and skipped.
        """
        global BibleBooksNames

        vPrint('Verbose', debuggingThisModule,
               _("Validating OpenSong XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))

        if not bookName:
            logging.error(
                _("OpenSong load can't find a book name"))  # no bookName
            return

        BBB = self.genericBOS.getBBBFromText(
            bookName)  # Booknames are usually in English
        if not BBB:  # wasn't English
            if BibleBooksNames is None:  # Only loaded the first time that it's needed
                BibleBooksNames = BibleBooksNamesSystems().loadData()
            BBB = BibleBooksNames.getBBBFromText(
                bookName)  # Try non-English booknames
        if not BBB:
            logging.error(
                _("OpenSong load doesn't recognize book name: {!r}").format(
                    bookName))  # no BBB
            return

        vPrint('Info', debuggingThisModule,
               _("Validating {} {}…").format(BBB, bookName))
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'OpenSong XML Bible Book object'
        thisBook.objectTypeString = 'OpenSong'

        USFMAbbreviation = BibleOrgSysGlobals.loadedBibleBooksCodes.getUSFMAbbreviation(
            BBB)
        if not USFMAbbreviation:
            logging.critical(f"Unable to find USFM abbreviation for '{BBB}'")
            # NOTE(review): 'halt' is not defined in this block -- presumably a deliberate way to abort
            if BibleOrgSysGlobals.strictCheckingFlag: halt
            USFMAbbreviation = 'XXA'  # Placeholder so that we can carry on
        thisBook.addLine(
            'id', '{} imported by {}'.format(USFMAbbreviation.upper(),
                                             programNameVersion))
        thisBook.addLine('h', bookName)
        thisBook.addLine('mt1', bookName)

        for element in book:
            if element.tag == OpenSongXMLBible.chapterTag:
                sublocation = "chapter in {}".format(BBB)
                BibleOrgSysGlobals.checkXMLNoText(element, sublocation, 'j3jd')
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'al1d')
                self.__validateAndExtractChapter(BBB, thisBook, element)
            else:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.chapterTag, element.tag))

        vPrint('Info', debuggingThisModule,
               " Saving {} into results…".format(BBB))
        self.stashBook(thisBook)
    # end of OpenSongXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        Lines are added to thisBook: a 'c' line for the chapter, then for each
        verse either a single 'v' line or, if the verse text contains
        newlines, a 'q1' line, a 'v' line with the first piece of text, and
        a 'q1' line for each further piece.
        """
        vPrint('Verbose', debuggingThisModule, _("Validating XML chapter…"))

        # Process the div attributes first
        #   (the 'VERSES' attribute is accepted but not used)
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib != "VERSES":
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.verseTag, element.tag))
                continue

            sublocation = "verse in {} {}".format(BBB, chapterNumber)
            BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
            verseNumber = None
            for attrib, value in element.items():
                if attrib == "n":
                    verseNumber = value
                elif attrib != "t":
                    # NOTE(review): 't' looks like the end of a verse range but it
                    #   has never been used here -- confirm whether it should be
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in verse element".
                        format(attrib, value))
            if BibleOrgSysGlobals.debugFlag: assert verseNumber
            if verseNumber is None:  # Can't make a 'v' line without it
                logging.error(
                    "Missing 'n' attribute in verse element in {} {}".format(
                        BBB, chapterNumber))
                continue

            vText = element.text if element.text else ''
            for subelement in element:
                sub2location = "{} in {}".format(subelement.tag, sublocation)
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    subelement, sub2location, 'ks03')
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    subelement, sub2location, 'ks05')
                if subelement.tag == 'i':
                    # Either of these is None if there's no text there
                    #   (and must not be formatted in as the word 'None')
                    vText += '\\it {}\\it*{}'.format(subelement.text or '',
                                                     subelement.tail or '')
                else:
                    logging.error("Expected to find 'i' but got {!r}".format(
                        subelement.tag))
            vText += element.tail if element.tail else ''

            if not vText:
                logging.warning("{} {}:{} has no text".format(
                    BBB, chapterNumber, verseNumber))
                continue

            # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:  # This is how they represent poetry
                textBits = vText.split('\n')
                thisBook.addLine('q1', '')
                thisBook.addLine('v', verseNumber + ' ' + textBits[0])
                for textBit in textBits[1:]:
                    thisBook.addLine('q1', textBit)
            else:  # Just one verse line
                thisBook.addLine('v', verseNumber + ' ' + vText)
    # end of OpenSongXMLBible.__validateAndExtractChapter
def createOpenSongXML(BibleObject,
                      outputFolder=None,
                      controlDict=None,
                      validationSchema=None):
    """
    Using settings from the given control file, converts the USFM information
        to a UTF-8 OpenSong XML file.

    This format is roughly documented at http://de.wikipedia.org/wiki/OpenSong_XML
        but more fields can be discovered by looking at downloaded files.

    Parameters:
        BibleObject: the loaded Bible whose .books get exported
        outputFolder: folder for the XML file and its zipped copy
        controlDict: optional settings -- 'PublicationCode' and
            'OpenSongOutputFilename' are the keys that are read
            (a missing or None dictionary means use the defaults)
        validationSchema: if given, the XML file is validated against it

    Returns the result of the validation if a validationSchema was given,
        otherwise True.
    """
    vPrint('Normal', debuggingThisModule, "Running createOpenSongXML…")
    if BibleOrgSysGlobals.debugFlag: assert BibleObject.books

    if controlDict is None:
        controlDict = {}  # So that the lookups below work with the default parameter

    ignoredMarkers, unhandledMarkers, unhandledBooks = set(), set(), []

    def writeOpenSongBook(writerObject, BBB: str, bkData):
        """
        Writes a book to the OpenSong XML writerObject.

        Only chapter numbers and verse text get exported -- headings,
        introductions, etc. are noted in ignoredMarkers and dropped.
        """
        OSISAbbrev = BibleOrgSysGlobals.loadedBibleBooksCodes.getOSISAbbreviation(
            BBB)
        if not OSISAbbrev:
            logging.warning(
                "toOpenSong: Can't write {} OpenSong book because no OSIS code available"
                .format(BBB))
            unhandledBooks.append(BBB)
            return

        def flushVerse():
            """Writes out any accumulated verse text as a 'v' element."""
            nonlocal accumulator
            if accumulator:
                writerObject.writeLineOpenClose('v', accumulator,
                                                ('n', verseNumberString))
                accumulator = ''

        writerObject.writeLineOpen('b',
                                   ('n', bkData.getAssumedBookNames()[0]))
        haveOpenChapter, startedFlag, gotVP, accumulator = False, False, None, ""
        # Given an initial value because text can be accumulated
        #   after a 'v' field which was empty (and so never set this)
        verseNumberString = ''
        C, V = '-1', '-1'  # So first/id line starts at -1:0
        for processedBibleEntry in bkData._processedLines:  # Process internal Bible data lines
            marker = processedBibleEntry.getMarker()
            text = processedBibleEntry.getCleanText()
            extras = processedBibleEntry.getExtras()
            if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
                continue  # Just ignore added markers -- not needed here
            if marker in USFM_PRECHAPTER_MARKERS:
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                    assert C == '-1' or marker == 'rem' or marker.startswith(
                        'mte')
                V = str(int(V) + 1)

            if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker in (
                    'ie', ):  # Just ignore these lines
                ignoredMarkers.add(marker)
            elif marker == 'c':
                flushVerse()  # Finish the last verse of the previous chapter
                if haveOpenChapter:
                    writerObject.writeLineClose('c')
                C, V = text, '0'
                writerObject.writeLineOpen('c', ('n', text))
                haveOpenChapter = True
            elif marker in (
                    'c#',
            ):  # These are the markers that we can safely ignore for this export
                ignoredMarkers.add(marker)
            elif marker == 'vp#':  # This precedes a v field and has the verse number to be printed
                gotVP = text  # Just remember it for now
            elif marker == 'v':
                V = text
                if gotVP:  # this is the verse number to be published
                    text = gotVP
                    gotVP = None
                startedFlag = True
                flushVerse()  # Finish the previous verse
                if not text:
                    logging.warning("createOpenSongXML: Missing text for v")
                    continue
                verseNumberString = text.replace('<', '').replace(
                    '>', ''
                ).replace(
                    '"', ''
                )  # Used below but remove anything that'll cause a big XML problem later
            elif marker in ('mt1','mt2','mt3','mt4', 'mte1','mte2','mte3','mte4', 'ms1','ms2','ms3','ms4', ) \
            or marker in USFM_ALL_INTRODUCTION_MARKERS \
            or marker in ('s1','s2','s3','s4', 'r','sr','mr', 'd','sp','cd', 'cl','lit', ):
                ignoredMarkers.add(marker)
            elif marker in USFM_BIBLE_PARAGRAPH_MARKERS:
                if BibleOrgSysGlobals.debugFlag:
                    assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in (
                    'b',
                    'nb',
                    'ib',
            ):
                if BibleOrgSysGlobals.debugFlag:
                    assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in (
                    'v~',
                    'p~',
            ):
                if BibleOrgSysGlobals.debugFlag: assert text or extras
                if not text:  # this is an empty (untranslated) verse
                    text = '- - -'  # but we'll put in a filler
                if startedFlag:  # Text before the first verse field is dropped
                    accumulator += (' ' if accumulator else
                                    '') + BibleOrgSysGlobals.makeSafeXML(text)
            else:
                if text:
                    logging.warning(
                        "toOpenSong: lost text in {} field in {} {}:{} {!r}".
                        format(marker, BBB, C, V, text))
                if extras:
                    logging.warning(
                        "toOpenSong: lost extras in {} field in {} {}:{}".
                        format(marker, BBB, C, V))
                unhandledMarkers.add(marker)
            if extras and marker not in (
                    'v~',
                    'p~',
            ) and marker not in ignoredMarkers:
                logging.critical(
                    "toOpenSong: extras not handled for {} at {} {}:{}".format(
                        marker, BBB, C, V))
        flushVerse()  # Finish the final verse of the book
        if haveOpenChapter:
            writerObject.writeLineClose('c')
        writerObject.writeLineClose('b')
    # end of createOpenSongXML.writeOpenSongBook

    # Set-up our Bible reference system
    #   (not used below but kept because creating them checks the publication code)
    if controlDict.get('PublicationCode', 'GENERIC') == 'GENERIC':
        BOS = BibleObject.genericBOS
        BRL = BibleObject.genericBRL
    else:
        BOS = BibleOrganisationalSystem(controlDict['PublicationCode'])
        BRL = BibleReferenceList(BOS, BibleObject=None)

    vPrint('Info', debuggingThisModule, _(" Exporting to OpenSong format…"))
    osOFn = controlDict.get('OpenSongOutputFilename', 'Bible.osong')
    filename = BibleOrgSysGlobals.makeSafeFilename(osOFn)
    xw = MLWriter(filename, outputFolder)
    xw.setHumanReadable()
    xw.start()
    # XML is case-sensitive and OpenSongXMLBible.load() expects a lower-case
    #   'bible' root element, so that's what has to be written
    xw.writeLineOpen('bible')
    for BBB, bookData in BibleObject.books.items():
        writeOpenSongBook(xw, BBB, bookData)
    xw.writeLineClose('bible')
    xw.close()

    if ignoredMarkers:
        logging.info("createOpenSongXML: Ignored markers were {}".format(
            ignoredMarkers))
        vPrint(
            'Info', debuggingThisModule, " " +
            _("WARNING: Ignored createOpenSongXML markers were {}").format(
                ignoredMarkers))
    if unhandledMarkers:
        logging.warning("createOpenSongXML: Unhandled markers were {}".format(
            unhandledMarkers))
        vPrint(
            'Normal', debuggingThisModule,
            " " + _("WARNING: Unhandled toOpenSong markers were {}").format(
                unhandledMarkers))
    if unhandledBooks:
        logging.warning("createOpenSongXML: Unhandled books were {}".format(
            unhandledBooks))
        vPrint(
            'Normal', debuggingThisModule, " " +
            _("WARNING: Unhandled createOpenSongXML books were {}").format(
                unhandledBooks))

    # Now create a zipped version
    # NOTE(review): assumes that MLWriter writes to the bare filename when no
    #   outputFolder was given -- confirm
    filepath = os.path.join(outputFolder,
                            filename) if outputFolder else filename
    vPrint('Info', debuggingThisModule,
           " Zipping {} OpenSong file…".format(filename))
    with zipfile.ZipFile(filepath + '.zip',
                         'w',
                         compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(filepath, filename)  # The zip gets closed even if this fails

    if validationSchema: return xw.validate(validationSchema)
    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        vPrint('Quiet', debuggingThisModule,
               " createOpenSongXML finished successfully.")
    return True