def getContextVerseData( self, module, key ):
    """
    Fetch the entries for a single verse (plus any context) from a Sword module.

    Parameters:
        module: the Sword module object to read from
                    (stripText()/getName() are used for the Crosswire library,
                    getContextVerseData() for our own module code).
        key: the Sword key which selects the chapter/verse.

    Returns one of:
        a 2-tuple (verseData, context)
            where verseData is an InternalBibleEntryList of InternalBibleEntry objects
            (a 'c#' entry for verse one only, a 'v' entry, and possibly a 'v~' text entry)
            and context is [] wherever this function builds the entries itself
        ([], None) if our own module code returned no data
        None if the Crosswire library text couldn't be decoded.

    Raises RuntimeError if SwordType isn't one of the two known back-ends
        (formerly this fell through to an UnboundLocalError at the return statement).
    """
    def startVerseData():
        """
        Create the entry list with the chapter number (first verse only) and verse number entries.
            (Sword modules don't contain that info in the data.)
        """
        newVerseData = InternalBibleEntryList()
        c, v = key.getChapterNumberStr(), key.getVerseNumberStr()
        if v == '1': newVerseData.append( InternalBibleEntry( 'c#','c', c, c, None, c ) )
        newVerseData.append( InternalBibleEntry( 'v','v', v, v, None, v ) )
        return newVerseData
    # end of startVerseData

    if SwordType == "CrosswireLibrary":
        try: verseText = module.stripText( key )
        except UnicodeDecodeError:
            print( "Can't decode utf-8 text of {} {}".format( module.getName(), key.getShortText() ) )
            return # i.e., None -- callers have to cope with this
        if BibleOrgSysGlobals.debugFlag:
            if '\n' in verseText or '\r' in verseText:
                print( t("getVerseData: Why does it have CR or LF in {} {} {}") \
                        .format( module.getName(), key.getShortText(), repr(verseText) ) )
        # NOTE(review): the trailing whitespace is assumed to be stripped unconditionally (not only when debugging)
        verseText = verseText.rstrip()
        verseData = startVerseData()
        verseData.append( InternalBibleEntry( 'v~','v~', verseText, verseText, None, verseText ) )
        contextVerseData = verseData, [] # No context
    elif SwordType == "OurCode":
        try: contextVerseData = module.getContextVerseData( key )
        except KeyError: # Just create a blank verse entry
            contextVerseData = startVerseData(), [] # No context
        if contextVerseData is None:
            if key.getChapter()!=0 or key.getVerse()!=0: # We're not surprised if there's no chapter or verse zero
                print( t("SwordInterface.getVerseData no VD"), module.getName(), key, contextVerseData )
            contextVerseData = [], None
        else:
            verseData, _context = contextVerseData # Unpacking also checks that we have a 2-tuple
            assert isinstance( verseData, InternalBibleEntryList )
            assert 1 <= len(verseData) <= 6
    else: # Neither back-end is available so there's nothing that we can return
        raise RuntimeError( "getContextVerseData: unknown SwordType {!r}".format( SwordType ) )
    return contextVerseData
def getContextVerseData(self, module, key):
    """
    Fetch the entries for a single verse (plus any context) from a Sword module.

    Parameters:
        module: the Sword module object to read from
                    (stripText()/getName() are used for the Crosswire library,
                    getContextVerseData() for our own module code).
        key: the Sword key which selects the chapter/verse.

    Returns one of:
        a 2-tuple (verseData, context)
            where verseData is an InternalBibleEntryList of InternalBibleEntry objects
            (a 'c#' entry for verse one only, a 'v' entry, and possibly a 'v~' text entry)
            and context is [] wherever this function builds the entries itself
        ([], None) if our own module code returned no data
        None if the Crosswire library text couldn't be decoded.

    Raises RuntimeError if SwordType isn't one of the two known back-ends
        (formerly this fell through to an UnboundLocalError at the return statement).
    """

    def startVerseData():
        """
        Create the entry list with the chapter number (first verse only)
            and verse number entries.
            (Sword modules don't contain that info in the data.)
        """
        newVerseData = InternalBibleEntryList()
        c, v = key.getChapterNumberStr(), key.getVerseNumberStr()
        if v == '1':
            newVerseData.append(InternalBibleEntry('c#', 'c', c, c, None, c))
        newVerseData.append(InternalBibleEntry('v', 'v', v, v, None, v))
        return newVerseData

    if SwordType == "CrosswireLibrary":
        try:
            verseText = module.stripText(key)
        except UnicodeDecodeError:
            print("Can't decode utf-8 text of {} {}".format(
                module.getName(), key.getShortText()))
            return  # i.e., None -- callers have to cope with this
        if BibleOrgSysGlobals.debugFlag:
            if '\n' in verseText or '\r' in verseText:
                print(t("getVerseData: Why does it have CR or LF in {} {} {}")
                      .format(module.getName(), key.getShortText(), repr(verseText)))
        # NOTE(review): the trailing whitespace is assumed to be stripped
        #   unconditionally (not only when debugging)
        verseText = verseText.rstrip()
        verseData = startVerseData()
        verseData.append(
            InternalBibleEntry('v~', 'v~', verseText, verseText, None, verseText))
        contextVerseData = verseData, []  # No context
    elif SwordType == "OurCode":
        try:
            contextVerseData = module.getContextVerseData(key)
        except KeyError:  # Just create a blank verse entry
            contextVerseData = startVerseData(), []  # No context
        if contextVerseData is None:
            # We're not surprised if there's no chapter or verse zero
            if key.getChapter() != 0 or key.getVerse() != 0:
                print(t("SwordInterface.getVerseData no VD"),
                      module.getName(), key, contextVerseData)
            contextVerseData = [], None
        else:
            # Unpacking also checks that we have a 2-tuple
            verseData, _context = contextVerseData
            assert isinstance(verseData, InternalBibleEntryList)
            assert 1 <= len(verseData) <= 6
    else:  # Neither back-end is available so there's nothing that we can return
        raise RuntimeError(
            "getContextVerseData: unknown SwordType {!r}".format(SwordType))
    return contextVerseData
class BCVBibleBook(BibleBook):
    """
    Class to load and manipulate a single BCV file / book.
    """

    def __init__(self, containerBibleObject, BBB):
        """
        Create the BCV Bible book object.
        """
        BibleBook.__init__(self, containerBibleObject, BBB)  # Initialise the base class
        self.objectNameString = 'BCV Bible Book object'
        self.objectTypeString = 'BCV'
    # end of BCVBibleBook.__init__

    def loadBookMetadata(self, metadataFilepath):
        """
        Process the metadata from the given filepath.

        The file is read as UTF-8 and is expected to contain lines like:
            BCVVersion = 1.0
            WorkName = Matigsalug
            CVList = [('1', '1'), ('1', '2'), ('1', '3'), …

        Sets self.metadataFilepath, self.givenCVList (None if no usable CVList
            was found) and self.workName (only if a WorkName line was found).
        Any entries that are left over (e.g., an unusable CVList)
            are saved in self.settingsDict.
        """
        import ast  # Only needed here -- to parse the CVList literal without executing it

        if BibleOrgSysGlobals.debugFlag and BibleOrgSysGlobals.verbosityLevel > 2:
            print(' ' + exp("Loading {} metadata from {!r}…").format(
                self.BBB, metadataFilepath))
        self.metadataFilepath = metadataFilepath
        self.givenCVList = None
        settingsDict = {}
        # The BOM check below only makes sense if the file is decoded as UTF-8
        #   so don't depend on the platform default encoding
        with open(metadataFilepath, 'rt', encoding='utf-8') as myFile:  # Automatically closes the file when done
            for lineNumber, line in enumerate(myFile, start=1):
                if lineNumber == 1 and line and line[0] == chr(65279):  # U+FEFF
                    logging.info(
                        exp("loadBookMetadata: Detected Unicode Byte Order Marker (BOM) in {}")
                        .format(metadataFilepath))
                    line = line[1:]  # Remove the Byte Order Marker (BOM)
                line = line.strip()  # Remove the trailing newline and any leading/trailing whitespace
                if not line:
                    continue  # Just discard blank lines
                for fieldName in ('BCVVersion', 'WorkName', 'CVList'):
                    if line.startswith(fieldName + ' = '):
                        settingsDict[fieldName] = line[len(fieldName) + 3:]
                        break
                else:  # None of the expected field names matched
                    print(exp("ERROR: Unexpected {!r} line in metadata file").format(line))

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(" " + exp("Got {} metadata entries:").format(len(settingsDict)))
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print(" {}: {}".format(key, settingsDict[key]))

        if 'BCVVersion' in settingsDict:
            # This used to be a bare comparison whose result was thrown away
            BCVVersion = settingsDict.pop('BCVVersion')
            if BCVVersion != '1.0':
                logging.warning(
                    exp("loadBookMetadata: Unexpected BCVVersion {!r} in {}")
                    .format(BCVVersion, metadataFilepath))
        if 'WorkName' in settingsDict:
            self.workName = settingsDict.pop('WorkName')
        if 'CVList' in settingsDict:
            CVL = settingsDict['CVList']
            if CVL and CVL[0] == '[' and CVL[-1] == ']':
                try:  # literal_eval only accepts Python literals (so file content can't run code)
                    self.givenCVList = ast.literal_eval(CVL)
                except (ValueError, TypeError, SyntaxError):
                    print(exp("ERROR: Unexpected {!r} format in metadata file").format(CVL))
                else:
                    if isinstance(self.givenCVList, list):
                        del settingsDict['CVList']
            else:
                print(exp("ERROR: Unexpected {!r} format in metadata file").format(CVL))

        if settingsDict:  # Something was left over
            self.settingsDict = settingsDict
            print('book SD', self.settingsDict)
    # end of BCVBibleBook.loadBookMetadata

    def load(self, folder):
        """
        Load the BCV Bible book from a folder.

        The book is expected in a sub-folder named after self.BBB, containing
            BBB__BookMetadata.txt (see loadBookMetadata)
            plus one UTF-8 text file for each entry in the metadata CVList, i.e.,
                BBB_C<c>V<v>.txt for a (C,V) 2-tuple, or BBB__Intro.txt for ('-1',).
        Each line of those files must start with a backslash and looks like
                \\marker<<originalMarker=text
            where both the <<originalMarker and the =text parts are optional.

        The lines are appended to self._processedLines as InternalBibleEntry objects,
            any problems reported by processLineFix are saved in self.errorDictionary,
            and then the CV index is created.
        """
        loadErrors = []  # Only ever filled by doaddLine below

        def doaddLine(originalMarker, originalText):
            """
            Check for newLine markers within the line (if so, break the line)
                and save the information in our database.

            Also convert ~ to a proper non-break space.

            NOTE: Nothing in load() calls this helper at present.
            """
            # The escape makes sure that the replacement really is U+00A0 (not an ordinary space)
            marker, text = originalMarker, originalText.replace('~', '\u00a0')
            if '\\' in text:  # Check markers inside the lines
                markerList = BibleOrgSysGlobals.BCVMarkers.getMarkerListFromText(text)
                ix = 0
                for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList:  # check paragraph markers
                    if insideMarker == '\\':  # it's a free-standing backspace
                        loadErrors.append(
                            _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}")
                            .format(self.BBB, C, V, marker, text))
                        logging.error(
                            _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}")
                            .format(self.BBB, C, V, marker, text))  # Only log the first error in the line
                        self.addPriorityError(
                            100, C, V,
                            _("Improper free-standing backspace character inside a line"))
                    elif BibleOrgSysGlobals.BCVMarkers.isNewlineMarker(insideMarker):
                        # Need to split the line for everything else to work properly
                        if ix == 0:
                            loadErrors.append(
                                _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}")
                                .format(self.BBB, C, V, insideMarker, marker, text))
                            logging.error(
                                _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}")
                                .format(insideMarker, self.BBB, C, V, marker, text))  # Only log the first error in the line
                            self.addPriorityError(
                                96, C, V,
                                _("NewLine marker \\{} shouldn't be inside a line").format(insideMarker))
                        thisText = text[ix:iMIndex].rstrip()
                        self.addLine(marker, thisText)
                        # Get the start of the next text -- the 1 is for the backslash
                        ix = iMIndex + 1 + len(insideMarker) + len(nextSignificantChar)
                        marker = insideMarker  # setup for the next line
                if ix != 0:  # We must have separated multiple lines
                    text = text[ix:]  # Get the final bit of the line
            # Call the function in the base class to save the line
            #   (or the remainder of the line if we split it above)
            self.addLine(marker, text)
        # end of doaddLine

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(" " + _("Loading {} from {}…").format(self.BBB, folder))
        self.sourceFolder = os.path.join(folder, self.BBB + '/')

        # Read book metadata
        self.loadBookMetadata(
            os.path.join(self.sourceFolder, self.BBB + '__BookMetadata.txt'))

        fixErrors = []
        # Contains more-processed tuples which contain the actual Bible text -- see below
        self._processedLines = InternalBibleEntryList()
        for CV in self.givenCVList:
            if isinstance(CV, tuple) and len(CV) == 2:
                C, V = CV
                filename = self.BBB + '_C' + C + 'V' + V + '.txt'
            else:
                assert CV == ('-1', )
                C, V = '-1', '0'  # (Used to set both C and V to the tuple ('-1','0'))
                filename = self.BBB + '__Intro.txt'
            with open(os.path.join(self.sourceFolder, filename), 'rt',
                      encoding='utf-8') as myFile:  # Automatically closes the file when done
                for lineNumber, line in enumerate(myFile, start=1):
                    if lineNumber == 1 and line and line[0] == chr(65279):  # U+FEFF
                        # Report the file that we're actually reading
                        #   (the name used here before wasn't defined in this function)
                        logging.info(
                            exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}")
                            .format(filename))
                        line = line[1:]  # Remove the Byte Order Marker (BOM)
                    if line and line[-1] == '\n':
                        line = line[:-1]  # Remove trailing newline character
                    assert line and line[0] == '\\'

                    # Split into \marker[<<originalMarker] and [text] at the first equals sign
                    #   so that a << inside the text isn't mistaken for the marker separator
                    ixEQ = line.find('=')
                    if ixEQ == -1:
                        header, text = line, None
                    else:
                        header, text = line[:ixEQ], line[ixEQ + 1:]
                    ixLL = header.find('<<')
                    if ixLL == -1:
                        marker = header[1:]
                        # These two have well-known original markers -- everything else is None for now
                        originalMarker = {'v~': 'v', 'c#': 'c'}.get(marker)
                    else:
                        marker, originalMarker = header[1:ixLL], header[ixLL + 2:]

                    if marker[0] == '¬':  # It's an end marker -- it has no other fields
                        assert originalMarker is None and text is None
                        adjText = cleanText = extras = None
                    else:
                        if originalMarker is None:
                            originalMarker = marker
                        if text is None:
                            text = ''
                        # separate out the notes (footnotes and cross-references)
                        adjText, cleanText, extras = self.processLineFix(
                            C, V, originalMarker, text, fixErrors)
                    self._processedLines.append(
                        InternalBibleEntry(marker, originalMarker, adjText,
                                           cleanText, extras, text))

        if loadErrors:
            self.errorDictionary['Load Errors'] = loadErrors
        if fixErrors:
            self.errorDictionary['Fix Text Errors'] = fixErrors
        self._processedFlag = True
        self.makeCVIndex()
    # end of BCVBibleBook.load
class BCVBibleBook( BibleBook ):
    """
    Class to load and manipulate a single BCV file / book.
    """

    def __init__( self, containerBibleObject, BBB ):
        """
        Create the BCV Bible book object.
        """
        BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
        self.objectNameString = 'BCV Bible Book object'
        self.objectTypeString = 'BCV'
    # end of BCVBibleBook.__init__


    def loadBookMetadata( self, metadataFilepath ):
        """
        Process the metadata from the given filepath.

        The file is read as UTF-8 and is expected to contain lines like:
            BCVVersion = 1.0
            WorkName = Matigsalug
            CVList = [('1', '1'), ('1', '2'), ('1', '3'), …

        Sets self.metadataFilepath, self.givenCVList (None if no usable CVList
            was found) and self.workName (only if a WorkName line was found).
        Any entries that are left over (e.g., an unusable CVList)
            are saved in self.settingsDict.
        """
        import ast # Only needed here -- to parse the CVList literal without executing it

        if BibleOrgSysGlobals.debugFlag and BibleOrgSysGlobals.verbosityLevel > 2:
            print( ' ' + exp("Loading {} metadata from {!r}…").format( self.BBB, metadataFilepath ) )
        self.metadataFilepath = metadataFilepath
        self.givenCVList = None
        settingsDict = {}
        # The BOM check below only makes sense if the file is decoded as UTF-8
        #   so don't depend on the platform default encoding
        with open( metadataFilepath, 'rt', encoding='utf-8' ) as myFile: # Automatically closes the file when done
            for lineNumber, line in enumerate( myFile, start=1 ):
                if lineNumber==1 and line and line[0]==chr(65279): #U+FEFF
                    logging.info( exp("loadBookMetadata: Detected Unicode Byte Order Marker (BOM) in {}").format( metadataFilepath ) )
                    line = line[1:] # Remove the Byte Order Marker (BOM)
                line = line.strip() # Remove the trailing newline and any leading/trailing whitespace
                if not line: continue # Just discard blank lines
                for fieldName in ('BCVVersion','WorkName','CVList',):
                    if line.startswith( fieldName+' = ' ):
                        settingsDict[fieldName] = line[len(fieldName)+3:]
                        break
                else: # None of the expected field names matched
                    print( exp("ERROR: Unexpected {!r} line in metadata file").format( line ) )

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( " " + exp("Got {} metadata entries:").format( len(settingsDict) ) )
            if BibleOrgSysGlobals.verbosityLevel > 3:
                for key in sorted(settingsDict):
                    print( " {}: {}".format( key, settingsDict[key] ) )

        if 'BCVVersion' in settingsDict:
            # This used to be a bare comparison whose result was thrown away
            BCVVersion = settingsDict.pop( 'BCVVersion' )
            if BCVVersion != '1.0':
                logging.warning( exp("loadBookMetadata: Unexpected BCVVersion {!r} in {}").format( BCVVersion, metadataFilepath ) )
        if 'WorkName' in settingsDict: self.workName = settingsDict.pop( 'WorkName' )
        if 'CVList' in settingsDict:
            CVL = settingsDict['CVList']
            if CVL and CVL[0]=='[' and CVL[-1]==']':
                try: self.givenCVList = ast.literal_eval( CVL ) # Only accepts Python literals (so file content can't run code)
                except (ValueError, TypeError, SyntaxError):
                    print( exp("ERROR: Unexpected {!r} format in metadata file").format( CVL ) )
                else:
                    if isinstance( self.givenCVList, list ): del settingsDict['CVList']
            else: print( exp("ERROR: Unexpected {!r} format in metadata file").format( CVL ) )

        if settingsDict: # Something was left over
            self.settingsDict = settingsDict
            print( 'book SD', self.settingsDict )
    # end of BCVBibleBook.loadBookMetadata


    def load( self, folder ):
        """
        Load the BCV Bible book from a folder.

        The book is expected in a sub-folder named after self.BBB, containing
            BBB__BookMetadata.txt (see loadBookMetadata)
            plus one UTF-8 text file for each entry in the metadata CVList, i.e.,
                BBB_C<c>V<v>.txt for a (C,V) 2-tuple, or BBB__Intro.txt for ('-1',).
        Each line of those files must start with a backslash and looks like
                \\marker<<originalMarker=text
            where both the <<originalMarker and the =text parts are optional.

        The lines are appended to self._processedLines as InternalBibleEntry objects,
            any problems reported by processLineFix are saved in self.errorDictionary,
            and then the CV index is created.
        """
        loadErrors = [] # Only ever filled by doaddLine below

        def doaddLine( originalMarker, originalText ):
            """
            Check for newLine markers within the line (if so, break the line)
                and save the information in our database.

            Also convert ~ to a proper non-break space.

            NOTE: Nothing in load() calls this helper at present.
            """
            # The escape makes sure that the replacement really is U+00A0 (not an ordinary space)
            marker, text = originalMarker, originalText.replace( '~', '\u00a0' )
            if '\\' in text: # Check markers inside the lines
                markerList = BibleOrgSysGlobals.BCVMarkers.getMarkerListFromText( text )
                ix = 0
                for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList: # check paragraph markers
                    if insideMarker == '\\': # it's a free-standing backspace
                        loadErrors.append( _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}").format( self.BBB, C, V, marker, text ) )
                        logging.error( _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                        self.addPriorityError( 100, C, V, _("Improper free-standing backspace character inside a line") )
                    elif BibleOrgSysGlobals.BCVMarkers.isNewlineMarker(insideMarker): # Need to split the line for everything else to work properly
                        if ix==0:
                            loadErrors.append( _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}").format( self.BBB, C, V, insideMarker, marker, text ) )
                            logging.error( _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}").format( insideMarker, self.BBB, C, V, marker, text ) ) # Only log the first error in the line
                            self.addPriorityError( 96, C, V, _("NewLine marker \\{} shouldn't be inside a line").format( insideMarker ) )
                        thisText = text[ix:iMIndex].rstrip()
                        self.addLine( marker, thisText )
                        ix = iMIndex + 1 + len(insideMarker) + len(nextSignificantChar) # Get the start of the next text -- the 1 is for the backslash
                        marker = insideMarker # setup for the next line
                if ix != 0: # We must have separated multiple lines
                    text = text[ix:] # Get the final bit of the line
            self.addLine( marker, text ) # Call the function in the base class to save the line (or the remainder of the line if we split it above)
        # end of doaddLine


        if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Loading {} from {}…").format( self.BBB, folder ) )
        self.sourceFolder = os.path.join( folder, self.BBB+'/' )

        # Read book metadata
        self.loadBookMetadata( os.path.join( self.sourceFolder, self.BBB+'__BookMetadata.txt' ) )

        fixErrors = []
        self._processedLines = InternalBibleEntryList() # Contains more-processed tuples which contain the actual Bible text -- see below
        for CV in self.givenCVList:
            if isinstance( CV, tuple) and len(CV)==2:
                C, V = CV
                filename = self.BBB+'_C'+C+'V'+V+'.txt'
            else:
                assert CV == ('-1',)
                C, V = '-1', '0' # (Used to set both C and V to the tuple ('-1','0'))
                filename = self.BBB+'__Intro.txt'
            with open( os.path.join( self.sourceFolder, filename ), 'rt', encoding='utf-8' ) as myFile: # Automatically closes the file when done
                for lineNumber, line in enumerate( myFile, start=1 ):
                    if lineNumber==1 and line and line[0]==chr(65279): #U+FEFF
                        # Report the file that we're actually reading (the name used here before wasn't defined in this function)
                        logging.info( exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}").format( filename ) )
                        line = line[1:] # Remove the Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
                    assert line and line[0]=='\\'

                    # Split into \marker[<<originalMarker] and [text] at the first equals sign
                    #   so that a << inside the text isn't mistaken for the marker separator
                    ixEQ = line.find( '=' )
                    if ixEQ == -1: header, text = line, None
                    else: header, text = line[:ixEQ], line[ixEQ+1:]
                    ixLL = header.find( '<<' )
                    if ixLL == -1:
                        marker = header[1:]
                        # These two have well-known original markers -- everything else is None for now
                        originalMarker = {'v~':'v', 'c#':'c'}.get( marker )
                    else: marker, originalMarker = header[1:ixLL], header[ixLL+2:]

                    if marker[0] == '¬': # It's an end marker -- it has no other fields
                        assert originalMarker is None and text is None
                        adjText = cleanText = extras = None
                    else:
                        if originalMarker is None: originalMarker = marker
                        if text is None: text = ''
                        adjText, cleanText, extras = self.processLineFix( C, V, originalMarker, text, fixErrors ) # separate out the notes (footnotes and cross-references)
                    self._processedLines.append( InternalBibleEntry(marker, originalMarker, adjText, cleanText, extras, text) )

        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        if fixErrors: self.errorDictionary['Fix Text Errors'] = fixErrors
        self._processedFlag = True
        self.makeCVIndex()
    # end of BCVBibleBook.load