Пример #1
0
 def __init__(self, containerBibleObject: Bible, BBB: str) -> None:
     """
     Create the uW OBS Bible book object.
     """
     BibleBook.__init__(self, containerBibleObject,
                        BBB)  # Initialise the base class
     self.objectNameString = 'uW OBS Bible Book object'
     self.objectTypeString = 'uW OBS'
Пример #2
0
    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))

        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
        if BBB is None:
            adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText(adjustedBookName)
        BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
            bookNumber)
        if BBB2 != BBB:  # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                vPrint(
                    'Quiet', debuggingThisModule,
                    "Assuming that book {} {!r} is {} (not {})".format(
                        bookNumber, bookName, BBB2, BBB))
            BBB = BBB2
            #vPrint( 'Quiet', debuggingThisModule, BBB ); halt

        if BBB:
            vPrint('Info', debuggingThisModule,
                   _("Validating {} {}…").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error(
                        "vb26 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.chapterTag, element.tag))
            vPrint('Info', debuggingThisModule,
                   "  Saving {} into results…".format(BBB))
            self.stashBook(thisBook)
Пример #3
0
    def __init__(self, containerBibleObject: Bible, BBB: str) -> None:
        """
        Create the USFM2 Bible book object.
        """
        BibleBook.__init__(self, containerBibleObject,
                           BBB)  # Initialise the base class
        self.objectNameString = 'USFM2 Bible Book object'
        self.objectTypeString = 'USFM2'

        global sortedNLMarkers
        if sortedNLMarkers is None:
            sortedNLMarkers = sorted(
                USFM2Markers.getNewlineMarkersList('Combined'),
                key=len,
                reverse=True)
Пример #4
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['Unbound'] = {}

        lastLine, lineCount = '', 0
        BBB = None
        NRSVA_bookCode = NRSVA_chapterNumberString = NRSVA_verseNumberString = None
        subverseNumberString = sequenceNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = lastSequence = -1
        lastVText = ''
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      UnboundBible.load: Detected Unicode Byte Order Marker (BOM)" )
                #line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                if line and line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #vPrint( 'Quiet', debuggingThisModule, 'UB file line is "' + line + '"' )
                if line[0] == '#':
                    hashBits = line[1:].split('\t')
                    if len(hashBits) == 2 and hashBits[
                            1]:  # We have some valid meta-data
                        self.suppliedMetadata['Unbound'][
                            hashBits[0]] = hashBits[1]
                        #if hashBits[0] == 'name': self.name = hashBits[1]
                        #elif hashBits[0] == 'filetype': self.filetype = hashBits[1]
                        #elif hashBits[0] == 'copyright': self.copyright = hashBits[1]
                        #elif hashBits[0] == 'abbreviation': self.abbreviation = hashBits[1]
                        #elif hashBits[0] == 'language': self.language = hashBits[1]
                        #elif hashBits[0] == 'note': self.note = hashBits[1]
                        #elif hashBits[0] == 'columns': self.columns = hashBits[1]
                        #logging.warning( "Unknown UnboundBible meta-data field {!r} = {!r}".format( hashBits[0], hashBits[1] ) )
                    continue  # Just discard comment lines

                bits = line.split('\t')
                #vPrint( 'Quiet', debuggingThisModule, self.givenName, BBB, bits )
                if len(bits) == 4:
                    bookCode, chapterNumberString, verseNumberString, vText = bits
                elif len(bits) == 6:
                    bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 9:
                    NRSVA_bookCode, NRSVA_chapterNumberString, NRSVA_verseNumberString, bookCode, chapterNumberString, verseNumberString, subverseNumberString, sequenceNumberString, vText = bits
                elif len(bits) == 1 and self.givenName.startswith(
                        'lxx_a_parsing_'):
                    logging.warning(
                        _("Skipping bad {!r} line in {} {} {} {}:{}").format(
                            line, self.givenName, BBB, bookCode,
                            chapterNumberString, verseNumberString))
                    continue
                else:
                    vPrint('Quiet', debuggingThisModule,
                           "Unexpected number of bits", self.givenName, BBB,
                           bookCode, chapterNumberString, verseNumberString,
                           len(bits), bits)
                    halt

                if NRSVA_bookCode: assert len(NRSVA_bookCode) == 3
                if NRSVA_chapterNumberString:
                    assert NRSVA_chapterNumberString.isdigit()
                if NRSVA_verseNumberString:
                    assert NRSVA_verseNumberString.isdigit()

                if not bookCode and not chapterNumberString and not verseNumberString:
                    vPrint(
                        'Quiet', debuggingThisModule,
                        "Skipping empty line in {} {} {} {}:{}".format(
                            self.givenName, BBB, bookCode, chapterNumberString,
                            verseNumberString))
                    continue
                if BibleOrgSysGlobals.debugFlag: assert len(bookCode) == 3
                if BibleOrgSysGlobals.debugFlag:
                    assert chapterNumberString.isdigit()
                if BibleOrgSysGlobals.debugFlag:
                    assert verseNumberString.isdigit()

                if subverseNumberString:
                    logging.warning(
                        _("subverseNumberString {!r} in {} {} {}:{}").format(
                            subverseNumberString, BBB, bookCode,
                            chapterNumberString, verseNumberString))

                vText = vText.strip()  # Remove leading and trailing spaces
                if not vText: continue  # Just ignore blank verses I think
                if vText == '+':
                    continue  # Not sure what this means in basic_english JHN 1:38

                chapterNumber = int(chapterNumberString)
                verseNumber = int(verseNumberString)
                if sequenceNumberString:
                    if BibleOrgSysGlobals.debugFlag:
                        assert sequenceNumberString.isdigit()
                    sequenceNumber = int(sequenceNumberString)
                    if BibleOrgSysGlobals.debugFlag:                        assert sequenceNumber > lastSequence or \
self.givenName in ('gothic_latin', 'hebrew_bhs_consonants', 'hebrew_bhs_vowels', 'latvian_nt', 'ukrainian_1871',) # Why???
                    lastSequence = sequenceNumber

                if bookCode != lastBookCode:  # We've started a new book
                    if lastBookCode != -1:  # Better save the last book
                        self.stashBook(thisBook)
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromUnboundBibleCode(
                        bookCode)
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'Unbound Bible Book object'
                    thisBook.objectTypeString = 'Unbound'
                    lastBookCode = bookCode
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber:  # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag:
                        assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info(
                            "Have chapter zero in {} {} {} {}:{}".format(
                                self.givenName, BBB, bookCode,
                                chapterNumberString, verseNumberString))
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Handle the verse info
                if verseNumber == lastVerseNumber and vText == lastVText:
                    logging.warning(
                        _("Ignored duplicate verse line in {} {} {} {}:{}").
                        format(self.givenName, BBB, bookCode,
                               chapterNumberString, verseNumberString))
                    continue
                if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                        '<') and self.givenName == 'basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning(
                        _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                          ).format(lastVerseNumber, verseNumber,
                                   self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning(
                            _("Ignored duplicated {} verse in {} {} {} {}:{}").
                            format(verseNumber, self.givenName, BBB, bookCode,
                                   chapterNumberString, verseNumberString))
                    else:
                        logging.warning(
                            _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                              ).format(verseNumber, self.givenName, BBB,
                                       bookCode, chapterNumberString,
                                       verseNumberString))
                thisBook.addLine('v', verseNumberString + ' ' + vText)
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.stashBook(thisBook)
        self.applySuppliedMetadata('Unbound')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
Пример #5
0
class GreekNT( Bible ):
    """
    Class for handling a Greek NT object (which may contain one or more Bible books)

    Note: BBB is used in this class to represent the three-character referenceAbbreviation.
    """
    def __init__( self, sourceFilepath, givenName=None, encoding='utf-8' ) -> None:
        """
        Constructor: expects the filepath of the source folder.
        Loads (and crudely validates the file(s)) into ???.
        """
         # Setup and initialise the base class first
        Bible.__init__( self )
        self.objectNameString = 'Greek NT Bible object'
        self.objectTypeString = 'GreekNT'

        # Now we can set our object variables
        self.sourceFilepath, self.givenName, self.encoding  = sourceFilepath, givenName, encoding

        self.title = self.version = self.date = None
        self.XMLTree = self.header = self.frontMatter = self.divs = self.divTypesString = None
        #self.bkData, self.USFMBooks = {}, {}
        self.lang = self.language = None

        # Do a preliminary check on the readability of our files
        self.possibleFilenames = []
        if os.path.isdir( self.sourceFilepath ): # We've been given a folder -- see if we can find the files
            # There's no standard for OSIS xml file naming
            fileList = os.listdir( self.sourceFilepath )
            #dPrint( 'Quiet', debuggingThisModule, len(fileList), fileList )
            # First try looking for OSIS book names
            for filename in fileList:
                if filename.lower().endswith('.txt'):
                    thisFilepath = os.path.join( self.sourceFilepath, filename )
                    #if BibleOrgSysGlobals.debugFlag: vPrint( 'Quiet', debuggingThisModule, "Trying {}…".format( thisFilepath ) )
                    if os.access( thisFilepath, os.R_OK ): # we can read that file
                        self.possibleFilenames.append( filename )
        elif not os.access( self.sourceFilepath, os.R_OK ):
            logging.critical( "GreekNT: File {!r} is unreadable".format( self.sourceFilepath ) )
            return # No use continuing
        #dPrint( 'Quiet', debuggingThisModule, self.possibleFilenames ); halt

        self.name = self.givenName
        #gNTfc = GreekNTFileConverter( self.sourceFilepath ) # Load and process the XML
        #gNTfc.loadMorphGNT()
        #self.books = gNTfc.bookData
    # end of __init__


    #def x__str__( self ) -> str:
        #"""
        #This method returns the string representation of a Bible book code.

        #@return: the name of a Bible object formatted as a string
        #@rtype: string
        #"""
        #result = "Greek Bible converter object"
        ##if self.title: result += ('\n' if result else '') + self.title
        ##if self.version: result += ('\n' if result else '') + "Version: {} ".format( self.version )
        ##if self.date: result += ('\n' if result else '') + "Date: {}".format( self.date )
        #if len(self.books)==1:
            #for BBB in self.books: break # Just get the first one
            #result += ('\n' if result else '') + "  " + _("Contains one book: {}").format( BBB )
        #else: result += ('\n' if result else '') + "  " + _("Number of books = {:,}").format( len(self.books) )
        #return result
    ## end of __str__


    def loadBooks( self ):
        """
        """
        vPrint( 'Info', debuggingThisModule, _("Loading Greek NT from {}…").format( self.sourceFilepath ) )
        for BBB in Greek.morphgntBookList:
            self.loadBook( BBB, Greek.morphgntFilenameDict[BBB] )
        vPrint( 'Verbose', debuggingThisModule, "{} books loaded.".format( len(self.books) ) )
        #if self.possibleFilenames: # then we possibly have multiple files, probably one for each book
            #for filename in self.possibleFilenames:
                #pathname = os.path.join( self.sourceFilepath, filename )
                #self.loadBook( pathname )
        #else: # most often we have all the Bible books in one file
            #self.loadFile( self.sourceFilepath )
        self.doPostLoadProcessing()
    # end of loadBooks

    def load( self ):
        self.loadBooks()


    def loadBook( self, BBB:str, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert len(bits) == 7
            #dPrint( 'Quiet', debuggingThisModule, bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #dPrint( 'Quiet', debuggingThisModule, b, c, v )

            POSCode = bits[1]
            assert len(POSCode) == 2
            assert POSCode in Greek.POSCodes.keys()

            parsingCode = bits[2]
            assert len(parsingCode) == 8
            #dPrint( 'Quiet', debuggingThisModule, parsingCode )
            for j,char in enumerate(parsingCode):
                assert char in Greek.parsingCodes[j]
            assert parsingCode[0] in Greek.personCodes
            assert parsingCode[1] in Greek.tenseCodes
            assert parsingCode[2] in Greek.voiceCodes
            assert parsingCode[3] in Greek.modeCodes
            assert parsingCode[4] in Greek.caseCodes
            assert parsingCode[5] in Greek.numberCodes
            assert parsingCode[6] in Greek.genderCodes
            assert parsingCode[7] in Greek.degreeCodes

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = 'Morph Greek NT Bible Book object'
        self.thisBook.objectTypeString = 'MorphGNT'
        filepath = os.path.join( self.sourceFilepath, filename )
        vPrint( 'Info', debuggingThisModule, "  Loading {}…".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #dPrint( 'Quiet', debuggingThisModule, 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #dPrint( 'Quiet', debuggingThisModule, unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.addLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.addLine( 'v', vn )
                        lastV = vn
                    self.thisBook.addLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.addLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #dPrint( 'Quiet', debuggingThisModule, reference,bits[1],bits[2] ); halt
            #if 0: #except:
                #logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                #if lineCount > 1: vPrint( 'Quiet', debuggingThisModule, 'Previous line was: ', lastLine )
                #else: vPrint( 'Quiet', debuggingThisModule, 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            vPrint( 'Verbose', debuggingThisModule, "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.stashBook( self.thisBook )
            #self.books[BBB] = self.thisBook
    # end of loadBook


    def analyzeWords( self ):
        """
        Go through the NT data and do some filing and sorting of the Greek words.

        Used by the interlinearizer app.
        """
        vPrint( 'Verbose', debuggingThisModule, "analyzeWords: have {} books in the loaded NT".format( len(self.books) ) )

        self.wordCounts = {} # Wordcount organised by BBB
        self.wordCounts['Total'] = 0
        self.actualWordsToNormalized, self.normalizedWordsToActual, self.normalizedWordsToParsing, self.lemmasToNormalizedWords = {}, {}, {}, {}
        for BBB in self.books:
            wordCount = len(self.books[BBB])
            self.wordCounts[BBB] = wordCount
            self.wordCounts['Total'] += wordCount
            vPrint( 'Verbose', debuggingThisModule, "  analyzeWords: {} has {} Greek words".format( BBB, wordCount ) )
            for reference,parsing,(punctuatedWord,actualWord,normalizedWord,lemma) in self.books[BBB]: # Stuff is: reference,parsing,words
                # File the actual words
                if actualWord not in self.actualWordsToNormalized:
                    self.actualWordsToNormalized[actualWord] = [([reference],normalizedWord,)]
                    #dPrint( 'Quiet', debuggingThisModule, "Saved", actualWord, "with", self.actualWordsToNormalized[actualWord] )
                else: # we've already had this word before
                    previous = self.actualWordsToNormalized[actualWord]
                    #dPrint( 'Quiet', debuggingThisModule, "had", actualWord, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #dPrint( 'Quiet', debuggingThisModule, "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        #dPrint( 'Quiet', debuggingThisModule, "  Found a new", normalizedWord, "normalized word for", actualWord, "was", previous )
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.actualWordsToNormalized[actualWord] = newList
                        #dPrint( 'Quiet', debuggingThisModule, "  now have", newList )
                # File the normalized words
                if normalizedWord not in self.normalizedWordsToActual:
                    self.normalizedWordsToActual[normalizedWord] = [([reference],actualWord,)]
                    #dPrint( 'Quiet', debuggingThisModule, "Saved", normalizedWord, "with", self.normalizedWordsToActual[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToActual[normalizedWord]
                    #dPrint( 'Quiet', debuggingThisModule, "had", normalizedWord, "before with", previous, "now with", reference, actualWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldActualWord in previous:
                        #dPrint( 'Quiet', debuggingThisModule, "  oRL", oldRefList, "oP", oldActualWord )
                        if actualWord == oldActualWord:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldActualWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldActualWord,) )
                    if not found:
                        newList.append( ([reference],actualWord,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToActual[normalizedWord] = newList
                        #dPrint( 'Quiet', debuggingThisModule, "  now have", newList )
                if normalizedWord not in self.normalizedWordsToParsing:
                    self.normalizedWordsToParsing[normalizedWord] = [([reference],parsing,)]
                    #dPrint( 'Quiet', debuggingThisModule, "Saved", normalizedWord, "with", self.normalizedWordsToParsing[normalizedWord] )
                else: # we've already had this word before
                    previous = self.normalizedWordsToParsing[normalizedWord]
                    #dPrint( 'Quiet', debuggingThisModule, "had", normalizedWord, "before with", previous, "now with", reference, parsing )
                    found = changed = False
                    newList = []
                    for oldRefList,oldParsing in previous:
                        #dPrint( 'Quiet', debuggingThisModule, "  oRL", oldRefList, "oP", oldParsing )
                        if parsing == oldParsing:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldParsing,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldParsing,) )
                    if not found:
                        newList.append( ([reference],parsing,) )
                        changed = True
                    if changed:
                        self.normalizedWordsToParsing[normalizedWord] = newList
                        #dPrint( 'Quiet', debuggingThisModule, "  now have", newList )
                # File the self.lemmasToNormalizedWords
                if lemma not in self.lemmasToNormalizedWords:
                    self.lemmasToNormalizedWords[lemma] = [([reference],normalizedWord,)]
                    #dPrint( 'Quiet', debuggingThisModule, "Saved", lemma, "with", self.lemmasToNormalizedWords[lemma] )
                else: # we've already had this word before
                    previous = self.lemmasToNormalizedWords[lemma]
                    #dPrint( 'Quiet', debuggingThisModule, "had", lemma, "before with", previous, "now with", reference, normalizedWord )
                    found = changed = False
                    newList = []
                    for oldRefList,oldnormalizedWord in previous:
                        #dPrint( 'Quiet', debuggingThisModule, "  oRL", oldRefList, "oP", oldnormalizedWord )
                        if normalizedWord == oldnormalizedWord:
                            assert not found
                            if reference not in oldRefList:
                                oldRefList.append( reference )
                                newList.append( (oldRefList,oldnormalizedWord,) )
                                changed = True
                            found = True
                        else: newList.append( (oldRefList,oldnormalizedWord,) )
                    if not found:
                        newList.append( ([reference],normalizedWord,) )
                        changed = True
                    if changed:
                        self.lemmasToNormalizedWords[lemma] = newList
                        #dPrint( 'Quiet', debuggingThisModule, "  now have", newList )
        vPrint( 'Info', debuggingThisModule, "analyzeWords: NT has {} Greek words".format( self.wordCounts['Total'] ) )
        vPrint( 'Info', debuggingThisModule, "analyzeWords: NT has {} actual Greek words".format( len(self.actualWordsToNormalized) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                vPrint( 'Quiet', debuggingThisModule, "  ", aW, self.actualWordsToNormalized[aW] )
                if j==6: break
        vPrint( 'Info', debuggingThisModule, "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToActual) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToActual.keys() ):
                vPrint( 'Quiet', debuggingThisModule, "  ", nW, self.normalizedWordsToActual[nW] )
                if j==6: break
        vPrint( 'Info', debuggingThisModule, "analyzeWords: NT has {} normalized Greek words".format( len(self.normalizedWordsToParsing) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,nW in enumerate( self.normalizedWordsToParsing.keys() ):
                vPrint( 'Quiet', debuggingThisModule, "  ", nW, self.normalizedWordsToParsing[nW] )
                if j==6: break
        vPrint( 'Info', debuggingThisModule, "analyzeWords: NT has {} Greek self.lemmasToNormalizedWords".format( len(self.lemmasToNormalizedWords) ) )
        if BibleOrgSysGlobals.verbosityLevel > 3:
            for j,lem in enumerate( self.lemmasToNormalizedWords.keys() ):
                vPrint( 'Quiet', debuggingThisModule, "  ", lem, self.lemmasToNormalizedWords[lem] )
                if j==6: break
        if 0:
            vPrint( 'Quiet', debuggingThisModule, "The following actual words have multiple normalized forms:" )
            for j,aW in enumerate( self.actualWordsToNormalized.keys() ):
                if len(self.actualWordsToNormalized[aW])>1:
                    vPrint( 'Quiet', debuggingThisModule, "  ", aW )
                    for entry in self.actualWordsToNormalized[aW]:
                        vPrint( 'Quiet', debuggingThisModule, "    ", entry[1], self.normalizedWordsToParsing[entry[1]], entry[0] )
Пример #6
0
    def loadBook( self, BBB:str, filename, encoding='utf-8' ):

        def unpackLine( line ):
            # Should be seven parts in the line
            #   0 book/chapter/verse
            #   1 part of speech (POS)
            #   2 parsing code
            #   3 text (including punctuation)
            #   4 word (with punctuation stripped)
            #   5 normalized word
            #   6 lemma
            # e.g., 180101 N- ----NSM- Παῦλος Παῦλος Παῦλος Παῦλος
            #       180102 N- ----DSF- ⸀ἀδελφῇ ἀδελφῇ ἀδελφῇ ἀδελφή
            #       180102 P- -------- κατ’ κατ’ κατά κατά
            #       180102 N- ----DSF- ἐκκλησίᾳ· ἐκκλησίᾳ ἐκκλησίᾳ ἐκκλησία
            bits = line.split()
            assert len(bits) == 7
            #dPrint( 'Quiet', debuggingThisModule, bits )

            bn, cn, vn = bits[0][0:2], bits[0][2:4], bits[0][4:6]
            if bn[0]=='0': bn = bn[1:] # Remove any leading zero
            if cn[0]=='0': cn = cn[1:] # Remove any leading zero
            if vn[0]=='0': vn = vn[1:] # Remove any leading zero
            #dPrint( 'Quiet', debuggingThisModule, b, c, v )

            POSCode = bits[1]
            assert len(POSCode) == 2
            assert POSCode in Greek.POSCodes.keys()

            parsingCode = bits[2]
            assert len(parsingCode) == 8
            #dPrint( 'Quiet', debuggingThisModule, parsingCode )
            for j,char in enumerate(parsingCode):
                assert char in Greek.parsingCodes[j]
            assert parsingCode[0] in Greek.personCodes
            assert parsingCode[1] in Greek.tenseCodes
            assert parsingCode[2] in Greek.voiceCodes
            assert parsingCode[3] in Greek.modeCodes
            assert parsingCode[4] in Greek.caseCodes
            assert parsingCode[5] in Greek.numberCodes
            assert parsingCode[6] in Greek.genderCodes
            assert parsingCode[7] in Greek.degreeCodes

            return (bn,cn,vn,), (POSCode,parsingCode,), (bits[3],bits[4],bits[5],bits[6],)
        # end of unpackLine

        self.thisBook = BibleBook( self, BBB )
        self.thisBook.objectNameString = 'Morph Greek NT Bible Book object'
        self.thisBook.objectTypeString = 'MorphGNT'
        filepath = os.path.join( self.sourceFilepath, filename )
        vPrint( 'Info', debuggingThisModule, "  Loading {}…".format( filename ) )
        lastLine, lineCount = '', 0
        lastC = lastV = None
        with open( filepath, encoding=encoding ) as myFile: # Automatically closes the file when done
            if 1: #try:
                for line in myFile:
                    lineCount += 1
                    if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
                        logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character
                    #if not line: continue # Just discard blank lines
                    lastLine = line
                    #dPrint( 'Quiet', debuggingThisModule, 'gNT file line is "' + line + '"' )
                    #if line[0]=='#': continue # Just discard comment lines
                    unpackedLine = unpackLine( line )
                    #dPrint( 'Quiet', debuggingThisModule, unpackedLine )
                    ref, grammar, words = unpackedLine
                    bn, cn, vn = ref
                    POSCode, parsingCode = grammar
                    word1, word2, word3, word4 = words
                    if cn != lastC:
                        self.thisBook.addLine( 'c', cn )
                        lastC, lastV = cn, None
                    if vn != lastV:
                        self.thisBook.addLine( 'v', vn )
                        lastV = vn
                    self.thisBook.addLine( 'vw', "{}/{}/{}/{}".format( word1, word2, word3, word4 ) )
                    self.thisBook.addLine( 'g', "{}/{}".format( POSCode, parsingCode ) )
                    #reference = BBB,bits[0][1],bits[0][2], # Put the BBB into the reference
                    #lineTuples.append( (reference,bits[1],bits[2],) )
                    #dPrint( 'Quiet', debuggingThisModule, reference,bits[1],bits[2] ); halt
            #if 0: #except:
                #logging.critical( "Invalid line in " + filepath + " -- line ignored at " + str(lineCount) )
                #if lineCount > 1: vPrint( 'Quiet', debuggingThisModule, 'Previous line was: ', lastLine )
                #else: vPrint( 'Quiet', debuggingThisModule, 'Possible encoding error -- expected', encoding )
        if self.thisBook:
            vPrint( 'Verbose', debuggingThisModule, "    {} words loaded from {}".format( len(self.thisBook), filename ) )
            self.stashBook( self.thisBook )
Пример #7
0
    def loadBook(self, BBB: str):
        """
        Load the requested book out of the SQLite3 database.
        """
        fnPrint(debuggingThisModule, "loadBook( {} )".format(BBB))
        assert self.preloadDone

        if BBB in self.books:
            dPrint('Quiet', debuggingThisModule,
                   "  {} is already loaded -- returning".format(BBB))
            return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading MySwordBible {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            vPrint(
                'Quiet', debuggingThisModule,
                _("MySwordBible: Loading {} from {}…").format(
                    BBB, self.sourceFilepath))

        #if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
        #testament, BBB = 'BOTH', 'GEN'
        #booksExpected, textLineCountExpected = 1, 31102
        #elif self.suppliedMetadata['MySword']['OT']:
        #testament, BBB = 'OT', 'GEN'
        #booksExpected, textLineCountExpected = 1, 23145
        #elif self.suppliedMetadata['MySword']['NT']:
        #testament, BBB = 'NT', 'MAT'
        #booksExpected, textLineCountExpected = 1, 7957

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        #bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #dPrint( 'Quiet', debuggingThisModule, "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #dPrint( 'Quiet', debuggingThisModule, row )
                line = None
            #dPrint( 'Quiet', debuggingThisModule, nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                            .format(BBB, C, V, line))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                            .format(BBB, C, V, line,
                                    self.suppliedMetadata['MySword']))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #dPrint( 'Quiet', debuggingThisModule, "MySword.load", BBB, C, V, repr(line) )
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C <= numC:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1
                else:  # Save this book now
                    if haveLines:
                        vPrint('Info', debuggingThisModule, "  MySword saving",
                               BBB)
                        self.stashBook(thisBook)
                    #else: vPrint( 'Quiet', debuggingThisModule, "Not saving", BBB )
                    break

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False
    def load(self):
        """
        Load a single source file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganisationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None: BOSx = BibleOrganisationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search('^; TITLE:\\s', line)
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            vPrint(
                                'Quiet', debuggingThisModule,
                                "First line got type {!r} match from {!r}".
                                format(match.group(0), line))
                    else:
                        vPrint(
                            'Verbose', debuggingThisModule,
                            "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                            .format(firstLine, thisFilename))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            halt
                        continue

                #vPrint( 'Quiet', debuggingThisModule, 'ForgeForSwordSearcher file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if line.startswith('; TITLE:'):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith('; ABBREVIATION:'):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith('; HAS ITALICS'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES:'):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS REDLETTER'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0] == ';':
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                        .format(line))
                    continue  # Just discard comment lines

                # Process the main segment
                if line.startswith('$$ '):
                    if metadataName and metadataContents:
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #vPrint( 'Quiet', debuggingThisModule, "pointer", repr(pointer) )
                    if pointer and pointer[0] == '{' and pointer[-1] == '}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #vPrint( 'Quiet', debuggingThisModule, "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else:  # let's assume it's a BCV reference
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split(' ', 1)
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split(
                                ':')
                            chapterNumber = int(chapterNumberString)
                            verseNumber = int(verseNumberString)
                            if bookCode != lastBookCode:  # We've started a new book
                                if bookCode in ('Ge', ): BBB = 'GEN'
                                elif bookCode in ('Le', ): BBB = 'LEV'
                                elif bookCode in ('La', ):
                                    BBB = 'LAM'
                                    ##elif bookCode in ('Es',): BBB = 'EST'
                                    ##elif bookCode in ('Pr',): BBB = 'PRO'
                                    #elif bookCode in ('So',): BBB = 'SNG'
                                    #elif bookCode in ('La',): BBB = 'LAM'
                                    #elif bookCode in ('Jude',): BBB = 'JDE'
                                else:
                                    #vPrint( 'Quiet', debuggingThisModule, "4BookCode =", repr(bookCode) )
                                    #BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                    BBB = BOS66.getBBBFromText(
                                        bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOS81.getBBBFromText(
                                            bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOSx.getBBBFromText(
                                            bookCode)  # Try to guess
                                    #vPrint( 'Quiet', debuggingThisModule, "4BBB =", repr(BBB) )
                        else:
                            vPrint('Quiet', debuggingThisModule,
                                   "Unexpected number of bits", self.givenName,
                                   BBB, bookCode, chapterNumberString,
                                   verseNumberString, len(bits), bits)
                    continue  # Just save the pointer information which refers to the text on the next line
                else:  # it's not a $$ line
                    text = line
                    #vPrint( 'Quiet', debuggingThisModule, "text", repr(text) )
                    if metadataName:
                        metadataContents += ('\n' if metadataContents else
                                             '') + text
                        continue
                    else:
                        vText = text
                        # Handle bits like (<scripref>Pr 2:7</scripref>)
                        vText = vText.replace('(<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>)', '\\x*')
                        vText = vText.replace('<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>', '\\x*')
                        #if '\\' in vText: vPrint( 'Quiet', debuggingThisModule, 'ForgeForSwordSearcher vText', repr(vText) )
                        #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                        # Convert {stuff} to footnotes
                        match = re.search('\\{(.+?)\\}', vText)
                        while match:
                            footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                                chapterNumber, verseNumber, match.group(1))
                            vText = vText[:match.start(
                            )] + footnoteText + vText[
                                match.end():]  # Replace this footnote
                            #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\{(.+?)\\}', vText)
                        # Convert [stuff] to added fields
                        match = re.search('\\[(.+?)\\]', vText)
                        while match:
                            addText = '\\add {}\\add*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\[(.+?)\\]', vText)
                        # Convert +r/This text is red-letter-r/ to wj fields
                        match = re.search('\\+r/(.+?)-r/', vText)
                        while match:
                            addText = '\\wj {}\\wj*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\+r/(.+?)-r/', vText)
                        # Final check for unexpected remaining formatting
                        for badChar in '{}[]/':
                            if badChar in vText:
                                logging.warning(
                                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                                    .format(BBB, chapterNumberString,
                                            verseNumberString, vText))
                                break

                if bookCode:
                    if bookCode != lastBookCode:  # We've started a new book
                        if lastBookCode != -1:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        self.givenName, BBB))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters, numVerses = len(
                                verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "ForgeForSwordSearcherBible could not figure out {!r} book code"
                                .format(bookCode))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCode,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning(
                                    _("Ignored duplicated {} verse in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))
                            else:
                                logging.warning(
                                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        #vPrint( 'Quiet', debuggingThisModule, '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCode yet
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                        .format(line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            #vPrint( 'Quiet', debuggingThisModule, "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata(
                'Forge4SS')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Пример #9
0
    def load(self):
        """
        Load all the books out of the SQLite3 database.
        """
        fnPrint(debuggingThisModule, "load()…")
        assert self.preloadDone

        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata[
                'MySword']['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.suppliedMetadata['MySword']['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #dPrint( 'Quiet', debuggingThisModule, "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #dPrint( 'Quiet', debuggingThisModule, row )
                line = None
            #dPrint( 'Quiet', debuggingThisModule, nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                            .format(BBB, C, V, line))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                            .format(BBB, C, V, line,
                                    self.suppliedMetadata['MySword']))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #dPrint( 'Quiet', debuggingThisModule, "MySword.load", BBB, C, V, repr(line) )
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        vPrint('Verbose', debuggingThisModule,
                               "  MySword saving", BBB, bookCount + 1)
                        self.stashBook(thisBook)
                    #else: vPrint( 'Quiet', debuggingThisModule, "Not saving", BBB )
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)
                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'MySword Bible Book object'
                    thisBook.objectTypeString = 'MySword'
                    haveLines = False

                    verseList = self.BibleOrganisationalSystem.getNumVersesList(
                        BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(
                        BBB)
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        del self.cursor
        self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
Пример #10
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        vPrint( 'Info', debuggingThisModule, _("Loading {}…").format( self.sourceFilepath ) )

        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganisationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganisationalSystem( 'GENERIC-KJV-80-ENG' )
        if BOSx is None: BOSx = BibleOrganisationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = metadataName = None
        lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = re.search( '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line )
                    if match: vplType = 1
                    else:
                        match = re.search( '^(\\d{8})\\s', line )
                        if match: vplType = 2
                        else:
                            match = re.search( '^# language_name:\\s', line )
                            if match: vplType = 3
                            #else:
                                #match = re.search( '^; TITLE:\\s', line )
                                #if match: vplType = 4
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            vPrint( 'Quiet', debuggingThisModule, "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                    else:
                        vPrint( 'Verbose', debuggingThisModule, "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue
                    #vPrint( 'Quiet', debuggingThisModule, 'vplType', vplType )

                #vPrint( 'Quiet', debuggingThisModule, 'VPL file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if vplType == 3:
                    if   line.startswith( '# language_name:' ):
                        string = line[16:].strip()
                        if string and string != 'Not available': settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith( '# closest ISO 639-3:' ):
                        string = line[20:].strip()
                        if string and string != 'Not available': settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith( '# year_short:' ):
                        string = line[13:].strip()
                        if string and string != 'Not available': settingsDict['Year.short'] = string
                        continue
                    elif line.startswith( '# year_long:' ):
                        string = line[12:].strip()
                        if string and string != 'Not available': settingsDict['Year.long'] = string
                        continue
                    elif line.startswith( '# title:' ):
                        string = line[8:].strip()
                        if string and string != 'Not available': settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith( '# URL:' ):
                        string = line[6:].strip()
                        if string and string != 'Not available': settingsDict['URL'] = string
                        continue
                    elif line.startswith( '# copyright_short:' ):
                        string = line[18:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith( '# copyright_long:' ):
                        string = line[17:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.long'] = string
                        continue
                    elif line[0]=='#':
                        logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                        continue # Just discard comment lines
                #elif vplType == 4:
                    #if line.startswith( '; TITLE:' ):
                        #string = line[8:].strip()
                        #if string: settingsDict['TITLE'] = string
                        #continue
                    #elif line.startswith( '; ABBREVIATION:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['ABBREVIATION'] = string
                        #continue
                    #elif line.startswith( '; HAS ITALICS:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_ITALICS'] = string
                        #continue
                    #elif line.startswith( '; HAS FOOTNOTES:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_FOOTNOTES'] = string
                        #continue
                    #elif line.startswith( '; HAS FOOTNOTES' ):
                        #string = line[14:].strip()
                        #if string: settingsDict['HAS_FOOTNOTES'] = string
                        #continue
                    #elif line.startswith( '; HAS REDLETTER:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_REDLETTER'] = string
                        #continue
                    #elif line[0]==';':
                        #logging.warning( "VPLBible.load{} is skipping unknown header/comment line: {}".format( vplType, line ) )
                        #continue # Just discard comment lines

                # Process the main segment
                if vplType == 1:
                    bits = line.split( ' ', 2 )
                    #vPrint( 'Quiet', debuggingThisModule, self.givenName, BBB, bits )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCodeText, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                        #vPrint( 'Quiet', debuggingThisModule, "{} {} bc={!r} c={!r} v={!r} txt={!r}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, vText ) )
                        if chapterNumberString == '': chapterNumberString = '1' # Handle a bug in some single chapter books in VPL
                    else: vPrint( 'Quiet', debuggingThisModule, "Unexpected number of bits", self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, len(bits), bits )

                    if not bookCodeText and not chapterNumberString and not verseNumberString:
                        vPrint( 'Quiet', debuggingThisModule, "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        continue
                    if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCodeText) <= 4
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCodeText, BBB, chapterNumberString, verseNumberString ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int( chapterNumberString )
                    verseNumber = int( verseNumberString )

                    if bookCodeText != lastBookCodeText: # We've started a new book
                        lastBBB = BBB
                        #if bookCodeText in ('Ge',): BBB = 'GEN'
                        if bookCodeText == 'Le' and lastBBB == 'GEN': BBB = 'LEV'
                        elif bookCodeText in ('Jud',) and lastBBB == 'JOS': BBB = 'JDG'
                        #elif bookCodeText in ('Es',): BBB = 'EST'
                        #elif bookCodeText in ('Pr',): BBB = 'PRO'
                        #elif bookCodeText in ('So','SOL') and lastBBB == 'ECC': BBB = 'SNG'
                        #elif bookCodeText in ('La',) and lastBBB == 'JER': BBB = 'LAM'
                        #elif bookCodeText == 'PHI' and lastBBB == 'EPH': BBB = 'PHP'
                        #elif bookCodeText == 'PHI' and self.givenName == "bjp_vpl": BBB = 'PHP' # Hack for incomplete NT
                        #elif bookCodeText in ('Jude',): BBB = 'JDE'
                        #elif bookCodeText == 'PRA' and lastBBB == 'LJE': BBB = 'PAZ'
                        #elif bookCodeText == 'PRM' and lastBBB == 'GES': BBB = 'MAN'
                        else:
                            BBB = BOS66.getBBBFromText( bookCodeText )  # Try to guess
                            if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )  # Try to guess
                            if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )  # Try to guess
                            if not BBB: BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText( bookCodeText )  # Try to guess
                        if not BBB:
                            logging.critical( "VPL Bible: Unable to determine book code from text {!r} after {!r}={}".format( bookCodeText, lastBookCodeText, lastBBB ) )
                            halt

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0]=='«':
                        #vPrint( 'Quiet', debuggingThisModule, "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                        if BBB=='PSA' and verseNumberString=='1': # Psalm title
                            vBits = vText[1:].split( '»' )
                            #vPrint( 'Quiet', debuggingThisModule, "vBits", vBits )
                            thisBook.addLine( 'd', vBits[0] ) # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    #if verseNumber==lastVerseNumber and vText==lastVText:
                        #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        #continue
                    if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                        # Move Psalm titles to verse zero
                        verseNumber = 0
                    #if verseNumber < lastVerseNumber:
                        #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #elif verseNumber == lastVerseNumber:
                        #if vText == lastVText:
                            #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        #else:
                            #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                elif vplType in (2,3):
                    bits = line.split( '\t', 1 )
                    #vPrint( 'Quiet', debuggingThisModule, self.givenName, BBB, bits )
                    bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                    #vPrint( 'Quiet', debuggingThisModule, bookNumberString, chapterNumberString, verseNumberString )
                    chapterNumberString = chapterNumberString.lstrip( '0' ) # Remove leading zeroes
                    verseNumberString = verseNumberString.lstrip( '0' ) # Remove leading zeroes
                    bookCodeText, chapterNumber, verseNumber = int( bookNumberString), int(chapterNumberString), int(verseNumberString)
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[') #.replace(' !','!')

                    if bookCodeText != lastBookCodeText: # We've started a new book
                        lastBBB = BBB
                        bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                                85:'PSS', 86:'ODE', }
                        if 1 <= bookCodeText <= 66: BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber( bookCodeText )
                        else: BBB = bnDict[bookCodeText]

                #elif vplType == 4:
                    #if line.startswith( '$$ ' ):
                        #if metadataName and metadataContents:
                            #settingsDict[metadataName] = metadataContents
                            #metadataName = None
                        #pointer = line[3:]
                        ##vPrint( 'Quiet', debuggingThisModule, "pointer", repr(pointer) )
                        #if pointer and pointer[0]=='{' and pointer[-1]=='}':
                            #metadataName = pointer[1:-1]
                            #if metadataName:
                                ##vPrint( 'Quiet', debuggingThisModule, "metadataName", repr(metadataName) )
                                #metadataContents = ''
                        #else: # let's assume it's a BCV reference
                            #pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                            #.replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                            #.replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                            #.replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                            #.replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                            #.replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                            #.replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                            #B_CV_Bits = pointer.split( ' ', 1 )
                            #if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                                #bookCodeText, CVString = B_CV_Bits
                                #chapterNumberString, verseNumberString = CVString.split( ':' )
                                #chapterNumber = int( chapterNumberString )
                                #verseNumber = int( verseNumberString )
                                #if bookCodeText != lastBookCodeText: # We've started a new book
                                    #if bookCodeText in ('Ge',): BBB = 'GEN'
                                    #elif bookCodeText in ('Le',): BBB = 'LEV'
                                    #elif bookCodeText in ('La',): BBB = 'LAM'
                                    #else:
                                        ##vPrint( 'Quiet', debuggingThisModule, "4bookCodeText =", repr(bookCodeText) )
                                        ##BBB = BOS.getBBBFromText( bookCodeText )  # Try to guess
                                        #BBB = BOS66.getBBBFromText( bookCodeText )  # Try to guess
                                        #if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )  # Try to guess
                                        #if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )  # Try to guess
                                        ##vPrint( 'Quiet', debuggingThisModule, "4BBB =", repr(BBB) )
                            #else: vPrint( 'Quiet', debuggingThisModule, "Unexpected number of bits", self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, len(bits), bits )
                        #continue # Just save the pointer information which refers to the text on the next line
                    #else: # it's not a $$ line
                        #text = line
                        ##vPrint( 'Quiet', debuggingThisModule, "text", repr(text) )
                        #if metadataName:
                            #metadataContents += ('\n' if metadataContents else '') + text
                            #continue
                        #else:
                            #vText = text
                            ## Handle bits like (<scripref>Pr 2:7</scripref>)
                            #vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                            #vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                            ##if '\\' in vText: vPrint( 'Quiet', debuggingThisModule, 'VPL vText', repr(vText) )
                            #if vplType == 4: # Forge for SwordSearcher
                                ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                                ## Convert {stuff} to footnotes
                                #match = re.search( '\\{(.+?)\\}', vText )
                                #while match:
                                    #footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                                    #vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                                    ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                                    #match = re.search( '\\{(.+?)\\}', vText )
                                ## Convert [stuff] to added fields
                                #match = re.search( '\\[(.+?)\\]', vText )
                                #while match:
                                    #addText = '\\add {}\\add*'.format( match.group(1) )
                                    #vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                                    ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                                    #match = re.search( '\\[(.+?)\\]', vText )
                                #for badChar in '{}[]':
                                    #if badChar in vText:
                                        #logging.warning( "Found remaining braces or brackets in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                                        #break

                else:
                    logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

                if bookCodeText:
                    if bookCodeText != lastBookCodeText: # We've started a new book
                        if lastBookCodeText is not None: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCodeText = bookCodeText
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCodeText ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        #vPrint( 'Quiet', debuggingThisModule, '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCodeText yet
                    logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        if settingsDict:
            #vPrint( 'Quiet', debuggingThisModule, "VPL settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Пример #11
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        vPrint( 'Info', debuggingThisModule, _("Loading {}…").format( self.sourceFilepath ) )

        lastLine, lineCount = '', 0
        BBB = None
        lastBookNumber = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        quoted = None
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                    #logging.info( "      CSVBible.load: Detected Unicode Byte Order Marker (BOM)" )
                    #line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if line==' ': continue # Handle special case which has blanks on every second line -- HACK
                lastLine = line
                #vPrint( 'Quiet', debuggingThisModule, "CSV file line {} is {}".format( lineCount, repr(line) ) )
                if line[0]=='#': continue # Just discard comment lines
                if lineCount==1:
                    if line.startswith( '"Book",' ):
                        quoted = True
                        continue # Just discard header line
                    elif line.startswith( 'Book,' ):
                        quoted = False
                        continue # Just discard header line

                bits = line.split( ',', 3 )
                #vPrint( 'Quiet', debuggingThisModule, lineCount, self.givenName, BBB, bits )
                if len(bits) == 4:
                    bString, chapterNumberString, verseNumberString, vText = bits
                    #vPrint( 'Quiet', debuggingThisModule, "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )
                else:
                    logging.critical( "Unexpected number of bits {} {} {} {}:{} {!r} {} {}".format( self.givenName, BBB, bString, chapterNumberString, verseNumberString, vText, len(bits), bits ) )

                # Remove quote marks from these strings
                if quoted:
                    if len(bString)>=2 and bString[0]==bString[-1] and bString[0] in '"\'': bString = bString[1:-1]
                    if len(chapterNumberString)>=2 and chapterNumberString[0]==chapterNumberString[-1] and chapterNumberString[0] in '"\'': chapterNumberString = chapterNumberString[1:-1]
                    if len(verseNumberString)>=2 and verseNumberString[0]==verseNumberString[-1] and verseNumberString[0] in '"\'': verseNumberString = verseNumberString[1:-1]
                    if len(vText)>=2 and vText[0]==vText[-1] and vText[0] in '"\'': vText = vText[1:-1]
                    #vPrint( 'Quiet', debuggingThisModule, "bString, chapterNumberString, verseNumberString, vText", bString, chapterNumberString, verseNumberString, vText )

                #if not bookCode and not chapterNumberString and not verseNumberString:
                    #vPrint( 'Quiet', debuggingThisModule, "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    #continue
                #if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCode) <= 4
                #if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                #if BibleOrgSysGlobals.debugFlag: assert verseNumberString.isdigit()
                bookNumber = int( bString )
                chapterNumber = int( chapterNumberString )
                verseNumber = int( verseNumberString )

                if bookNumber != lastBookNumber: # We've started a new book
                    if lastBookNumber != -1: # Better save the last book
                        self.stashBook( thisBook )
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber( bookNumber )  # Try to guess
                    assert BBB
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'CSV Bible Book object'
                    thisBook.objectTypeString = 'CSV'
                    lastBookNumber = bookNumber
                    lastChapterNumber = lastVerseNumber = -1

                if chapterNumber != lastChapterNumber: # We've started a new chapter
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                    if chapterNumber == 0:
                        logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    thisBook.addLine( 'c', chapterNumberString )
                    lastChapterNumber = chapterNumber
                    lastVerseNumber = -1

                # Now we have to convert any possible RTF codes to our internal codes
                vTextOriginal = vText
                # First do special characters
                vText = vText.replace( '\\ldblquote', '“' ).replace( '\\rdblquote', '”' ).replace( '\\lquote', '‘' ).replace( '\\rquote', '’' )
                vText = vText.replace( '\\emdash', '—' ).replace( '\\endash', '–' )
                # Now do Unicode characters
                while True: # Find patterns like \\'d3
                    match = re.search( r"\\'[0-9a-f][0-9a-f]", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()], 16 ) # Convert two hex characters to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                while True: # Find patterns like \\u253?
                    match = re.search( r"\\u[1-2][0-9][0-9]\?", vText )
                    if not match: break
                    i = int( vText[match.start()+2:match.end()-1] ) # Convert three digits to decimal
                    vText = vText[:match.start()] + chr( i ) + vText[match.end():]
                #if vText != vTextOriginal: vPrint( 'Quiet', debuggingThisModule, repr(vTextOriginal) ); vPrint( 'Quiet', debuggingThisModule, repr(vText) )

                ## Handle special formatting
                ##   [brackets] are for Italicized words
                ##   <brackets> are for the Words of Christ in Red
                ##   «brackets»  are for the Titles in the Book  of Psalms.
                #vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                    #.replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                #if vText and vText[0]=='«':
                    #assert BBB=='PSA' and verseNumberString=='1'
                    #vBits = vText[1:].split( '»' )
                    ##vPrint( 'Quiet', debuggingThisModule, "vBits", vBits )
                    #thisBook.addLine( 'd', vBits[0] ) # Psalm title
                    #vText = vBits[1].lstrip()

                # Handle the verse info
                if verseNumber==lastVerseNumber and vText==lastVText:
                    logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    continue
                if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                    # Move Psalm titles to verse zero
                    verseNumber = 0
                if verseNumber < lastVerseNumber:
                    logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                elif verseNumber == lastVerseNumber:
                    if vText == lastVText:
                        logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                    else:
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookNumber, chapterNumberString, verseNumberString ) )
                thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                lastVText = vText
                lastVerseNumber = verseNumber

        # Save the final book
        self.stashBook( thisBook )
        self.doPostLoadProcessing()
Пример #12
0
    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """
        global BibleBooksNames

        vPrint('Verbose', debuggingThisModule,
               _("Validating OpenSong XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))
        if bookName:
            BBB = self.genericBOS.getBBBFromText(
                bookName)  # Booknames are usually in English
            if not BBB:  # wasn't English
                if BibleBooksNames is None:
                    BibleBooksNames = BibleBooksNamesSystems().loadData()
                BBB = BibleBooksNames.getBBBFromText(
                    bookName)  # Try non-English booknames
                #dPrint( 'Quiet', debuggingThisModule, "bookName", bookName, BBB )
            if BBB:
                vPrint('Info', debuggingThisModule,
                       _("Validating {} {}…").format(BBB, bookName))
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = 'OpenSong XML Bible Book object'
                thisBook.objectTypeString = 'OpenSong'
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = BibleOrgSysGlobals.loadedBibleBooksCodes.getUSFMAbbreviation(
                    BBB)
                if not USFMAbbreviation:
                    logging.critical(
                        f"Unable to find USFM abbreviation for '{BBB}'")
                    if BibleOrgSysGlobals.strictCheckingFlag: halt
                    USFMAbbreviation = 'XXA'
                thisBook.addLine(
                    'id', '{} imported by {}'.format(USFMAbbreviation.upper(),
                                                     programNameVersion))
                thisBook.addLine('h', bookName)
                thisBook.addLine('mt1', bookName)
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format(BBB)
                        BibleOrgSysGlobals.checkXMLNoText(
                            element, sublocation, 'j3jd')
                        BibleOrgSysGlobals.checkXMLNoTail(
                            element, sublocation, 'al1d')
                        self.__validateAndExtractChapter(
                            BBB, thisBook, element)
                    else:
                        logging.error(
                            "Expected to find {!r} but got {!r}".format(
                                OpenSongXMLBible.chapterTag, element.tag))
                vPrint('Info', debuggingThisModule,
                       "  Saving {} into results…".format(BBB))
                self.stashBook(thisBook)
            else:
                logging.error(
                    _("OpenSong load doesn't recognize book name: {!r}").
                    format(bookName))  # no BBB
        else:
            logging.error(
                _("OpenSong load can't find a book name"))  # no bookName
Пример #13
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        def decodeVerse(encodedVerseString):
            """
            Decodes the verse which has @ format codes.
            """
            verseString = encodedVerseString
            if verseString.startswith(
                    '@@'):  # This simply means that encoding follows
                verseString = verseString[2:]
            if verseString.startswith(
                    '@@'):  # This simply means that encoding follows
                verseString = verseString[2:]
            # Paragraph markers (marked now with double backslash)
            verseString = verseString.replace('@^', '\\\\p ')
            verseString = verseString.replace('@0', '\\\\m ')
            verseString = verseString.replace('@1', '\\\\q1 ').replace(
                '@2', '\\\\q2 ').replace('@3',
                                         '\\\\q3 ').replace('@4', '\\q4 ')
            verseString = verseString.replace('@8', '\\\\m ')
            # Character markers (marked now with single backslash)
            verseString = verseString.replace('@6',
                                              '\\wj ').replace('@5', '\\wj*')
            verseString = verseString.replace('@9', '\\add ').replace(
                '@7', '\\add*')  # or \\i ???
            verseString = re.sub(r'@<f([0-9])@>@/', r'\\ff\1', verseString)
            verseString = re.sub(r'@<x([0-9])@>@/', r'\\xx\1', verseString)
            #dPrint( 'Quiet', debuggingThisModule, repr( verseString ) )
            assert '@' not in verseString
            return verseString

        # end of decodeVerse

        # Read all the lines into bookDict
        lastLine, lineCount = '', 0
        bookNameDict, bookDict, footnoteDict, xrefDict, headingDict = {}, {}, {}, {}, {}
        BBB = bookNumberString = chapterNumberString = verseNumberString = encodedVerseString = ''
        lastBBB = lastBookNumberString = lastChapterNumberString = lastVerseNumberString = None
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                #if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
                #logging.info( "      YETBible.load: Detected Unicode Byte Order Marker (BOM)" )
                #line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                if line and line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                lastLine = line
                #dPrint( 'Quiet', debuggingThisModule, 'YETBible file line is "' + line + '"' )

                bits = line.split('\t')
                #dPrint( 'Quiet', debuggingThisModule, self.givenName, BBB, bits )
                if bits[0] == 'info':
                    assert len(bits) == 3
                    if bits[1] == 'shortName':
                        shortName = bits[2]
                        self.name = shortName
                    elif bits[1] == 'longName':
                        longName = bits[2]
                    elif bits[1] == 'description':
                        description = bits[2]
                    elif bits[1] == 'locale':
                        locale = bits[2]
                        assert 2 <= len(locale) <= 3
                        if locale == 'in':
                            locale = 'id'  # Fix a quirk in the locale encoding
                    else:
                        logging.warning( _("YETBible: unknown {} info field in {} {} {}:{}") \
                            .format( repr(bits[1]), BBB, chapterNumberString, verseNumberString ) )
                    continue
                elif bits[0] == 'book_name':
                    assert 3 <= len(bits) <= 4
                    thisBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
                        bits[1])
                    if len(bits) == 3:
                        bookNameDict[thisBBB] = bits[2], ''
                    elif len(bits) == 4:
                        bookNameDict[thisBBB] = bits[2], bits[3]
                    continue
                elif bits[0] == 'verse':
                    assert len(bits) == 5
                    bookNumberString, chapterNumberString, verseNumberString, encodedVerseString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert bookNumberString.isdigit()
                        assert chapterNumberString.isdigit()
                        assert verseNumberString.isdigit()
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    #dPrint( 'Quiet', debuggingThisModule, "{} {}:{} = {}".format( BBB, chapterNumberString, verseNumberString, repr(encodedVerseString) ) )
                    if BBB != lastBBB:  # We have a new book
                        if lastBBB is not None:  # We have a completed book to save
                            bookDict[lastBBB] = bookLines
                        assert BBB in bookNameDict
                        bookLines = {}  # Keys are (C,V) strings
                    verseString = decodeVerse(encodedVerseString)
                    bookLines[(chapterNumberString, verseNumberString
                               )] = verseString  # Just store it for now
                    lastBBB = BBB
                    continue
                elif bits[0] == 'pericope':
                    assert len(bits) == 5
                    bookNumberString, chapterNumberString, verseNumberString, encodedHeadingString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert bookNumberString.isdigit()
                        assert chapterNumberString.isdigit()
                        assert verseNumberString.isdigit()
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    headingString = encodedHeadingString.replace(
                        '@9', '\\it ').replace('@7', '\\it*')
                    #dPrint( 'Quiet', debuggingThisModule, repr(encodedHeadingString), repr(headingString) )
                    assert '@' not in headingString
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = headingString, [
                                 ]  # Blank refList
                    continue
                elif bits[
                        0] == 'parallel':  # These lines optionally follow pericope lines
                    assert len(bits) == 2
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    refList.append(bits[1])
                    #dPrint( 'Quiet', debuggingThisModule, "parallel2", repr(heading), refList )
                    headingDict[(BBB, chapterNumberString,
                                 verseNumberString)] = heading, refList
                    continue
                elif bits[0] == 'xref':
                    assert len(bits) == 6
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert bookNumberString.isdigit()
                        assert chapterNumberString.isdigit()
                        assert verseNumberString.isdigit()
                        assert indexNumberString.isdigit()
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace('@9',
                                                           '\\it ').replace(
                                                               '@7', '\\it*')
                    noteString = re.sub(
                        r'@<ta(.+?)@>', r'', noteString
                    )  # Get rid of these encoded BCV references for now
                    noteString = re.sub(
                        r'@<to(.+?)@>', r'', noteString
                    )  # Get rid of these OSIS BCV references for now
                    noteString = noteString.replace('@/', '')
                    #dPrint( 'Quiet', debuggingThisModule, repr(encodedNoteString), repr(noteString) )
                    assert '@' not in noteString
                    xrefDict[(BBB, chapterNumberString, verseNumberString,
                              indexNumberString)] = noteString
                    continue
                elif bits[0] == 'footnote':
                    assert len(bits) == 6
                    bookNumberString, chapterNumberString, verseNumberString, indexNumberString, encodedNoteString = bits[
                        1:]
                    if BibleOrgSysGlobals.debugFlag:
                        assert bookNumberString.isdigit()
                        assert chapterNumberString.isdigit()
                        assert verseNumberString.isdigit()
                        assert indexNumberString.isdigit()
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
                        bookNumberString)
                    noteString = encodedNoteString.replace('@9',
                                                           '\\it ').replace(
                                                               '@7', '\\it*')
                    assert '@' not in noteString
                    footnoteDict[(BBB, chapterNumberString, verseNumberString,
                                  indexNumberString)] = noteString
                    continue
                else:
                    vPrint('Quiet', debuggingThisModule,
                           "YETBible: Unknown line type", self.givenName,
                           BBB, chapterNumberString, verseNumberString,
                           len(bits), bits)
                    halt
            bookDict[lastBBB] = bookLines  # Save the last book

        # Now process the books
        for BBB, bkData in bookDict.items():
            #dPrint( 'Quiet', debuggingThisModule, "Processing", BBB )
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'YET Bible Book object'
            thisBook.objectTypeString = 'YET'
            lastChapterNumberString = None
            for (chapterNumberString,
                 verseNumberString), verseString in bkData.items():
                # Insert headings (can only occur before verses)
                if (BBB, chapterNumberString,
                        verseNumberString) in headingDict:
                    heading, refList = headingDict[(BBB, chapterNumberString,
                                                    verseNumberString)]
                    #dPrint( 'Quiet', debuggingThisModule, 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                    thisBook.addLine('s', heading)
                    if refList:
                        refString = ""
                        #dPrint( 'Quiet', debuggingThisModule, 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList )
                        for ref in refList:
                            refString += ('; ' if refString else '') + ref
                        #dPrint( 'Quiet', debuggingThisModule, 's', BBB, chapterNumberString, verseNumberString, repr(heading), refList, repr(refString) )
                        thisBook.addLine('r', '(' + refString + ')')
                # Insert footnotes and cross-references
                while '\\ff' in verseString:
                    #dPrint( 'Quiet', debuggingThisModule, "footnote", repr(verseString) )
                    fIx = verseString.index('\\ff')
                    caller = verseString[fIx + 3]
                    #dPrint( 'Quiet', debuggingThisModule, "fcaller", repr(caller) )
                    assert caller.isdigit()
                    note = footnoteDict[(BBB, chapterNumberString,
                                         verseNumberString, caller)]
                    #dPrint( 'Quiet', debuggingThisModule, "fnote", repr(note) )
                    verseString = verseString[:
                                              fIx] + '\\f + \\ft ' + note + '\\f*' + verseString[
                                                  fIx + 4:]
                    #dPrint( 'Quiet', debuggingThisModule, "fvS", repr(verseString) )
                while '\\xx' in verseString:
                    #dPrint( 'Quiet', debuggingThisModule, "xref", repr(verseString) )
                    fIx = verseString.index('\\xx')
                    caller = verseString[fIx + 3]
                    #dPrint( 'Quiet', debuggingThisModule, "xcaller", repr(caller) )
                    assert caller.isdigit()
                    note = xrefDict[(BBB, chapterNumberString,
                                     verseNumberString, caller)]
                    #dPrint( 'Quiet', debuggingThisModule, "xnote", repr(note) )
                    verseString = verseString[:
                                              fIx] + '\\x - \\xt ' + note + '\\x*' + verseString[
                                                  fIx + 4:]
                    #dPrint( 'Quiet', debuggingThisModule, "xvS", repr(verseString) )
                # Save the Bible data fields
                if chapterNumberString != lastChapterNumberString:
                    thisBook.addLine('c', chapterNumberString)
                    lastChapterNumberString = chapterNumberString
                #dPrint( 'Quiet', debuggingThisModule, BBB, chapterNumberString, verseNumberString, repr(verseString) )
                if verseString.startswith(
                        '\\\\'):  # It's an initial paragraph marker
                    if verseString[3] == ' ':
                        marker, verseString = verseString[2], verseString[4:]
                    elif verseString[4] == ' ':
                        marker, verseString = verseString[2:4], verseString[5:]
                    else:
                        halt
                    #dPrint( 'Quiet', debuggingThisModule, '', '\\'+marker )
                    thisBook.addLine(marker, '')
                assert not verseString.startswith('\\\\')
                bits = verseString.split(
                    '\\\\'
                )  # Split on paragraph markers (but not character markers)
                for j, bit in enumerate(bits):
                    #dPrint( 'Quiet', debuggingThisModule, "loop", j, repr(bit), repr(verseString) )
                    if j == 0:
                        thisBook.addLine(
                            'v',
                            verseNumberString + ' ' + verseString.rstrip())
                    else:
                        if bit[1] == ' ': marker, bit = bit[0], bit[2:]
                        elif bit[2] == ' ': marker, bit = bit[0:2], bit[3:]
                        else: halt
                        #dPrint( 'Quiet', debuggingThisModule, "mV", marker, repr(bit), repr(verseString) )
                        thisBook.addLine(marker, bit.rstrip())
            self.stashBook(thisBook)
        self.doPostLoadProcessing()
Пример #14
0
    def load(self):
        """
        Load a single source file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        status = 0  # 1 = getting chapters, 2 = getting verse data
        lastLine, lineCount = '', 0
        BBB = lastBBB = None
        bookDetails = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if lineCount == 1:
                    if line[0] == chr(65279):  #U+FEFF
                        logging.info(
                            "DrupalBible.load1: Detected Unicode Byte Order Marker (BOM) in {}"
                            .format(self.sourceFilepath))
                        line = line[
                            1:]  # Remove the UTF-16 Unicode Byte Order Marker (BOM)
                    elif line[:3] == '':  # 0xEF,0xBB,0xBF
                        logging.info(
                            "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}"
                            .format(self.sourceFilepath))
                        line = line[
                            3:]  # Remove the UTF-8 Unicode Byte Order Marker (BOM)
                if line and line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines

                #dPrint( 'Quiet', debuggingThisModule, 'DB file line is "' + line + '"' )
                if line[0] == '#': continue  # Just discard comment lines
                lastLine = line
                if lineCount == 1:
                    if line != '*Bible':
                        logging.warning(
                            "Unknown DrupalBible first line: {}".format(
                                repr(line)))

                elif status == 0:
                    if line == '*Chapter': status = 1
                    else:  # Get the version name details
                        bits = line.split('|')
                        shortName, fullName, language = bits
                        self.name = fullName

                elif status == 1:
                    if line == '*Context': status = 2
                    else:  # Get the book name details
                        bits = line.split('|')
                        bookCode, bookFullName, bookShortName, numChapters = bits
                        assert bookShortName == bookCode
                        BBBresult = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromDrupalBibleCode(
                            bookCode)
                        BBB = BBBresult if isinstance(
                            BBBresult, str
                        ) else BBBresult[
                            0]  # Result can be string or list of strings (best guess first)
                        bookDetails[
                            BBB] = bookFullName, bookShortName, numChapters

                elif status == 2:  # Get the verse text
                    bits = line.split('|')
                    bookCode, chapterNumberString, verseNumberString, lineMark, verseText = bits
                    #chapterNumber, verseNumber = int( chapterNumberString ), int( verseNumberString )
                    if lineMark:
                        vPrint('Quiet', debuggingThisModule, repr(lineMark))
                        halt
                    BBBresult = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromDrupalBibleCode(
                        bookCode)
                    BBB = BBBresult if isinstance(
                        BBBresult, str
                    ) else BBBresult[
                        0]  # Result can be string or list of strings (best guess first)
                    if BBB != lastBBB:
                        if lastBBB is not None:
                            self.stashBook(thisBook)
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = 'DrupalBible Bible Book object'
                        thisBook.objectTypeString = 'DrupalBible'
                        lastChapterNumberString = None
                        lastBBB = BBB
                    if chapterNumberString != lastChapterNumberString:
                        thisBook.addLine('c', chapterNumberString)
                        lastChapterNumberString = chapterNumberString
                    verseText = verseText.replace('<', '\\it ').replace(
                        '>', '\\it*')
                    thisBook.addLine('v', verseNumberString + ' ' + verseText)

                else:
                    halt

        # Save the final book
        self.stashBook(thisBook)
        self.doPostLoadProcessing()