def load(self):
        """
        Load a single source file and load book elements.

        The file at self.sourceFilepath is read line by line (decoded with
        self.encoding) and each non-blank line is treated as one of:
          * a '; ...' header/comment line -- recognised headers are copied
            into a local settings dictionary, others are logged and skipped;
          * a '$$ {NAME}' pointer -- starts a metadata section; the text lines
            that follow are collected (newline separated) under NAME;
          * a '$$ Book C:V' pointer -- gives the reference for the verse text
            on the following line;
          * anything else -- verse text, whose inline markup (<scripref>,
            {..}, [..], +r/..-r/) is converted to backslash markers before the
            verse is added to the current book.

        Each finished book is passed to self.stashBook().  Collected settings
        are stored as self.suppliedMetadata['Forge4SS'] and applied with
        self.applySuppliedMetadata(), then self.doPostLoadProcessing() is called.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        # The organisational systems live in module-level names and are only
        #   created the first time they are needed
        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None:
            BOSx = BibleOrganizationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}

        lineCount = 0
        bookCode = BBB = metadataName = None
        metadataContents = ''
        # Pre-set the reference fields so that diagnostics (and footnote
        #   conversion of any text that precedes the first pointer) can never
        #   hit an unbound name
        chapterNumber = verseNumber = None
        chapterNumberString = verseNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Remove the trailing newline character
                if not line:
                    continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search('^; TITLE:\\s', line)
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print("First line got type {!r} match from {!r}".
                                  format(match.group(0), line))
                    else:
                        # The first line is expected to be the TITLE header;
                        #   anything else is reported and dropped
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print(
                                "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                                .format(line, self.sourceFilepath))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            # NOTE(review): 'halt' is not defined in this block -- presumably a
                            #   deliberate NameError to stop a debugging run; confirm
                            halt
                        continue

                # Process header stuff
                # NOTE(review): the slice offsets below do not all equal the
                #   length of the prefix being tested (e.g. '; HAS FOOTNOTES:'
                #   is 16 characters but 15 are skipped) -- left unchanged
                #   because the intended stored values are not clear from here
                if line.startswith('; TITLE:'):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith('; ABBREVIATION:'):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith('; HAS ITALICS'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES:'):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS REDLETTER'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0] == ';':
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                        .format(line))
                    continue  # Just discard comment lines

                # Process the main segment
                if line.startswith('$$ '):
                    # Any new pointer ends a metadata section in progress
                    #   (even an empty one, so that following verse text is
                    #   not mistaken for metadata)
                    if metadataName:
                        if metadataContents:
                            settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    if pointer and pointer[0] == '{' and pointer[-1] == '}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            metadataContents = ''
                    else:  # let's assume it's a BCV reference
                        # Close up numbered book names so that the book code
                        #   contains no space and can be split off below
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split(' ', 1)
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split(
                                ':')
                            chapterNumber = int(chapterNumberString)
                            verseNumber = int(verseNumberString)
                            if bookCode != lastBookCode:  # We've started a new book
                                # A few two-letter codes are mapped directly;
                                #   everything else is guessed by the
                                #   organisational systems in turn
                                BBB = {'Ge': 'GEN', 'Le': 'LEV',
                                       'La': 'LAM'}.get(bookCode)
                                if not BBB:
                                    BBB = BOS66.getBBBFromText(
                                        bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOS81.getBBBFromText(
                                            bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOSx.getBBBFromText(
                                            bookCode)  # Try to guess
                        else:
                            print("Unexpected number of bits", self.givenName,
                                  BBB, bookCode, chapterNumberString,
                                  verseNumberString, len(B_CV_Bits), B_CV_Bits)
                    continue  # Just save the pointer information which refers to the text on the next line

                # It's not a $$ line
                text = line
                if metadataName:
                    metadataContents += ('\n' if metadataContents else
                                         '') + text
                    continue

                vText = text
                # Handle bits like (<scripref>Pr 2:7</scripref>)
                vText = vText.replace('(<scripref>',
                                      '\\x - \\xt ').replace(
                                          '</scripref>)', '\\x*')
                vText = vText.replace('<scripref>',
                                      '\\x - \\xt ').replace(
                                          '</scripref>', '\\x*')
                # Convert {stuff} to footnotes
                match = re.search('\\{(.+?)\\}', vText)
                while match:
                    footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                        chapterNumber, verseNumber, match.group(1))
                    vText = vText[:match.start(
                    )] + footnoteText + vText[
                        match.end():]  # Replace this footnote
                    match = re.search('\\{(.+?)\\}', vText)
                # Convert [stuff] to added fields
                match = re.search('\\[(.+?)\\]', vText)
                while match:
                    addText = '\\add {}\\add*'.format(match.group(1))
                    vText = vText[:match.start()] + addText + vText[
                        match.end():]  # Replace this chunk
                    match = re.search('\\[(.+?)\\]', vText)
                # Convert +r/This text is red-letter-r/ to wj fields
                match = re.search('\\+r/(.+?)-r/', vText)
                while match:
                    addText = '\\wj {}\\wj*'.format(match.group(1))
                    vText = vText[:match.start()] + addText + vText[
                        match.end():]  # Replace this chunk
                    match = re.search('\\+r/(.+?)-r/', vText)
                # Final check for unexpected remaining formatting
                for badChar in '{}[]/':
                    if badChar in vText:
                        logging.warning(
                            "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                            .format(BBB, chapterNumberString,
                                    verseNumberString, vText))
                        break

                if bookCode:
                    if bookCode != lastBookCode:  # We've started a new book
                        # Save the previous book exactly once: lastBookCode is
                        #   not advanced when a book code cannot be resolved,
                        #   so the book object itself is used as the marker
                        if thisBook is not None:
                            self.stashBook(thisBook)
                            thisBook = None
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        BBB, self.givenName))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters = len(verseList)
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "ForgeForSwordSearcherBible could not figure out {!r} book code"
                                .format(bookCode))
                            # NOTE(review): 'halt' is not defined in this block -- presumably a
                            #   deliberate NameError to stop a debugging run; confirm
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCode,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            # An exact repeat of the previous verse is dropped
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        # The two cases below are only reported -- the verse
                        #   is still added to the book
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            # Same number, different text (identical text was
                            #   already handled above)
                            logging.warning(
                                _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                  ).format(verseNumber, self.givenName,
                                           BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCode yet
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                        .format(line))

        # Keep a metadata section that ran right to the end of the file
        if metadataName and metadataContents:
            settingsDict[metadataName] = metadataContents

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata(
                'Forge4SS')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()
# Example #2
# 0
    def load( self ):
        """
        Load a single source file and load book elements.

        Opens self.sourceFilepath as an SQLite database, copies the first row
        of the Details table into self.settingsDict, then steps through every
        book/chapter/verse reference given by the GENERIC-KJV-66-ENG
        organisational system, fetching each verse from the Bible table and
        passing it to handleLine().  A book is passed to self.saveBook() only
        if at least one non-blank text verse was found for it.

        The database connection is closed before returning, including when
        loading fails with an exception.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )

        # A suspicious filename is only reported -- loading is still attempted
        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        try:
            connection.row_factory = sqlite3.Row # Enable row names
            cursor = connection.cursor()

            # First get the settings
            cursor.execute( 'select * from Details' )
            row = cursor.fetchone()
            for key in row.keys():
                self.settingsDict[key] = row[key]
            if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
            if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
            if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )

            # Decide the first book and how many books to attempt
            # NOTE(review): if neither 'OT' nor 'NT' is set, BBB and booksExpected
            #   stay unbound and the code below fails -- confirm the wanted fallback
            if self.settingsDict['OT'] and self.settingsDict['NT']:
                BBB, booksExpected = 'GEN', 66
            elif self.settingsDict['OT']:
                BBB, booksExpected = 'GEN', 39
            elif self.settingsDict['NT']:
                BBB, booksExpected = 'MAT', 27

            BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

            # Create the first book
            thisBook = BibleBook( self, BBB )
            thisBook.objectNameString = "MySword Bible Book object"
            thisBook.objectTypeString = "MySword"

            verseList = BOS.getNumVersesList( BBB )
            numC, numV = len(verseList), verseList[0]
            nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
            C = V = 1

            bookCount = 0
            ourGlobals = { 'haveParagraph': False }
            haveLines = False
            while True:
                cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
                row = cursor.fetchone()
                line = None if row is None else row[0] # No row means this reference is missing
                if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
                elif not isinstance( line, str ):
                    if 'encryption' in self.settingsDict:
                        logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                        break
                    else:
                        logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (Trailing ones are dropped, embedded ones become spaces)
                    while line and line[-1] in '\r\n': line = line[:-1]
                    if '\r' in line or '\n' in line: # (in the middle)
                        logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

                # The line is handed on even when it is missing, blank or undecodable
                handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
                V += 1
                if V > numV:
                    C += 1
                    if C > numC: # Save this book now
                        if haveLines:
                            if BibleOrgSysGlobals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                            self.saveBook( thisBook )
                        bookCount += 1 # Not the number saved but the number we attempted to process
                        if bookCount >= booksExpected: break
                        BBB = BOS.getNextBookCode( BBB )
                        # Create the next book
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = "MySword Bible Book object"
                        thisBook.objectTypeString = "MySword"
                        haveLines = False

                        verseList = BOS.getNumVersesList( BBB )
                        numC, numV = len(verseList), verseList[0]
                        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                        C = V = 1
                    else: # next chapter only
                        numV = verseList[C-1]
                        V = 1

                # handleLine can request a paragraph marker via ourGlobals
                if ourGlobals['haveParagraph']:
                    thisBook.addLine( 'p', '' )
                    ourGlobals['haveParagraph'] = False
            cursor.close()
        finally:
            connection.close() # Don't leave the database file open
        self.doPostLoadProcessing()
# Example #3
# 0
    def load( self ):
        """
        Load a single source file and load book elements.

        Opens self.sourceFilepath as an SQLite database, copies the first row
        of the Details table into self.settingsDict, and compares the row
        count and first book number of the Bible table with what the 'OT' /
        'NT' settings lead us to expect.  It then steps through every
        book/chapter/verse reference given by the GENERIC-KJV-66-ENG
        organisational system, fetching each verse from the Bible table and
        passing it to self.handleLine().  A book is passed to self.saveBook()
        only if at least one non-blank text verse was found for it.

        Problems noticed while loading are logged and also collected into
        self.errorDictionary['Load Errors'].  The database connection is
        closed before returning, including when loading fails with an exception.
        """
        if Globals.verbosityLevel > 2: print( _("Loading {}...").format( self.sourceFilepath ) )
        loadErrors = []

        # A suspicious filename is only reported -- loading is still attempted
        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical( "{} doesn't appear to be a e-Sword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BibleFilenameEndingsToAccept[0] ):
            logging.critical( "{} doesn't appear to be a e-Sword Bible file".format( self.sourceFilename ) )

        connection = sqlite3.connect( self.sourceFilepath )
        try:
            connection.row_factory = sqlite3.Row # Enable row names
            cursor = connection.cursor()

            # First get the settings
            cursor.execute( 'select * from Details' )
            row = cursor.fetchone()
            for key in row.keys():
                self.settingsDict[key] = row[key]
            if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
            if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
            if 'encryption' in self.settingsDict: logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, self.settingsDict['encryption'] ) )


            # Just get some information from the file
            cursor.execute( 'select * from Bible' )
            rows = cursor.fetchall()
            numRows = len(rows)
            if Globals.debugFlag or Globals.verbosityLevel>2: print( '{} rows found'.format( numRows ) )
            BBBn1 = rows[0][0] if rows else None # An empty table has no first book
            if Globals.debugFlag or Globals.verbosityLevel>2: print( 'First book number is {}'.format( BBBn1 ) )
            del rows # Takes a lot of memory
            BBB1 = None
            if BBBn1 is not None and BBBn1 <= 66: BBB1 = Globals.BibleBooksCodes.getBBBFromReferenceNumber( BBBn1 )


            # Decide the first book and how much material to expect
            testament = BBB = None
            booksExpected = textLineCountExpected = 0
            if self.settingsDict['OT'] and self.settingsDict['NT']:
                testament, BBB = 'BOTH', 'GEN'
                booksExpected, textLineCountExpected = 66, 31102
            elif self.settingsDict['OT']:
                testament, BBB = 'OT', 'GEN'
                booksExpected, textLineCountExpected = 39, 23145
            elif self.settingsDict['NT']:
                testament, BBB = 'NT', 'MAT'
                booksExpected, textLineCountExpected = 27, 7957
            elif self.settingsDict['Abbreviation'] == 'VIN2011': # Handle encoding error
                logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
                loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
                testament, BBB = 'BOTH', 'GEN'
                booksExpected, textLineCountExpected = 66, 31102
            elif self.settingsDict['Apocrypha']: # incomplete
                testament, BBB = 'AP', 'XXX'
                booksExpected, textLineCountExpected = 99, 999999
                # NOTE(review): 'halt' is not defined in this block -- presumably a
                #   deliberate NameError because this case is unfinished; confirm
                halt
            if not BBB:
                logging.critical( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
                loadErrors.append( "e-Sword settings encoding error -- no testament set: {}".format( self.settingsDict ) )
            if Globals.debugFlag or Globals.verbosityLevel>2:
                print( "Testament={} BBB={} BBB1={}, bE={}, tLCE={} nR={}".format( testament, BBB, BBB1, booksExpected, textLineCountExpected, numRows ) )
            if BBB1 != BBB:
                logging.critical( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
                loadErrors.append( "First book seems wrong: {} instead of {}".format( BBB1, BBB ) )
                if not BBB: BBB = BBB1 # Fall back to whatever the table starts with
            if numRows != textLineCountExpected:
                logging.critical( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )
                loadErrors.append( "Row count seems wrong: {} instead of {}".format( numRows, textLineCountExpected ) )

            BOS = BibleOrganizationalSystem( "GENERIC-KJV-66-ENG" )

            # Create the first book
            thisBook = BibleBook( self.name, BBB )
            thisBook.objectNameString = "e-Sword Bible Book object"
            thisBook.objectTypeString = "e-Sword"

            verseList = BOS.getNumVersesList( BBB )
            numC, numV = len(verseList), verseList[0]
            nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
            C = V = 1

            bookCount = 0
            ourGlobals = { 'haveParagraph': False }
            haveLines = False
            while True:
                cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
                row = cursor.fetchone()
                line = None if row is None else row[0] # No row means this reference is missing
                if line is None: logging.warning( "ESwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
                elif not isinstance( line, str ):
                    # Either way we can't read the text, so give up on the module
                    if 'encryption' in self.settingsDict:
                        logging.critical( "ESwordBible.load: Unable to decrypt verse line at {} {}:{} {}".format( BBB, C, V, repr(line) ) )
                    else:
                        logging.critical( "ESwordBible.load: Probably encrypted module: Unable to decode verse line at {} {}:{} {} {}".format( BBB, C, V, repr(line), self.settingsDict ) )
                    break
                elif not line: logging.warning( "ESwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (Trailing ones are dropped, embedded ones become spaces)
                    if '\r' in line or '\n' in line:
                        if Globals.debugFlag:
                            logging.warning( "ESwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    while line and line[-1] in '\r\n': line = line[:-1] # Remove CR/LFs from the end
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' ) # Replace CR/LFs in the middle

                # The line is handed on even when it is missing or blank
                self.handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
                V += 1
                if V > numV:
                    C += 1
                    if C > numC: # Save this book now
                        if haveLines:
                            if Globals.verbosityLevel > 3: print( "Saving", BBB, bookCount+1 )
                            self.saveBook( thisBook )
                        bookCount += 1 # Not the number saved but the number we attempted to process
                        if bookCount >= booksExpected: break
                        BBB = BOS.getNextBookCode( BBB )
                        # Create the next book
                        thisBook = BibleBook( self.name, BBB )
                        thisBook.objectNameString = "e-Sword Bible Book object"
                        thisBook.objectTypeString = "e-Sword"
                        haveLines = False

                        verseList = BOS.getNumVersesList( BBB )
                        numC, numV = len(verseList), verseList[0]
                        nBBB = Globals.BibleBooksCodes.getReferenceNumber( BBB )
                        C = V = 1
                    else: # next chapter only
                        numV = verseList[C-1]
                        V = 1

                # handleLine can request a paragraph marker via ourGlobals
                if ourGlobals['haveParagraph']:
                    thisBook.appendLine( 'p', '' )
                    ourGlobals['haveParagraph'] = False

            if Globals.strictCheckingFlag or Globals.debugFlag: self.checkForExtraMaterial( cursor, BOS )
            cursor.close()
        finally:
            connection.close() # Don't leave the database file open
        if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
        self.doPostLoadProcessing()
# Example #4
# 0
    def load(self):
        """
        Load a single VPL (verse-per-line) source file and load book elements.

        The first line of the file determines how every following line is parsed:
            type 1: 'Bk C:V text' -- a book abbreviation, then the chapter and
                    verse numbers separated by ':' or '.', then the verse text
                    (the three fields are separated by single spaces)
            type 2: an 8-digit BBCCCVVV reference, a tab, then the verse text
            type 3: '# key: value' header lines (saved as the 'VPL' supplied
                    metadata) followed by verse lines in the type 2 layout

        If the first line matches none of these it is skipped and vplType
        stays None, so every later line is reported as an unknown type.

        Each book is built as a BibleBook and handed to self.stashBook() when
        the next book starts or the file ends.  The method finishes by calling
        self.doPostLoadProcessing().
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganizationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None:
            BOSx = BibleOrganizationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}

        # First-line patterns used to recognise the file flavour (tried in order)
        typePatterns = (
            (1, r'^(\w{2,5}?)\s(\d{1,3})[:\.](\d{1,3})\s'),
            (2, r'^(\d{8})\s'),
            (3, r'^# language_name:\s'),
        )
        # Type 3 header line prefixes and the settings key each one is saved under
        headerFields = (
            ('# language_name:', 'LanguageName'),
            ('# closest ISO 639-3:', 'ISOLanguageCode'),
            ('# year_short:', 'Year.short'),
            ('# year_long:', 'Year.long'),
            ('# title:', 'WorkTitle'),
            ('# URL:', 'URL'),
            ('# copyright_short:', 'Copyright.short'),
            ('# copyright_long:', 'Copyright.long'),
        )
        # Book codes for type 2/3 book numbers above 66
        #   (built once here rather than every time a new book starts)
        bnDict = {
            67: 'TOB', 68: 'JDT', 69: 'ESG', 70: 'WIS', 71: 'SIR',
            72: 'BAR', 73: 'LJE', 74: 'PAZ', 75: 'SUS', 76: 'BEL',
            77: 'MA1', 78: 'MA2', 79: 'MA3', 80: 'MA4', 81: 'ES1',
            82: 'ES2', 83: 'MAN', 84: 'PS2', 85: 'PSS', 86: 'ODE',
        }

        lineCount = 0
        vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = None
        lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Remove the trailing newline character
                if not line:
                    continue  # Just discard blank lines

                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(65279):  # U+FEFF or \ufeff
                        logging.info(
                            "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)")
                        line = line[1:]  # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = None
                    for candidateType, pattern in typePatterns:
                        match = re.search(pattern, line)
                        if match:
                            vplType = candidateType
                            break
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print(
                                "First line got type #{} {!r} match from {!r}".
                                format(vplType, match.group(0), line))
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print(
                                "VPLBible.load: (unexpected) first line was {!r} in {}"
                                .format(line, self.sourceFilepath))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            # NOTE(review): `halt` is not defined in this method -- presumably
                            #   an intentional NameError to abort while debugging; confirm
                            halt
                        continue

                # Process header stuff
                #   (every known header prefix starts with '#', so other lines fall through)
                if vplType == 3 and line[0] == '#':
                    for prefix, settingsKey in headerFields:
                        if line.startswith(prefix):
                            string = line[len(prefix):].strip()
                            if string and string != 'Not available':
                                settingsDict[settingsKey] = string
                            break
                    else:
                        logging.warning(
                            "VPLBible.load {} is skipping unknown line: {}".
                            format(vplType, line))
                    continue  # Header and comment lines carry no verse text

                # Process the main segment
                psalmTitle = None  # Only set when this line starts with a «Psalm title»
                if vplType == 1:
                    bits = line.split(' ', 2)
                    # The type detection pattern accepts either ':' or '.' between
                    #   the chapter and verse numbers, so accept both here as well
                    separator = None
                    if len(bits) == 3:
                        if ':' in bits[1]:
                            separator = ':'
                        elif '.' in bits[1]:
                            separator = '.'
                    if separator is None:
                        # Skip the line rather than reusing the previous line's fields
                        #   (which would be unassigned if this is the first verse line)
                        logging.error(
                            "VPLBible.load: skipping malformed line {} in {}: {!r}"
                            .format(lineCount, self.sourceFilepath, line))
                        continue
                    bookCodeText, CVString, vText = bits
                    # NB: Don't use _ as a throw-away name here -- it's the translation function
                    chapterNumberString, _separator, verseNumberString = CVString.partition(
                        separator)
                    if chapterNumberString == '':
                        chapterNumberString = '1'  # Handle a bug in some single chapter books in VPL

                    if BibleOrgSysGlobals.debugFlag:
                        assert 2 <= len(bookCodeText) <= 4
                        assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error(
                            "Invalid verse number field at {}/{} {}:{!r}".
                            format(bookCodeText, BBB, chapterNumberString,
                                   verseNumberString))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int(chapterNumberString)
                    verseNumber = int(verseNumberString)

                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        lastBBB = BBB
                        # A couple of ambiguous abbreviations are resolved from the preceding book
                        if bookCodeText == 'Le' and lastBBB == 'GEN':
                            BBB = 'LEV'
                        elif bookCodeText in ('Jud', ) and lastBBB == 'JOS':
                            BBB = 'JDG'
                        else:  # Try to guess, from the narrowest system to the widest
                            BBB = (BOS66.getBBBFromText(bookCodeText)
                                   or BOS81.getBBBFromText(bookCodeText)
                                   or BOSx.getBBBFromText(bookCodeText)
                                   or BibleOrgSysGlobals.BibleBooksCodes.getBBBFromText(bookCodeText))
                        if not BBB:
                            logging.critical(
                                "VPL Bible: Unable to determine book code from text {!r} after {!r}={}"
                                .format(bookCodeText, lastBookCodeText,
                                        lastBBB))
                            # NOTE(review): undefined name, so this always raises NameError
                            #   here -- presumably intentional; confirm
                            halt

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace('[', '\\add ').replace(']', '\\add*') \
                        .replace('<', '\\wj ').replace('>', '\\wj*')
                    if vText and vText[0] == '«' and BBB == 'PSA' and verseNumberString == '1':
                        title, closer, remainder = vText[1:].partition('»')
                        if closer:
                            # The title is only written out further down, after the book
                            #   and chapter have been set up, so that it lands in the
                            #   right book and after its chapter marker
                            psalmTitle = title
                            vText = remainder.lstrip()
                        else:
                            logging.warning(
                                "VPLBible.load: unclosed Psalm title at {} {}:{} {!r}"
                                .format(BBB, chapterNumberString, verseNumberString, vText))

                    if BBB == 'PSA' and verseNumberString == '1' and vText.startswith(
                            '&lt;') and self.givenName == 'basic_english':
                        # Count this title line as verse zero for the sequence checks below
                        #   (the line that gets added still uses verseNumberString)
                        verseNumber = 0

                elif vplType in (2, 3):
                    bits = line.split('\t', 1)
                    reference = bits[0]  # BBCCCVVV
                    bookNumberString, chapterNumberString, verseNumberString = \
                        reference[:2], reference[2:5], reference[5:]
                    # Remove leading zeroes (these strings are also used in the added lines)
                    while len(chapterNumberString) > 1 and chapterNumberString[0] == '0':
                        chapterNumberString = chapterNumberString[1:]
                    while len(verseNumberString) > 1 and verseNumberString[0] == '0':
                        verseNumberString = verseNumberString[1:]
                    bookCodeText = int(bookNumberString)
                    chapterNumber = int(chapterNumberString)
                    verseNumber = int(verseNumberString)
                    # Close up the spaces left around punctuation
                    vText = bits[1].replace(' ,', ',').replace(' .', '.').replace(' ;', ';').replace(' :', ':') \
                                   .replace(' !', '!').replace(' )', ')').replace(' ]', ']').replace(' ”', '”') \
                                   .replace('“ ', '“').replace('( ', '(').replace('[ ', '[')

                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        lastBBB = BBB
                        if 1 <= bookCodeText <= 66:
                            BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber(
                                bookCodeText)
                        else:
                            BBB = bnDict[bookCodeText]

                else:
                    logging.critical('Unknown VPL type {}'.format(vplType))
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                        halt

                if bookCodeText:
                    if bookCodeText != lastBookCodeText:  # We've started a new book
                        if lastBookCodeText is not None:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        BBB, self.givenName))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            numChapters = len(BOSx.getNumVersesList(BBB))
                            lastBookCodeText = bookCodeText
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "VPLBible{} could not figure out {!r} book code"
                                .format(vplType, bookCodeText))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCodeText,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            # Same number but different text (identical text was skipped above)
                            logging.warning(
                                _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                  ).format(verseNumber, self.givenName,
                                           BBB, bookCodeText,
                                           chapterNumberString,
                                           verseNumberString))

                        if psalmTitle is not None:
                            thisBook.addLine('d', psalmTitle)  # Psalm title

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCodeText yet
                    logging.warning(
                        "VPLBible.load{} is skipping unknown pre-book line: {}"
                        .format(vplType, line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata('VPL')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()
示例#5
0
class MySwordBible( Bible ):
    """
    Class for reading, validating, and converting MySwordBible files.
    """
    def __init__( self, sourceFolder, givenFilename, encoding='utf-8' ):
        """
        Constructor: records where the file is, but doesn't open it.

        sourceFolder and givenFilename are joined to give self.sourceFilepath.
        self.name and self.fileExtension are taken from splitting givenFilename
            at its extension.
        An unreadable file is only logged (as critical) -- no exception is raised here.
        """
        # The base class has to be initialised before we set any of our own fields
        Bible.__init__( self )
        self.objectNameString = 'MySword Bible object'
        self.objectTypeString = 'MySword'

        # Remember the parameters that we were given
        self.sourceFolder = sourceFolder
        self.sourceFilename = givenFilename
        self.encoding = encoding
        self.sourceFilepath = os.path.join( sourceFolder, givenFilename )

        # Give an early warning if we won't be able to read the file
        fileIsReadable = os.access( self.sourceFilepath, os.R_OK )
        if not fileIsReadable:
            logging.critical( _("MySwordBible: File {!r} is unreadable").format( self.sourceFilepath ) )

        # e.g., 'kjv.bbl.mybible' gives name 'kjv.bbl' and extension '.mybible'
        self.name, self.fileExtension = os.path.splitext( self.sourceFilename )

        # (A warning for extensions ending in 'X', i.e., encrypted files, is currently disabled)
        #if self.fileExtension.upper().endswith('X'):
            #logging.warning( _("MySwordBible: File {!r} is encrypted").format( self.sourceFilepath ) )
    # end of MySwordBible.__init__


    def preload( self ):
        """
        Open the SQLite3 database and load the metadata from it.

        Every column of the first row of the Details table is copied into
            self.suppliedMetadata['MySword'].
        The database cursor is kept in self.cursor, self.BOS is set up,
            and self.preloadDone is set to True.
        Filename and encryption problems are only logged (as critical).
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("preload()") )

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( _("Preloading {}…").format( self.sourceFilepath ) )

        # Check the filename (but we still go ahead and try to open the file anyway)
        if self.fileExtension.upper() not in FILENAME_ENDINGS_TO_ACCEPT:
            logging.critical( "{} doesn't appear to be a MySword file".format( self.sourceFilename ) )
        elif not self.sourceFilename.upper().endswith( BIBLE_FILENAME_ENDINGS_TO_ACCEPT[0] ):
            logging.critical( "{} doesn't appear to be a MySword Bible file".format( self.sourceFilename ) )

        dbConnection = sqlite3.connect( self.sourceFilepath )
        dbConnection.row_factory = sqlite3.Row # So that columns can be fetched by name
        self.cursor = dbConnection.cursor()

        # First get the settings
        if self.suppliedMetadata is None:
            self.suppliedMetadata = {}
        details = self.suppliedMetadata['MySword'] = {} # Same dictionary under both names
        self.cursor.execute( 'select * from Details' )
        detailsRow = self.cursor.fetchone()
        details.update( (columnName, detailsRow[columnName]) for columnName in detailsRow.keys() )
        if 'encryption' in details:
            logging.critical( "{} is encrypted: level {}".format( self.sourceFilename, details['encryption'] ) )

        self.BOS = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )

        self.preloadDone = True
    # end of MySwordBible.preload


    def load( self ):
        """
        Load all the books out of the SQLite3 database.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("load()") )
        assert self.preloadDone

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )


        if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.suppliedMetadata['MySword']['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        # Create the first book
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BOS.getNumVersesList( BBB )
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (nBBB,C,V) )
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError: # This reference is missing (row is None)
                #print( "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #print( row )
                line = None
            #print ( nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None: logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            else: # line is not None
                if not isinstance( line, str ):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                        break
                    else:
                        logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, self.suppliedMetadata['MySword'] ) )
                elif not line: logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n': line = line[:-1]
                    if '\r' in line or '\n' in line: # (in the middle)
                        logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                    line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            #print( "MySword.load", BBB, C, V, repr(line) )
            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )
            V += 1
            if V > numV:
                C += 1
                if C > numC: # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3: print( "  MySword saving", BBB, bookCount+1 )
                        self.stashBook( thisBook )
                    #else: print( "Not saving", BBB )
                    bookCount += 1 # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = self.BOS.getNextBookCode( BBB )
                    # Create the next book
                    thisBook = BibleBook( self, BBB )
                    thisBook.objectNameString = 'MySword Bible Book object'
                    thisBook.objectTypeString = 'MySword'
                    haveLines = False

                    verseList = self.BOS.getNumVersesList( BBB )
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else: # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C-1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        self.applySuppliedMetadata( 'MySword' ) # Copy some to self.settingsDict
        self.doPostLoadProcessing()
    # end of MySwordBible.load


    def loadBook( self, BBB ):
        """
        Load one book (given by its BBB code) out of the SQLite3 database.

        Returns immediately if the book is already in self.books
            or if we have already attempted to load it.

        Each verse is requested individually, using the chapter and verse counts
            that self.BOS supplies for the book, and whatever comes back
            (None for a missing row) is passed on to handleLine().
        The book is only stashed if at least one non-blank text line was found.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print( exp("loadBook( {} )").format( BBB ) )
        assert self.preloadDone

        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag: print( "  {} is already loaded -- returning".format( BBB ) )
            return # Nothing more to do
        if BBB in self.triedLoadingBook:
            logging.warning( "We had already tried loading MySwordBible {} for {}".format( BBB, self.name ) )
            return # Don't try a second time
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            print( _("MySwordBible: Loading {} from {}…").format( BBB, self.sourceFilepath ) )

        # Set up the (still empty) book
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        # Find out how many chapters and verses we're expected to step through
        versesPerChapter = self.BOS.getNumVersesList( BBB )
        chapterCount = len( versesPerChapter )
        versesInChapter = versesPerChapter[0]
        bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber( BBB )
        C, V = 1, 1

        ourGlobals = { 'haveParagraph': False } # State shared with handleLine()
        haveLines = False
        while True:
            self.cursor.execute('select Scripture from Bible where Book=? and Chapter=? and Verse=?', (bookNumber,C,V) )
            try: line = self.cursor.fetchone()[0]
            except TypeError: line = None # fetchone() gave None, i.e., this reference is missing

            if line is None:
                logging.warning( "MySwordBible.load: Found missing verse line at {} {}:{}".format( BBB, C, V ) )
            elif not isinstance( line, str ):
                if 'encryption' in self.suppliedMetadata['MySword']:
                    logging.critical( "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}".format( BBB, C, V, line ) )
                    break # Give up on this book (without stashing it)
                logging.critical( "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}".format( BBB, C, V, line, self.suppliedMetadata['MySword'] ) )
            elif not line:
                logging.warning( "MySwordBible.load: Found blank verse line at {} {}:{}".format( BBB, C, V ) )
            else: # We have some actual text
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (Trailing ones are dropped and any others become spaces)
                line = line.rstrip( '\r\n' )
                if '\r' in line or '\n' in line: # (still some in the middle)
                    logging.warning( "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}".format( BBB, C, V ) )
                line = line.replace( '\r\n', ' ' ).replace( '\r', ' ' ).replace( '\n', ' ' )

            handleLine( self.name, BBB, C, V, line, thisBook, ourGlobals )

            # Step to the next verse (or chapter), or finish the book
            V += 1
            if V > versesInChapter:
                C += 1
                if C > chapterCount: # That was the last verse of the book
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  MySword saving", BBB )
                        self.stashBook( thisBook )
                    break
                versesInChapter = versesPerChapter[C-1]
                V = 1

            # handleLine() sets this flag when a paragraph marker is wanted before the next verse
            if ourGlobals['haveParagraph']:
                thisBook.addLine( 'p', '' )
                ourGlobals['haveParagraph'] = False
示例#6
0
    def load( self ):
        """
        Load a single source file and load book elements.

        The file is read line by line:
            lines starting with ';' are header/comment lines
                (TITLE, ABBREVIATION and the HAS ... lines are saved as settings),
            '$$ {name}' starts a named metadata block
                (the text lines that follow are collected as its contents),
            any other '$$ ' line is taken to be a book chapter:verse reference
                for the verse text on the following line.

        Verse text has its <scripref>, {footnote}, [added text] and +r/red letter-r/ markup
            converted to backslash-marker fields before it's added to the current book.

        Each book is stashed when the next one starts (and the final one at the end of the file).
        Any settings that were found are saved in self.suppliedMetadata['Forge4SS'].
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
        if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lineCount = 0
        bookCode = BBB = metadataName = None
        metadataContents = ''
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search( '^; TITLE:\\s', line )
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "First line got type {!r} match from {!r}".format( match.group(0), line ) )
                    else:
                        # (This message used to reference two undefined names and so raised a NameError)
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                        # NOTE(review): 'halt' isn't defined in this method -- presumably a deliberate way to abort when debugging -- confirm
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue

                #print ( 'ForgeForSwordSearcher file line is "' + line + '"' )

                # Process header stuff
                # NOTE(review): Some of the slice offsets below differ from the length of the prefix being matched
                #   (e.g., '; HAS FOOTNOTES' is 15 characters but the slice starts at 14,
                #       and '; HAS FOOTNOTES:' is 16 characters but the slice starts at 15).
                #   They've been left unchanged because they affect the values that get stored -- confirm what was intended.
                if line.startswith( '; TITLE:' ):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith( '; ABBREVIATION:' ):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith( '; HAS ITALICS' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith( '; HAS FOOTNOTES:' ):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith( '; HAS FOOTNOTES' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith( '; HAS REDLETTER' ):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0]==';':
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}".format( line ) )
                    continue # Just discard comment lines

                # Process the main segment
                if line.startswith( '$$ ' ):
                    if metadataName and metadataContents: # then we've reached the end of a metadata block
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #print( "pointer", repr(pointer) )
                    if pointer and pointer[0]=='{' and pointer[-1]=='}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #print( "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else: # let's assume it's a BCV reference
                        # Close up the space in numbered book names so the book code has no internal space
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split( ' ', 1 )
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split( ':' )
                            chapterNumber = int( chapterNumberString )
                            verseNumber = int( verseNumberString )
                            if bookCode != lastBookCode: # We've started a new book
                                if bookCode in ('Ge',): BBB = 'GEN'
                                elif bookCode in ('Le',): BBB = 'LEV'
                                elif bookCode in ('La',): BBB = 'LAM'
                                else:
                                    #print( "4BookCode =", repr(bookCode) )
                                    BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                                    if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                                    if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess
                                    #print( "4BBB =", repr(BBB) )
                        else:
                            # (Only names that are certain to exist are reported here --
                            #   the chapter and verse strings might not have been set yet)
                            print( "Unexpected number of bits", self.givenName, BBB, bookCode, repr(pointer), len(B_CV_Bits), B_CV_Bits )
                    continue # Just save the pointer information which refers to the text on the next line

                # If we get here, it's not a $$ line
                if metadataName: # then this line is part of the metadata block
                    metadataContents += ('\n' if metadataContents else '') + line
                    continue

                if not bookCode: # No usable $$ reference yet, so there's nowhere to put this text
                    logging.warning( "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}".format( line ) )
                    continue

                vText = line
                # Handle bits like (<scripref>Pr 2:7</scripref>)
                vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                #if '\\' in vText: print( 'ForgeForSwordSearcher vText', repr(vText) )
                #print( BBB, chapterNumber, verseNumber, repr(vText) )
                # Convert {stuff} to footnotes
                match = re.search( '\\{(.+?)\\}', vText )
                while match:
                    footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                    vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                    match = re.search( '\\{(.+?)\\}', vText )
                # Convert [stuff] to added fields
                match = re.search( '\\[(.+?)\\]', vText )
                while match:
                    addText = '\\add {}\\add*'.format( match.group(1) )
                    vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                    match = re.search( '\\[(.+?)\\]', vText )
                # Convert +r/This text is red-letter-r/ to wj fields
                match = re.search( '\\+r/(.+?)-r/', vText )
                while match:
                    addText = '\\wj {}\\wj*'.format( match.group(1) )
                    vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                    match = re.search( '\\+r/(.+?)-r/', vText )
                # Final check for unexpected remaining formatting
                for badChar in '{}[]/':
                    if badChar in vText:
                        logging.warning( "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                        break

                if bookCode != lastBookCode: # We've started a new book
                    if thisBook is not None: # Better save the last book
                        self.stashBook( thisBook )
                        thisBook = None # so it can't be stashed again if the new book code can't be resolved
                    if BBB:
                        if BBB in self:
                            logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                        if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                        thisBook = BibleBook( self, BBB )
                        thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                        thisBook.objectTypeString = 'ForgeForSwordSearcher'
                        verseList = BOSx.getNumVersesList( BBB )
                        numChapters, numVerses = len(verseList), verseList[0]
                        lastBookCode = bookCode
                        lastChapterNumber = lastVerseNumber = -1
                    else:
                        logging.critical( "ForgeForSwordSearcherBible could not figure out {!r} book code".format( bookCode ) )
                        if BibleOrgSysGlobals.debugFlag: halt

                if BBB:
                    if chapterNumber != lastChapterNumber: # We've started a new chapter
                        if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                        if chapterNumber == 0:
                            logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        elif chapterNumber > numChapters:
                            logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                        thisBook.addLine( 'c', chapterNumberString )
                        lastChapterNumber = chapterNumber
                        lastVerseNumber = -1

                    # Handle the verse info
                    if verseNumber==lastVerseNumber and vText==lastVText:
                        logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        continue
                    # NOTE: The next two cases only log a warning -- the verse line still gets added below
                    if verseNumber < lastVerseNumber:
                        logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                    elif verseNumber == lastVerseNumber: # (the text must differ or else we'd have skipped the line above)
                        logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                    # Check for paragraph markers
                    if vText and vText[0]=='¶':
                        thisBook.addLine( 'p', '' )
                        vText = vText[1:].lstrip()

                    #print( '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                    thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                    lastVText = vText
                    lastVerseNumber = verseNumber

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Save any metadata block that was still open when the file ended
        if metadataName and metadataContents:
            settingsDict[metadataName] = metadataContents

        # Clean up
        if settingsDict:
            #print( "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata( 'Forge4SS' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()
示例#7
0
class MySwordBible(Bible):
    """
    Class for reading, validating, and converting MySwordBible files.
    """
    def __init__(self, sourceFolder, givenFilename, encoding='utf-8'):
        """
        Constructor: records where the source file is and what it's called.

        sourceFolder and givenFilename are joined to make self.sourceFilepath.
        self.name and self.fileExtension come from splitting givenFilename.
        An unreadable file is only reported (as a critical log message).
        """
        # The base class has to be set up before we add anything of our own
        Bible.__init__(self)
        self.objectNameString = 'MySword Bible object'
        self.objectTypeString = 'MySword'

        # Remember what we were given
        self.sourceFolder = sourceFolder
        self.sourceFilename = givenFilename
        self.encoding = encoding
        self.sourceFilepath = os.path.join(self.sourceFolder,
                                           self.sourceFilename)

        # Early warning if we're not going to be able to read the file
        canRead = os.access(self.sourceFilepath, os.R_OK)
        if not canRead:
            message = _("MySwordBible: File {!r} is unreadable")
            logging.critical(message.format(self.sourceFilepath))

        # The filename (less its extension) becomes our name
        self.name, self.fileExtension = os.path.splitext(self.sourceFilename)

        #if self.fileExtension.upper().endswith('X'):
        #logging.warning( _("MySwordBible: File {!r} is encrypted").format( self.sourceFilepath ) )

    # end of MySwordBible.__init__

    def preload(self):
        """
        Open the SQLite3 database and load its metadata.

        The first row of the Details table is copied (column by column)
            into self.suppliedMetadata['MySword'].
        Also sets self.cursor (left open for the load functions),
            self.BOS, and self.preloadDone.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("preload()"))

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Preloading {}…").format(self.sourceFilepath))

        # Complain (but carry on) if the filename doesn't look right
        if self.fileExtension.upper() not in FILENAME_ENDINGS_TO_ACCEPT:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        else:
            expectedEnding = BIBLE_FILENAME_ENDINGS_TO_ACCEPT[0]
            if not self.sourceFilename.upper().endswith(expectedEnding):
                logging.critical(
                    "{} doesn't appear to be a MySword Bible file".format(
                        self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # So that rows have column names
        self.cursor = connection.cursor()

        # Copy the module details into our metadata
        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        details = self.suppliedMetadata['MySword'] = {}
        self.cursor.execute('select * from Details')
        detailsRow = self.cursor.fetchone()
        details.update(
            (columnName, detailsRow[columnName])
            for columnName in detailsRow.keys())
        #print( self.suppliedMetadata['MySword'] ); halt
        if 'encryption' in details:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename, details['encryption']))

        self.BOS = BibleOrganizationalSystem('GENERIC-KJV-66-ENG')

        self.preloadDone = True

    # end of MySwordBible.preload

    def load(self):
        """
        Load all the expected books out of the SQLite3 database.

        The OT and NT entries in the MySword metadata decide which book we
            start at and how many books we step through.
        Each verse is requested individually, using the chapter and verse
            counts that self.BOS supplies for each book, and whatever comes
            back (None for a missing row) is passed on to handleLine().
        A book is only stashed if at least one non-blank text line was found.

        Afterwards the cursor is closed, the metadata is applied,
            and the post-load processing is run.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("load()"))
        assert self.preloadDone

        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}…").format(self.sourceFilepath))

        # Work out where to start and how many books to attempt
        # NOTE: If neither flag is set, BBB is never assigned
        #   and so creating the first book below will fail
        haveOT = self.suppliedMetadata['MySword']['OT']
        haveNT = self.suppliedMetadata['MySword']['NT']
        if haveOT:
            BBB = 'GEN'
            booksExpected = 66 if haveNT else 39
        elif haveNT:
            BBB = 'MAT'
            booksExpected = 27

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        versesPerChapter = self.BOS.getNumVersesList(BBB)
        chapterCount = len(versesPerChapter)
        versesInChapter = versesPerChapter[0]
        bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C, V = 1, 1

        bookCount = 0
        ourGlobals = {'haveParagraph': False}  # State shared with handleLine()
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (bookNumber, C, V))
            try:
                line = self.cursor.fetchone()[0]
            except TypeError:  # fetchone() gave None, i.e., this reference is missing
                line = None

            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}".
                    format(BBB, C, V))
            elif not isinstance(line, str):
                if 'encryption' in self.suppliedMetadata['MySword']:
                    logging.critical(
                        "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                        .format(BBB, C, V, line))
                    break  # Give up on loading altogether
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                    .format(BBB, C, V, line, self.suppliedMetadata['MySword']))
            elif not line:
                logging.warning(
                    "MySwordBible.load: Found blank verse line at {} {}:{}"
                    .format(BBB, C, V))
            else:  # We have some actual text
                haveLines = True

                # Some modules end lines with \r\n or have it in the middle!
                #   (Trailing ones are dropped and any others become spaces)
                line = line.rstrip('\r\n')
                if '\r' in line or '\n' in line:  # (still some in the middle)
                    logging.warning(
                        "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                        .format(BBB, C, V))
                line = line.replace('\r\n', ' ')
                line = line.replace('\r', ' ')
                line = line.replace('\n', ' ')

            handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)

            # Step to the next verse, chapter, or book
            V += 1
            if V > versesInChapter:
                C += 1
                if C <= chapterCount:  # next chapter only
                    versesInChapter = versesPerChapter[C - 1]
                    V = 1
                else:  # Save this book now
                    if haveLines:
                        if BibleOrgSysGlobals.verbosityLevel > 3:
                            print("  MySword saving", BBB, bookCount + 1)
                        self.stashBook(thisBook)
                    bookCount += 1  # Counts the books attempted (not the books saved)
                    if bookCount >= booksExpected:
                        break
                    BBB = self.BOS.getNextBookCode(BBB)

                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'MySword Bible Book object'
                    thisBook.objectTypeString = 'MySword'
                    haveLines = False

                    versesPerChapter = self.BOS.getNumVersesList(BBB)
                    chapterCount = len(versesPerChapter)
                    versesInChapter = versesPerChapter[0]
                    bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(
                        BBB)
                    C, V = 1, 1

            # handleLine() sets this flag when a paragraph marker is wanted before the next verse
            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

    # end of MySwordBible.load

    def loadBook(self, BBB):
        """
        Fetch one book from the SQLite3 database, verse by verse, and stash it.

        Returns straight away if BBB is already in self.books, or if a load
            of BBB has been attempted before (recorded in self.triedLoadingBook).

        Every chapter/verse position listed by self.BOS.getNumVersesList( BBB )
            is queried through self.cursor and the result (None for a missing
            verse) is handed to handleLine().  The book is passed to
            self.stashBook() only if at least one non-blank text line was found.
        Loading stops early (with nothing stashed) if a non-string verse is
            met and the module metadata has an 'encryption' entry.
        """
        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
            print(exp("loadBook( {} )").format(BBB))
        assert self.preloadDone

        # Nothing to do for a book that's already loaded or already attempted
        if BBB in self.books:
            if BibleOrgSysGlobals.debugFlag:
                print("  {} is already loaded -- returning".format(BBB))
            return
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading MySwordBible {} for {}".format(
                    BBB, self.name))
            return
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            print(
                _("MySwordBible: Loading {} from {}…").format(
                    BBB, self.sourceFilepath))

        # Set up the empty book object
        bookObject = BibleBook(self, BBB)
        bookObject.objectNameString = 'MySword Bible Book object'
        bookObject.objectTypeString = 'MySword'

        # The versification tells us which chapter/verse positions to ask for
        versesPerChapter = self.BOS.getNumVersesList(BBB)
        chapterCount = len(versesPerChapter)
        verseCount = versesPerChapter[0]
        bookNumber = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
        C, V = 1, 1

        parserState = {'haveParagraph': False}  # Shared with handleLine()
        gotText = False
        verseQuery = 'select Scripture from Bible where Book=? and Chapter=? and Verse=?'
        while True:
            self.cursor.execute(verseQuery, (bookNumber, C, V))
            try:
                line = self.cursor.fetchone()[0]
            except TypeError:  # fetchone() gave None -- this reference is missing
                line = None

            if line is None:
                logging.warning(
                    "MySwordBible.load: Found missing verse line at {} {}:{}".
                    format(BBB, C, V))
            elif not isinstance(line, str):
                if 'encryption' in self.suppliedMetadata['MySword']:
                    logging.critical(
                        "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                        .format(BBB, C, V, line))
                    break  # Give up on this book
                logging.critical(
                    "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                    .format(BBB, C, V, line,
                            self.suppliedMetadata['MySword']))
            elif not line:
                logging.warning(
                    "MySwordBible.load: Found blank verse line at {} {}:{}"
                    .format(BBB, C, V))
            else:
                gotText = True
                # Some modules end lines with \r\n or have it in the middle!
                line = line.rstrip('\r\n')  # Drop any trailing CR/LF characters
                if '\r' in line or '\n' in line:  # (in the middle)
                    logging.warning(
                        "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                        .format(BBB, C, V))
                for lineEnding in ('\r\n', '\r', '\n'):
                    line = line.replace(lineEnding, ' ')

            handleLine(self.name, BBB, C, V, line, bookObject, parserState)

            # Step to the next verse, chapter, or finish the book
            V += 1
            if V > verseCount:
                C += 1
                if C > chapterCount:  # Past the last chapter -- save this book now
                    if gotText:
                        if BibleOrgSysGlobals.verbosityLevel > 2:
                            print("  MySword saving", BBB)
                        self.stashBook(bookObject)
                    break
                verseCount = versesPerChapter[C - 1]
                V = 1

            if parserState['haveParagraph']:
                bookObject.addLine('p', '')
                parserState['haveParagraph'] = False
示例#8
0
    def load( self ):
        """
        Load a single verse-per-line (VPL) source file and load book elements.

        The layout is detected from the first physical line of the file only:
            type 1: 'BookAbbreviation C:V verseText' (space separated)
            type 2: an 8-digit BBCCCVVV reference, then a tab, then the verse text
            type 3: as type 2, but preceded by '# key: value' header lines
                        (the first one being '# language_name:')
        If that line matches none of these, it is skipped and vplType stays None,
            so every following line is reported as an unknown VPL type.

        Each completed book is passed to self.stashBook().
        Any type 3 header values are saved in self.suppliedMetadata['VPL']
            and then self.applySuppliedMetadata( 'VPL' ) is called.
        Finally self.doPostLoadProcessing() is called.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )

        # The organisational systems are module-level and only created once
        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganizationalSystem( 'GENERIC-KJV-81-ENG' )
        if BOSx is None: BOSx = BibleOrganizationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lineCount = 0
        vplType = bookCode = BBB = None
        # Pre-set these so that the "Unexpected number of bits" report below can't hit
        #   unbound names if the very first type 1 data line fails to parse
        #   (e.g., a 'C.V' reference passes the type detection but isn't split below)
        chapterNumberString = verseNumberString = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = re.search( '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line )
                    if match: vplType = 1
                    else:
                        match = re.search( '^(\\d{8})\\s', line )
                        if match: vplType = 2
                        else:
                            match = re.search( '^# language_name:\\s', line )
                            if match: vplType = 3
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            print( "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                    else:
                        if BibleOrgSysGlobals.verbosityLevel > 2: print( "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                        # NOTE(review): 'halt' is not assigned anywhere in this method
                        #   -- presumably an intentional NameError to stop a debugging run
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue

                # Process header stuff
                if vplType == 3:
                    # Each known header field is saved (unless empty or 'Not available')
                    if   line.startswith( '# language_name:' ):
                        string = line[16:].strip()
                        if string and string != 'Not available': settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith( '# closest ISO 639-3:' ):
                        string = line[20:].strip()
                        if string and string != 'Not available': settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith( '# year_short:' ):
                        string = line[13:].strip()
                        if string and string != 'Not available': settingsDict['Year.short'] = string
                        continue
                    elif line.startswith( '# year_long:' ):
                        string = line[12:].strip()
                        if string and string != 'Not available': settingsDict['Year.long'] = string
                        continue
                    elif line.startswith( '# title:' ):
                        string = line[8:].strip()
                        if string and string != 'Not available': settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith( '# URL:' ):
                        string = line[6:].strip()
                        if string and string != 'Not available': settingsDict['URL'] = string
                        continue
                    elif line.startswith( '# copyright_short:' ):
                        string = line[18:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith( '# copyright_long:' ):
                        string = line[17:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.long'] = string
                        continue
                    elif line[0]=='#':
                        logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                        continue # Just discard comment lines

                # Process the main segment
                if vplType == 1:
                    bits = line.split( ' ', 2 )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCode, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                    # If the line doesn't parse, the values from the previous line (if any) are still in place
                    else: print( "Unexpected number of bits", self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, len(bits), bits )

                    if not bookCode and not chapterNumberString and not verseNumberString:
                        print( "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        continue
                    if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCode) <= 4
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCode, BBB, chapterNumberString, verseNumberString ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int( chapterNumberString )
                    verseNumber = int( verseNumberString )

                    if bookCode != lastBookCode: # We've started a new book
                        # A few abbreviations are mapped explicitly before trying the general look-ups
                        if bookCode in ('Le',): BBB = 'LEV'
                        elif bookCode in ('Jud',): BBB = 'JDG'
                        elif bookCode in ('So',): BBB = 'SNG'
                        elif bookCode in ('La',): BBB = 'LAM'
                        else:
                            BBB = BOS66.getBBBFromText( bookCode )  # Try to guess
                            if not BBB: BBB = BOS81.getBBBFromText( bookCode )  # Try to guess
                            if not BBB: BBB = BOSx.getBBBFromText( bookCode )  # Try to guess

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0]=='«':
                        if BBB=='PSA' and verseNumberString=='1': # Psalm title
                            vBits = vText[1:].split( '»' )
                            # NOTE(review): this runs before the new-book and new-chapter handling below,
                            #   so at Psalm 1:1 thisBook is still the previous book (or None),
                            #   and the title is added ahead of the chapter marker
                            thisBook.addLine( 'd', vBits[0] ) # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                        # Move Psalm titles to verse zero
                        #   (only the integer changes -- verseNumberString is what gets written out below)
                        verseNumber = 0

                elif vplType in (2,3):
                    bits = line.split( '\t', 1 )
                    # The reference field is BBCCCVVV: 2-digit book, 3-digit chapter, then the verse digits
                    bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                    while len(chapterNumberString)>1 and chapterNumberString[0]=='0':
                        chapterNumberString = chapterNumberString[1:] # Remove leading zeroes
                    while len(verseNumberString)>1 and verseNumberString[0]=='0':
                        verseNumberString = verseNumberString[1:] # Remove leading zeroes
                    bookCode, chapterNumber, verseNumber = int( bookNumberString), int(chapterNumberString), int(verseNumberString)
                    # Close up the spaces around punctuation, brackets and quote marks
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[')

                    if bookCode != lastBookCode: # We've started a new book
                        # Book numbers above 66 are looked up in this table
                        bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                                85:'PSS', 86:'ODE', }
                        if 1 <= bookCode <= 66: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookCode )
                        else: BBB = bnDict[bookCode]

                else:
                    logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

                if bookCode:
                    if bookCode != lastBookCode: # We've started a new book
                        if lastBookCode != -1: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                # (The book code and the Bible name were swapped in this message)
                                logging.critical( "Have duplicated {} book in {}".format( BBB, self.givenName ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCode ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        #   (Only an exact repeat is actually dropped -- the other cases just log and carry on)
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCode, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCode yet
                    logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        if settingsDict:
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()
示例#9
0
    def load(self):
        """
        Load a single source file and load book elements.

        Opens self.sourceFilepath as a SQLite3 database, copies the first row
        of the Details table into self.settingsDict, then walks the books of
        the GENERIC-KJV-66-ENG organisational system, fetching each expected
        chapter/verse from the Bible table one at a time.  Each book that
        yielded at least one non-blank text line is passed to self.saveBook().
        Loading stops early if a non-string verse is met and the settings
        have an 'encryption' entry.

        The cursor and the database connection are closed before returning,
        including when an exception is raised part-way through the load.
        self.doPostLoadProcessing() is called at the end of a load that
        didn't raise.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))

        # Complain (but carry on) if the filename doesn't look right
        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in filenameEndingsToAccept:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BibleFilenameEndingsToAccept[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        cursor = connection.cursor()
        try:
            # First get the settings
            cursor.execute('select * from Details')
            row = cursor.fetchone()
            for key in row.keys():
                self.settingsDict[key] = row[key]
            if 'Description' in self.settingsDict and len(
                    self.settingsDict['Description']) < 40:
                self.name = self.settingsDict['Description']
            if 'Abbreviation' in self.settingsDict:
                self.abbreviation = self.settingsDict['Abbreviation']
            if 'encryption' in self.settingsDict:
                logging.critical("{} is encrypted: level {}".format(
                    self.sourceFilename, self.settingsDict['encryption']))

            # Choose the starting book and how many books to step through
            # NOTE(review): if neither the OT nor the NT flag is set, BBB and
            #   booksExpected stay unbound and the BibleBook() call below fails
            if self.settingsDict['OT'] and self.settingsDict['NT']:
                BBB, booksExpected = 'GEN', 66
            elif self.settingsDict['OT']:
                BBB, booksExpected = 'GEN', 39
            elif self.settingsDict['NT']:
                BBB, booksExpected = 'MAT', 27

            BOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

            # Create the first book
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = "MySword Bible Book object"
            thisBook.objectTypeString = "MySword"

            verseList = BOS.getNumVersesList(BBB)
            numC, numV = len(verseList), verseList[0]
            nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(BBB)
            C = V = 1

            bookCount = 0
            ourGlobals = {}  # Shared with handleLine()
            ourGlobals['haveParagraph'] = False
            haveLines = False
            while True:
                cursor.execute(
                    'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                    (nBBB, C, V))
                try:
                    row = cursor.fetchone()
                    line = row[0]
                except TypeError:  # This reference is missing (row is None)
                    line = None
                if line is None:
                    logging.warning(
                        "MySwordBible.load: Found missing verse line at {} {}:{}".
                        format(BBB, C, V))
                else:  # line is not None
                    if not isinstance(line, str):
                        if 'encryption' in self.settingsDict:
                            logging.critical(
                                "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {}"
                                .format(BBB, C, V, repr(line)))
                            break  # Give up on the whole load
                        else:
                            logging.critical(
                                "MySwordBible.load: Unable to decode verse line at {} {}:{} {} {}"
                                .format(BBB, C, V, repr(line), self.settingsDict))
                    elif not line:
                        logging.warning(
                            "MySwordBible.load: Found blank verse line at {} {}:{}"
                            .format(BBB, C, V))
                    else:
                        haveLines = True

                        # Some modules end lines with \r\n or have it in the middle!
                        #   (We just ignore these for now)
                        while line and line[-1] in '\r\n':
                            line = line[:-1]
                        if '\r' in line or '\n' in line:  # (in the middle)
                            logging.warning(
                                "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                                .format(BBB, C, V))
                        line = line.replace('\r\n',
                                            ' ').replace('\r',
                                                         ' ').replace('\n', ' ')

                handleLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
                V += 1
                if V > numV:
                    C += 1
                    if C > numC:  # Save this book now
                        if haveLines:
                            if BibleOrgSysGlobals.verbosityLevel > 3:
                                print("Saving", BBB, bookCount + 1)
                            self.saveBook(thisBook)
                        bookCount += 1  # Not the number saved but the number we attempted to process
                        if bookCount >= booksExpected: break
                        BBB = BOS.getNextBookCode(BBB)
                        # Create the next book
                        thisBook = BibleBook(self, BBB)
                        thisBook.objectNameString = "MySword Bible Book object"
                        thisBook.objectTypeString = "MySword"
                        haveLines = False

                        verseList = BOS.getNumVersesList(BBB)
                        numC, numV = len(verseList), verseList[0]
                        nBBB = BibleOrgSysGlobals.BibleBooksCodes.getReferenceNumber(
                            BBB)
                        C = V = 1
                    else:  # next chapter only
                        numV = verseList[C - 1]
                        V = 1

                if ourGlobals['haveParagraph']:
                    thisBook.addLine('p', '')
                    ourGlobals['haveParagraph'] = False
        finally:
            # Always release the database, even if the load failed part-way through
            cursor.close()
            connection.close()
        self.doPostLoadProcessing()
# Typographic characters that are flattened to plain ASCII before compression.
_EASYWORSHIP_CHAR_TABLE = str.maketrans( {
    '“': '"', '”': '"',
    '‘': "'", '’': "'",
    '–': '--', '—': '--',
    } )


def _encodeEasyWorshipField( text, fieldWidth, description ):
    """
    Encode text as UTF-8 and return exactly fieldWidth bytes, null-padded.

    If the encoded text is too long, it is truncated on a character boundary
        (so no partial multi-byte sequence is written) and a warning is logged.
    This keeps every later offset in the fixed-layout header correct.
    """
    fieldBytes = text.encode( 'utf8' )
    if len(fieldBytes) > fieldWidth:
        logging.warning( "createEasyWorshipBible: truncating {} {!r} to fit {} bytes".format( description, text, fieldWidth ) )
        # errors='ignore' drops a multi-byte character that got cut in half by the slice
        fieldBytes = fieldBytes[:fieldWidth].decode( 'utf8', errors='ignore' ).encode( 'utf8' )
    return fieldBytes + b'\x00' * (fieldWidth - len(fieldBytes))
# end of _encodeEasyWorshipField


def _compressEasyWorshipText( text, compressionLevel ):
    """
    Return the zlib-compressed UTF-8 text followed by the ten-byte trailer
        (b'QK\\x03\\x04', a little-endian 32-bit length, then b'\\x08\\x00').
    """
    compressedBytes = zlib.compress( text.encode( 'utf8' ), compressionLevel )
    assert compressedBytes[0]==0x78 and compressedBytes[1]==0xda # Zlib compression header
    # NOTE(review): len(text) counts characters, not encoded bytes -- the two differ for
    #   non-ASCII text. The format is undocumented, so confirm which one EasyWorship expects.
    appendage = b'QK\x03\x04' + struct.pack( '<I', len(text) ) + b'\x08\x00'
    assert len(appendage) == 10
    return compressedBytes + appendage
# end of _compressEasyWorshipText


def _buildEasyWorshipBookText( BibleObject, BBB, processedLines, addedNestingMarkers, ignoredMarkers ):
    """
    Convert the processed lines of one book into 'C:V verseText' entries
        separated by blank (CRLF CRLF) lines, and return the resulting string.

    Markers that are not handled here are added to the ignoredMarkers set.
    """
    verseChunks = [] # One entry per output verse -- joined once at the end
    # Start every book with fresh state so nothing leaks in from the previous book.
    # NOTE(review): chapter 1 is an assumed fallback for verse text that precedes any c marker.
    C, V, lastVWritten = 1, '0', '0'
    VBridgedText = ''
    vBridgeStartInt = vBridgeEndInt = None # For printing missing (bridged) verse numbers

    for entry in processedLines:
        marker, text = entry.getMarker(), entry.getCleanText()
        if '¬' in marker or marker in addedNestingMarkers: continue # Just ignore added markers -- not needed here
        elif marker == 'c':
            C = int( text ) # Just so we get an error if we have something different
            V = lastVWritten = '0'
            vBridgeStartInt = vBridgeEndInt = None # A bridge can't carry over into a new chapter
        elif marker == 'v':
            V = text
            # Forget any earlier bridge that never got its verse text,
            #   otherwise this verse would be written out using the old range
            vBridgeStartInt = vBridgeEndInt = None
            for bridgeChar in ('-', '–', '—'): # hyphen, endash, emdash
                ix = V.find( bridgeChar )
                if ix == -1: continue
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel>2:
                    print( "createEasyWorshipBible: preparing for verse bridge in {} at {} {}:{}" \
                                .format( BibleObject.abbreviation, BBB, C, V ) )
                # Remove verse bridges (and any a/b/c part-verse suffixes)
                vStart = V[:ix].replace( 'a', '' ).replace( 'b', '' ).replace( 'c', '' )
                vEnd = V[ix+1:].replace( 'a', '' ).replace( 'b', '' ).replace( 'c', '' )
                try: vBridgeStartInt, vBridgeEndInt = int( vStart ), int( vEnd )
                except ValueError:
                    print( "createEasyWorshipBible: bridge doesn't seem to be integers in {} {}:{!r}".format( BBB, C, V ) )
                    vBridgeStartInt = vBridgeEndInt = None # One of them isn't an integer
                VBridgedText = V
                V = vStart
                break
        elif marker == 'v~':
            try:
                if int(V) <= int(lastVWritten):
                    # TODO: Not sure what level the following should be? info/warning/error/critical ????
                    logging.warning( 'createEasyWorshipBible: Maybe duplicating {} {}:{} after {} with {}'.format( BBB, C, V, lastVWritten, text ) )
            except ValueError: pass # had a verse bridge
            if vBridgeStartInt and vBridgeEndInt: # We had a verse bridge
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel>2:
                    print( "createEasyWorshipBible: handling verse bridge in {} at {} {}:{}-{}" \
                                .format( BibleObject.abbreviation, BBB, C, vBridgeStartInt, vBridgeEndInt ) )
                # Copy the bridged text to every verse number covered by the bridge
                verseChunks.extend( '{}:{} ({}) {}'.format( C, vNum, VBridgedText, text )
                                        for vNum in range( vBridgeStartInt, vBridgeEndInt+1 ) )
                lastVWritten = str( vBridgeEndInt )
                vBridgeStartInt = vBridgeEndInt = None
            else:
                verseChunks.append( '{}:{} {}'.format( C, V, text ) )
                lastVWritten = V
        elif marker == 'p~':
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                assert verseChunks # This is a continued part of the verse -- failed with this bad source USFM:
                                    #     \c 1 \v 1 \p These events happened...
            continuation = ' {}'.format( text ) # continuation of the same verse
            if verseChunks: verseChunks[-1] += continuation
            else: verseChunks.append( continuation )
        else:
            ignoredMarkers.add( marker )

    return '\r\n\r\n'.join( verseChunks ).translate( _EASYWORSHIP_CHAR_TABLE )
# end of _buildEasyWorshipBookText


def createEasyWorshipBible( BibleObject, outputFolder=None ):
    """
    Write the pseudo USFM out into the compressed EasyWorship format.

    Since we don't have a specification for the format,
        and since we don't know the meaning of all the binary pieces of the file,
        we can't be certain yet that this output will actually work. :-(

    Parameters:
        BibleObject: the loaded Bible -- this function reads its books,
            abbreviation and doneSetupGeneric attributes and calls getAName().
        outputFolder: folder to write into (created if necessary).
            Defaults to 'OutputFiles/BOS_EasyWorshipBible_Export/'.

    Writes the binary module file plus a zipped copy of it (same name with
        '.zip' appended) into the output folder.

    Returns True.
    """
    from InternalBibleInternals import BOS_ADDED_NESTING_MARKERS
    import zipfile

    # It seems 7-9 give the correct two header bytes
    ZLIB_COMPRESSION_LEVEL = 9 #  -1=default(=6), 0=none, 1=fastest...9=highest compression level

    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running createEasyWorshipBible…" )
    if BibleOrgSysGlobals.debugFlag: assert BibleObject.books

    # NOTE(review): double-underscore names are only mangled inside a class body,
    #   so here this looks up an attribute literally called '__setupWriter' -- confirm that it exists.
    if not BibleObject.doneSetupGeneric: BibleObject.__setupWriter()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_EasyWorshipBible_Export/'
    os.makedirs( outputFolder, exist_ok=True ) # Make the empty folder if there wasn't already one there

    # Set-up their Bible reference system
    BOS = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )

    ignoredMarkers = set()

    # Before we write the file, let's compress all our books
    # Books are written as C:V verseText with double-spaced lines
    compressedDictionary = {}
    for BBB,bookObject in BibleObject.books.items():
        if BBB in ('FRT','INT','BAK','OTH','GLS','XXA','XXB','XXC','XXD','XXE','XXF','XXG',): continue # Ignore these books
        bookText = _buildEasyWorshipBookText( BibleObject, BBB, bookObject._processedLines,
                                                BOS_ADDED_NESTING_MARKERS, ignoredMarkers )
        compressedDictionary[BBB] = _compressEasyWorshipText( bookText, ZLIB_COMPRESSION_LEVEL )

    # Work out the "compressed" (osfuscated) module name
    name = 'ezFree' + ( BibleObject.abbreviation if BibleObject.abbreviation else 'UNK' )
    if len(name)>16: name = name[:16] # Shorten
    encodedNameBytes = zlib.compress( name.encode( 'utf8' ), ZLIB_COMPRESSION_LEVEL )
    if BibleOrgSysGlobals.debugFlag:
        print( 'Name {!r} went from {} to {} bytes'.format( name, len(name), len(encodedNameBytes) ) )
    assert encodedNameBytes[0]==0x78 and encodedNameBytes[1]==0xda # Zlib compression header
    assert len(encodedNameBytes) <= 26

    filename = '{}{}'.format( BibleObject.abbreviation, FILENAME_ENDING ).lower()
    # The zip step further down must use this same sanitised name,
    #   otherwise it can't find the file that was actually written
    safeFilename = BibleOrgSysGlobals.makeSafeFilename( filename )
    filepath = os.path.join( outputFolder, safeFilename )
    if BibleOrgSysGlobals.verbosityLevel > 2: print( '  createEasyWorshipBible: ' + _("Writing {!r}…").format( filepath ) )
    bookAddress = startingBookAddress = 14872 + len(name) + 18 + 4 # Name is something like ezFreeXXX
    with open( filepath, 'wb' ) as myFile:
        assert myFile.tell() == 0
        # Write the header info to binary file
        myFile.write( b'EasyWorship Bible Text\x1a\x02<\x00\x00\x00\xe0\x00\x00\x00' )
        assert myFile.tell() == 32
        myFile.write( _encodeEasyWorshipField( BibleObject.getAName(), 56, 'module name' ) )
        assert myFile.tell() == 88 # 32 + 56

        # Write the numChapters,numVerses info along with the file position and length
        # Each book record is 224 bytes: 51 (name) + 157 (chapter/verse counts) + 8 (address) + 8 (length)
        for BBB in BOS.getBookList():
            try: bookName = BibleObject.books[BBB].shortTOCName
            except (KeyError,AttributeError): bookName = None # KeyError if no BBB, AttributeError if no shortTOCName
            # The book name is not compulsory -- an empty field will default to English
            myFile.write( _encodeEasyWorshipField( bookName or '', 51, 'book name' ) )

            numVersesList = BOS.getNumVersesList( BBB )
            numChapters = len( numVersesList )
            myFile.write( struct.pack( 'B', numChapters ) )
            for verseCount in numVersesList: myFile.write( struct.pack( 'B', verseCount ) )
            myFile.write( b'\x00' * (157 - numChapters - 1) )

            try: bookBytes = compressedDictionary[BBB] # if it exists
            except KeyError: # Fill in missing books
                missingString = "1:1 Book not available\r\n\r\n"
                bookBytes = _compressEasyWorshipText( missingString, ZLIB_COMPRESSION_LEVEL )
                compressedDictionary[BBB] = bookBytes # So the book loop below writes the placeholder
            myFile.write( struct.pack( '<Q', bookAddress ) )
            myFile.write( struct.pack( '<Q', len(bookBytes) ) )
            bookAddress += len(bookBytes)
        assert myFile.tell() == 14872 # 32 + 56 + 224*66

        # Write the "compressed" (osfuscated) module name
        myFile.write( struct.pack( '<I', len(name) + 18 ) )
        assert myFile.tell() == 14876 # 32 + 56 + 224*66 + 4
        myFile.write( encodedNameBytes )

        appendage = b'QK\x03\x04' + struct.pack( 'B', len(name) ) + b'\x00'
        assert len(appendage) == 6
        myFile.write( appendage )
        remainderCount = 18 + len(name) - len(encodedNameBytes) - 4 - len(appendage)
        assert remainderCount == 0
        myFile.write( b'\x00\x00\x08\x00' ) # Not sure what this means
        assert myFile.tell() == startingBookAddress

        # Write the book info to the binary files
        # (every listed book has an entry by now -- missing ones got a placeholder above)
        for BBB in BOS.getBookList():
            myFile.write( compressedDictionary[BBB] ) # Write zlib output

        # Write the end of file stuff
        myFile.write( b'\x18:\x00\x00\x00\x00\x00\x00ezwBible' )

    if ignoredMarkers:
        logging.info( "createEasyWorshipBible: Ignored markers were {}".format( ignoredMarkers ) )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( "  " + _("WARNING: Ignored createEasyWorshipBible markers were {}").format( ignoredMarkers ) )

    # Now create a zipped version (of the file that we actually wrote above)
    if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Zipping {} EWB file…".format( safeFilename ) )
    with zipfile.ZipFile( filepath+'.zip', 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
        zf.write( filepath, safeFilename )

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( "  BibleWriter.createEasyWorshipBible finished successfully." )
    return True